Source code for variationist.visualization.scatter_geo_chart

import altair as alt
import geopandas as gpd
import os
import pandas as pd

from typing import Optional

from variationist.visualization.altair_chart import AltairChart

# Speed up vector-based spatial data processing by selecting "pyogrio" as the
# geopandas I/O engine. This assignment runs once, as a side effect of
# importing this module, and changes a geopandas-wide option rather than a
# per-call argument.
# NOTE(review): presumably requires the "pyogrio" package to be installed in
# the environment -- confirm against the project's dependencies.
# See: https://geopandas.org/en/stable/docs/user_guide/io.html#reading-spatial-data
gpd.options.io_engine = "pyogrio"


class ScatterGeoChart(AltairChart):
    """A class for building a ScatterGeoChart object.

    The chart is made of two layers: a background map drawn from a shapefile
    and a foreground layer of points positioned by latitude/longitude, whose
    fill colour is driven by the metric value through dropdown components.
    """

    # Coordinate reference system the shapefile is re-projected to before
    # plotting. WGS 84 (EPSG:4326) is the lon/lat system Altair expects for
    # geoshape marks and latitude/longitude channels.
    _MAP_CRS = "EPSG:4326"

    # Width and height (in pixels) shared by the background and point layers.
    _CHART_BASE_SIZE = 600

    def __init__(
        self,
        df_data: pd.core.frame.DataFrame,
        chart_metric: str,
        metadata: dict,
        extra_args: Optional[dict] = None,
        chart_dims: Optional[dict] = None,
        zoomable: Optional[bool] = True,
        top_per_class_ngrams: Optional[int] = None,
        shapefile_path: Optional[str] = None,
    ) -> None:
        """
        Initialization function for building a ScatterGeoChart object.

        Parameters
        ----------
        df_data: pd.core.frame.DataFrame
            A long-form dataframe storing the results of a prior analysis for
            a given metric that will be used for visualization purposes.
        chart_metric: str
            The metric associated to the "df_data" dataframe and thus to the
            chart.
        metadata: dict
            A dictionary storing the metadata about the prior analysis.
        extra_args: Optional[dict] = None
            A dictionary storing the extra arguments for this chart type. A
            "shapefile_path" entry, when present and not None, takes
            precedence over the "shapefile_path" parameter. None is treated
            as an empty dictionary.
        chart_dims: Optional[dict] = None
            The mapping dictionary for the variables for the given chart. It
            must provide the "lat", "lon", "color" and "dropdown" dimensions.
            None is treated as an empty dictionary.
        zoomable: Optional[bool] = True
            Whether the (HTML) chart should be zoomable using the mouse or
            not. Zooming is not supported by Altair for scatter geo charts,
            so the value is only forwarded to the parent class.
        top_per_class_ngrams: Optional[int] = None
            The maximum number of highest scoring per-class n-grams to show
            (for bar charts only). If set to None, it will show all the
            n-grams in the corpus (it may easily be overwhelming). This
            parameter is stored but otherwise ignored when creating other
            chart types, including this one.
        shapefile_path: Optional[str] = None
            A path to the .shp shapefile to be visualized as background map
            to the chart, used when "extra_args" does not provide one. Note
            that auxiliary files to the .shp one (i.e., .dbf, .prj, .shx
            ones) are required for chart creation too, but do not need to be
            specified. They should have the same name as the .shp file but
            different extension, and be located in the same folder as the
            .shp file itself. An example of repository where to find
            shapefiles is https://geodata.lib.berkeley.edu/, but there exist
            many other ones and shapefiles provided by national/regional
            institutions.

        Raises
        ------
        ValueError
            If no shapefile path is provided (neither through "extra_args"
            nor through "shapefile_path"), or if the path does not exist.
        """
        # Fresh containers per call: never share mutable defaults between
        # instances.
        extra_args = {} if extra_args is None else extra_args
        chart_dims = {} if chart_dims is None else chart_dims

        super().__init__(df_data, chart_metric, metadata, extra_args, zoomable)

        # Set attributes
        self.top_per_class_ngrams = top_per_class_ngrams
        self.metric_label = chart_metric + " value"
        if self.n_cooc == 1:
            self.text_label = (
                (str(self.n_tokens) + "-gram") if self.n_tokens > 1 else "token"
            )
        else:
            self.text_label = "tokens"

        # Set extra attributes
        self.shapefile_path = self._resolve_shapefile_path(
            extra_args, shapefile_path)

        # Check if the specified filepath "shapefile_path" is defined and
        # exists. If not, fail with an explicit error.
        if self.shapefile_path is None:
            raise ValueError(
                "ERROR. \"shapefile_path\" must be specified for creating spatial charts.\n")
        if not os.path.exists(self.shapefile_path):
            raise ValueError(
                f"ERROR. The filepath for the shapefile \"{self.shapefile_path}\" does not exist.\n")

        # Load the shapefile and transform geometries to a standard
        # coordinate reference system
        gdf = gpd.read_file(self.shapefile_path).to_crs(self._MAP_CRS)

        # Set background chart style
        background = alt.Chart(gdf).mark_geoshape(
            stroke="white", strokeWidth=0.5, fill="#e1e7e3")

        # Set base chart style
        self.base_chart = self.base_chart.mark_point(
            size=50, strokeWidth=1, opacity=1)

        # Get relevant dimensions
        lat_name, lat_type = self.get_dim("lat", chart_dims)
        lon_name, lon_type = self.get_dim("lon", chart_dims)
        color_name, color_type = self.get_dim("color", chart_dims)

        # Set dimensions
        lat_dim = alt.Latitude(lat_name, type=lat_type)
        lon_dim = alt.Longitude(lon_name, type=lon_type)
        color = alt.Color(
            color_name,
            type=color_type,
            title=chart_metric,
            scale=alt.Scale(
                scheme="lighttealblue",
                # Anchor the colour scale at the smallest observed value
                domainMin=min(self.df_data[color_name]),
            ),
        )

        # Set tooltip
        tooltip = [
            alt.Tooltip("ngram", type="nominal", title=self.text_label),
            alt.Tooltip(lat_name, type=lat_type),
            alt.Tooltip(lon_name, type=lon_type),
            alt.Tooltip(color_name, type=color_type, title=self.metric_label),
        ]

        # Encoding the data
        # Note: fill=color will be conditionally added by the
        # "add_dropdown_components" call below
        self.base_chart = self.base_chart.encode(
            lat_dim, lon_dim, tooltip=tooltip)

        # Set extra properties for both background and foreground layers
        background = background.properties(
            width=self._CHART_BASE_SIZE, height=self._CHART_BASE_SIZE)
        self.base_chart = self.base_chart.properties(
            width=self._CHART_BASE_SIZE, height=self._CHART_BASE_SIZE)

        # The chart has to be filterable, therefore create and add
        # search/dropdown components to it. Each dropdown dimension is
        # resolved to its column name, then paired with the distinct values
        # found in that column.
        dropdown_keys = [
            self.get_dim("dropdown", {"dropdown": dropdown_dim})[0]
            for dropdown_dim in chart_dims["dropdown"]
        ]
        dropdown_values = [
            list(set(df_data[dropdown_key])) for dropdown_key in dropdown_keys
        ]
        self.base_chart = self.add_dropdown_components(
            self.base_chart, tooltip, dropdown_keys, dropdown_values,
            color, "fill")

        # NOTE: zooming (".interactive()") is not supported for scatter geo
        # charts by Altair, so "self.zoomable" is intentionally not applied.

        # Create the final chart
        self.chart = background + self.base_chart

    @staticmethod
    def _resolve_shapefile_path(
        extra_args: dict,
        shapefile_path: Optional[str],
    ) -> Optional[str]:
        """Return the shapefile path from "extra_args", else the parameter.

        The "extra_args" entry wins when it is present and not None, which
        keeps the behaviour of callers that already pass it that way; the
        explicit "shapefile_path" parameter is the fallback.
        """
        from_extra_args = extra_args.get("shapefile_path")
        if from_extra_args is not None:
            return from_extra_args
        return shapefile_path