Source code for variationist.visualization.scatter_chart

import altair as alt
import pandas as pd

from typing import Optional

from variationist.visualization.altair_chart import AltairChart


class ScatterChart(AltairChart):
    """A class for building a ScatterChart object."""

    def __init__(
        self,
        df_data: pd.DataFrame,
        chart_metric: str,
        metadata: dict,
        extra_args: Optional[dict] = None,
        chart_dims: Optional[dict] = None,
        zoomable: Optional[bool] = True,
        top_per_class_ngrams: Optional[int] = None,
    ) -> None:
        """
        Initialization function for building a ScatterChart object.

        Parameters
        ----------
        df_data: pd.DataFrame
            A long-form dataframe storing the results of a prior analysis for a
            given metric that will be used for visualization purposes. Columns
            mapped to the "x" and "y" dimensions that hold bin labels (strings
            starting with "(") are overwritten IN PLACE with the bin midpoints.
        chart_metric: str
            The metric associated to the "df_data" dataframe and thus to the chart.
        metadata: dict
            A dictionary storing the metadata about the prior analysis.
        extra_args: Optional[dict] = None
            A dictionary storing the extra arguments for this chart type. When
            None, an empty dictionary is used.
        chart_dims: Optional[dict] = None
            The mapping dictionary for the variables for the given chart. This
            initializer reads the "x" and "y" entries, the optional "opacity"
            and "extra" entries, and "dropdown" (required whenever "opacity" or
            "extra" is present). When None, an empty dictionary is used.
        zoomable: Optional[bool] = True
            Whether the (HTML) chart should be zoomable using the mouse or not
            (if this is allowed for the resulting chart type by the underlying
            visualization library).
        top_per_class_ngrams: Optional[int] = None
            The maximum number of highest scoring per-class n-grams to show
            (for bar charts only). This class only stores the value on the
            instance; it is not otherwise used when building the scatter chart.
        """
        # Use a fresh dict per call instead of a shared mutable default.
        extra_args = {} if extra_args is None else extra_args
        chart_dims = {} if chart_dims is None else chart_dims

        super().__init__(df_data, chart_metric, metadata, extra_args, zoomable)

        # Set attributes
        self.top_per_class_ngrams = top_per_class_ngrams
        self.metric_label = chart_metric + " value"
        # NOTE(review): n_cooc / n_tokens are not assigned in this class;
        # presumably the parent initializer sets them -- confirm.
        if self.n_cooc == 1:
            self.text_label = f"{self.n_tokens}-gram" if self.n_tokens > 1 else "token"
        else:
            self.text_label = "tokens"

        has_opacity = "opacity" in chart_dims
        has_extra = "extra" in chart_dims

        # Set base chart style
        if has_opacity:
            # The line itself has zero stroke width, so only the point overlay
            # is drawn.
            self.base_chart = self.base_chart.mark_line(
                point=alt.OverlayMarkDef(size=75, strokeWidth=0.5),
                strokeWidth=0,
            )
        else:
            self.base_chart = self.base_chart.mark_line(point=True, strokeDash=[1, 0])

        # Get relevant dimensions (each lookup yields a (column name, type) pair)
        x_name, x_type = self.get_dim("x", chart_dims)
        y_name, y_type = self.get_dim("y", chart_dims)
        if has_opacity:
            opacity_name, opacity_type = self.get_dim("opacity", chart_dims)
        if has_extra:
            extra_name, extra_type = self.get_dim("extra", chart_dims)

        # Use the mean to represent the bin, if defined
        self._replace_bins_with_midpoints(df_data, x_name)
        self._replace_bins_with_midpoints(df_data, y_name)

        # Set dimensions
        x_domain = list(df_data[x_name].astype(float).unique())
        y_domain = list(df_data[y_name].astype(float).unique())
        x_dim = alt.X(
            x_name,
            type=x_type,
            scale=alt.Scale(domain=[min(x_domain), max(x_domain)]),
        )
        if has_opacity:
            y_dim = alt.Y(
                y_name,
                type=y_type,
                scale=alt.Scale(domain=[min(y_domain), max(y_domain)]),
                axis=alt.Axis(format=".2f"),
            )
        else:
            # Without an opacity dimension the y axis carries the metric
            # itself: only the lower bound is pinned and the axis is titled
            # with the metric name.
            y_dim = alt.Y(
                y_name,
                type=y_type,
                scale=alt.Scale(domainMin=min(y_domain)),
                title=chart_metric,
                axis=alt.Axis(format=".2f"),
            )
        color = alt.Color("ngram", type="nominal", title="", legend=None)

        # Set tooltip
        tooltip = [
            alt.Tooltip("ngram", type="nominal", title=self.text_label),
            alt.Tooltip(x_name, type=x_type),
        ]
        if has_opacity:
            tooltip.append(alt.Tooltip(y_name, type=y_type))
            tooltip.append(
                alt.Tooltip(opacity_name, type=opacity_type, title=self.metric_label))
        else:
            tooltip.append(alt.Tooltip(y_name, type=y_type, title=self.metric_label))
        if has_extra:
            tooltip.append(alt.Tooltip(extra_name, type=extra_type))

        # Encoding the data
        self.base_chart = self.base_chart.encode(
            x_dim,
            y_dim,
            # Note: color will be conditionally added by the "add_search_component"
            # Note: opacity will be conditionally added by the
            # "add_dropdown_components", if needed
            tooltip,
        )

        # Set extra properties
        chart_width = 800
        self.base_chart = self.base_chart.properties(width=chart_width, center=True)

        # The chart has to be filterable: add dropdown components when an
        # "opacity" or "extra" dimension is present, a search component otherwise
        if has_opacity or has_extra:
            dropdown_keys = [
                self.get_dim("dropdown", {"dropdown": dropdown_dim})[0]
                for dropdown_dim in chart_dims["dropdown"]
            ]
            # One list of distinct column values per dropdown
            dropdown_values = [list(set(df_data[key])) for key in dropdown_keys]
            target_channel = "opacity" if has_opacity else "color"
            self.base_chart = self.add_dropdown_components(
                self.base_chart, tooltip, dropdown_keys, dropdown_values,
                color, target_channel)
        else:
            self.base_chart = self.add_search_component(self.base_chart, tooltip, color)

        # If the chart has to be zoomable, set the property
        if self.zoomable:
            self.base_chart = self.base_chart.interactive()

        # Create the final chart
        self.chart = self.base_chart

    @staticmethod
    def _replace_bins_with_midpoints(df_data: pd.DataFrame, column: str) -> None:
        """
        Overwrite a column of bin labels with the numeric midpoint of each bin.

        The column is treated as binned when its first value is a string that
        starts with "(" (e.g., "(0.0, 1.5]"). Each label is stripped of its
        first and last character and split on ", " to get the two bounds.
        Columns that are empty or whose first value is not such a string are
        left untouched.

        Parameters
        ----------
        df_data: pd.DataFrame
            The dataframe to update. It is modified in place.
            NOTE(review): the write is deliberately not done on a copy; the
            chart created by the parent initializer presumably references this
            same frame -- confirm before changing.
        column: str
            The name of the column to inspect and, if binned, convert.
        """
        labels = df_data[column]
        if labels.empty:
            return
        # Positional access: the frame's index is not guaranteed to contain
        # the label 0 (e.g., after filtering).
        first_value = labels.iloc[0]
        if not (isinstance(first_value, str) and first_value.startswith("(")):
            return
        df_data[column] = [
            (float(lower) + float(upper)) / 2
            for lower, upper in (label[1:-1].split(", ") for label in labels)
        ]