Source code for variationist.visualization.diversity_bar_chart

import altair as alt
import pandas as pd

from typing import Optional

from variationist.visualization.altair_chart import AltairChart


class DiversityBarChart(AltairChart):
    """A class for building a BarChart object for lexical diversity metrics."""

    def __init__(
        self,
        df_data: pd.DataFrame,
        chart_metric: str,
        metadata: dict,
        extra_args: Optional[dict] = None,
        chart_dims: Optional[dict] = None,
        zoomable: Optional[bool] = True,
        top_per_class_ngrams: Optional[int] = None,
    ) -> None:
        """
        Initialization function for building a BarChart object for lexical
        diversity metrics.

        Parameters
        ----------
        df_data: pd.DataFrame
            A long-form dataframe storing the results of a prior analysis for
            a given metric that will be used for visualization purposes. It
            must contain the columns "ngram" and "value" plus at least one
            additional column; the first additional column is used for the
            y axis.
        chart_metric: str
            The metric associated to the "df_data" dataframe and thus to the
            chart.
        metadata: dict
            A dictionary storing the metadata about the prior analysis.
        extra_args: Optional[dict] = None
            A dictionary storing the extra arguments for this chart type.
            If None, a new empty dictionary is used.
        chart_dims: Optional[dict] = None
            The mapping dictionary for the variables for the given chart.
            It is accepted for signature compatibility but is not used by
            this constructor.
        zoomable: Optional[bool] = True
            Whether the (HTML) chart should be zoomable using the mouse or
            not. It is forwarded to the parent class; zooming is not enabled
            here because it is disallowed for bar charts.
        top_per_class_ngrams: Optional[int] = None
            The maximum number of highest scoring per-class n-grams to show
            (for bar charts only). If set to None, it will show all the
            n-grams in the corpus (it may easily be overwhelming). This
            constructor only stores the value as an attribute.

        Raises
        ------
        ValueError
            If "df_data" lacks an "ngram" or a "value" column.
        IndexError
            If "df_data" has no column besides "ngram" and "value".
        """
        # Use a fresh dict per call instead of a shared mutable default.
        extra_args = {} if extra_args is None else extra_args

        # NOTE(review): the parent constructor presumably sets self.df_data
        # and self.base_chart, which are read below -- confirm in AltairChart.
        super().__init__(df_data, chart_metric, metadata, extra_args, zoomable)

        # Get relevant dimensions: whatever is left once the two fixed
        # columns are removed (list.remove raises ValueError if one is absent)
        variables = list(self.df_data.keys())
        for main_col in ["ngram", "value"]:
            variables.remove(main_col)

        x_name, x_type = "value", "quantitative"
        y_name, y_type = variables[0], "nominal"
        column_name, column_type = "ngram", "nominal"
        color_name, color_type = "ngram", "nominal"

        # Set attributes
        self.top_per_class_ngrams = top_per_class_ngrams
        self.metric_label = chart_metric + " value"
        self.text_label = y_name

        # Set base chart style
        self.base_chart = self.base_chart.mark_bar(
            height=15, binSpacing=0.5, cornerRadiusEnd=5
        )

        # Set dimensions
        x_dim = alt.X(x_name, type=x_type, title="")
        y_dim = alt.Y(y_name, type=y_type, title="")
        column_dim = alt.Column(
            column_name,
            type=column_type,
            header=alt.Header(labelFontWeight="bold"),
            title=chart_metric,
        )
        # For aesthetics only. The type must be passed by keyword: the second
        # positional parameter of an Altair channel is "aggregate", not "type".
        color = alt.Color(color_name, type=color_type, legend=None)

        # Set tooltip
        tooltip = [
            alt.Tooltip(y_name, type=y_type, title=self.text_label),
            alt.Tooltip(x_name, type=x_type, title=self.metric_label),
        ]

        # Encoding the data
        self.base_chart = self.base_chart.encode(
            x_dim, y_dim, column_dim, color, tooltip
        )

        # Set the independent dimensions
        self.base_chart = self.base_chart.resolve_scale(
            x="independent", y="independent"
        )

        # Set extra properties
        self.base_chart = self.base_chart.properties(width=250, center=True)

        # The chart has not to be filterable: the color channel set above is
        # replaced by one driven by the y dimension
        self.base_chart = self.base_chart.encode(color=y_dim)

        # Zoom is disallowed for bar charts, so self.zoomable is deliberately
        # not applied here (no call to .interactive()).

        # Create the final chart
        self.chart = self.base_chart