Source code for variationist.visualization.altair_chart

import altair as alt
import functools
import operator
import os
import pandas as pd
import vl_convert as vlc

from typing import Union, Optional

from variationist.visualization.chart import Chart


class AltairChart(Chart):
    """A base class for building an alt.Chart chart object."""

    def __init__(
        self,
        df_data: pd.core.frame.DataFrame,
        chart_metric: str,
        metadata: dict,
        extra_args: Optional[dict] = None,
        zoomable: Optional[bool] = True,
    ) -> None:
        """
        Initialization function for a building an alt.Chart chart object.

        Parameters
        ----------
        df_data: pd.core.frame.DataFrame
            A long-form dataframe storing the results of a prior analysis for a given
            metric that will be used for visualization purposes.
        chart_metric: str
            The metric associated to the "df_data" dataframe and thus to the chart.
        metadata: dict
            A dictionary storing the metadata about the prior analysis.
        extra_args: Optional[dict] = None
            A dictionary storing the extra arguments for this chart type. When
            omitted (or None), a new empty dictionary is used.
        zoomable: Optional[bool] = True
            Whether the (HTML) chart should be zoomable using the mouse or not (if this
            is allowed for the resulting chart type by the underlying visualization
            library).
        """
        # Build a fresh dict per call: a literal `{}` default would be a single
        # object shared by every instance created without "extra_args"
        if extra_args is None:
            extra_args = {}

        super().__init__(df_data, chart_metric, metadata, extra_args, zoomable)

        # alt.data_transformers.enable("vegafusion")

        # Create the base chart object which stores the data
        self.base_chart = self.create_base_chart(df_data)

    def create_base_chart(
        self,
        df_data: pd.core.frame.DataFrame,
    ) -> alt.Chart:
        """
        A function that creates a base alt.Chart chart for the given data.

        Parameters
        ----------
        df_data: pd.core.frame.DataFrame
            A long-form dataframe storing the results of a prior analysis for a given
            metric that will be used for visualization purposes.

        Returns
        -------
        base_chart: alt.Chart
            A base alt.Chart chart object (a bar-mark chart).
        """
        # NOTE(review): the chart is built from "self.df_data" and not from the
        # "df_data" argument. "self.df_data" is not assigned in this class, so it
        # is presumably set by the parent initializer -- confirm before switching
        # to the argument, since the two may differ.
        base_chart = alt.Chart(self.df_data).mark_bar()

        return base_chart

    def add_search_component(
        self,
        base_chart: alt.Chart,
        tooltip: list[alt.Tooltip],
        dim: Union[alt.Color, alt.Y],
    ) -> alt.Chart:
        """
        A function that creates a search component and adds it to the chart.

        The search text is used as a case-insensitive regular expression that is
        tested against the "ngram" field of each datum.

        Parameters
        ----------
        base_chart: alt.Chart
            The base chart object in which to add the search component.
        tooltip: list[alt.Tooltip]
            A list of alt.Tooltip objects.
        dim: Union[alt.Color, alt.Y]
            The alt.Color or alt.Y dimension to be filtered in the chart.

        Returns
        -------
        base_chart: alt.Chart
            The same base chart object with the search component added.
        """
        # Create the search component
        search_input = alt.param(
            value="",
            bind=alt.binding(
                input="search",
                placeholder=f"insert {self.text_label}...",
                name=f"Filter by {self.text_label} ",
            ),
        )

        # Add the search component to the base chart
        base_chart = base_chart.add_params(search_input)

        # The same predicate drives both the opacity and the color encodings
        matches_search = alt.expr.test(
            alt.expr.regexp(search_input, "i"), alt.datum.ngram)

        # Set conditions for filtering when using the search component
        base_chart = base_chart.encode(
            opacity=alt.condition(matches_search, alt.value(1), alt.value(0)),
            color=alt.condition(matches_search, dim, alt.value("")),
            tooltip=tooltip,
        )

        return base_chart

    def add_dropdown_components(
        self,
        base_chart: alt.Chart,
        tooltip: list[alt.Tooltip],
        dropdown_keys: list[str],
        dropdown_elements: list[list[str]],
        color: alt.Color,
        operation: str,
    ) -> alt.Chart:
        """
        A function that creates dropdown components and adds them to the chart.

        Parameters
        ----------
        base_chart: alt.Chart
            The base chart object in which to add the dropdown components.
        tooltip: list[alt.Tooltip]
            A list of alt.Tooltip objects.
        dropdown_keys: list[str]
            A list of keys corresponding to each dropdown.
        dropdown_elements: list[list[str]]
            A list of lists, each containing the values for each dropdown (1:1 with
            dropdown_keys).
        color: alt.Color
            The alt.Color dimension to be filtered in the chart.
        operation: str
            The encoding driven by the dropdowns: one of "fill", "color", "opacity",
            "size", or "shape".

        Returns
        -------
        base_chart: alt.Chart
            The same base chart object with the dropdown components added.

        Raises
        ------
        ValueError
            If "operation" is not one of the supported values.
        """
        # Create a list to store the dropdown objects
        dropdowns = []

        # Iterate over the dropdown keys to create a dropdown component with the given values
        for i, dropdown_key in enumerate(dropdown_keys):
            # Get the label referring to the dropdown
            dropdown_label = self.text_label if (dropdown_key == "ngram") else dropdown_key
            placeholder = f"*Select {dropdown_label}*"

            # Create the dropdown component (the placeholder entry is sorted
            # together with the actual values)
            dropdown = alt.binding_select(
                options=sorted(
                    [placeholder] + [str(el) for el in dropdown_elements[i]]),
                name=f"Filter by {dropdown_label} ",
            )
            select = alt.selection_point(
                value=placeholder,
                bind=dropdown,
                fields=[dropdown_key],
            )

            # Add the dropdown component to the base chart
            base_chart = base_chart.add_params(select)
            base_chart = base_chart.transform_filter(select)

            # Add it to the list of dropdown component
            dropdowns.append(select)

        if operation not in ("fill", "color", "opacity", "size", "shape"):
            raise ValueError(f"The operation \"{operation}\" is not envisioned.")

        # A datum is selected only if it satisfies all the dropdown components
        all_selected = functools.reduce(operator.and_, dropdowns)
        conditional_color = alt.condition(all_selected, color, alt.value(""))

        # Encoding the data by considering all the dropdown components and the operation
        if operation == "fill":
            base_chart = base_chart.encode(fill=conditional_color, tooltip=tooltip)
        elif operation in ("color", "shape"):
            base_chart = base_chart.encode(color=conditional_color, tooltip=tooltip)
        else:
            # "opacity" and "size": the channel named by the operation additionally
            # encodes the "value" field for the selected data (and 0 otherwise)
            conditional_value = alt.condition(all_selected, "value", alt.value(0))
            base_chart = base_chart.encode(
                color=conditional_color,
                tooltip=tooltip,
                **{operation: conditional_value},
            )

        # The legend is disabled for every operation except "fill" and "color"
        if operation in ("opacity", "size", "shape"):
            base_chart = base_chart.configure_legend(disable=True)

        return base_chart

    def get_dim(
        self,
        dim: Union[int, str],
        chart_dims: dict,
    ) -> tuple[str, str]:
        """
        A function that returns the name and (altair) type of a variable given a chart
        dimension and the previously stored variable names, types, and semantics.

        Parameters
        ----------
        dim: Union[int, str]
            The dimension of interest (e.g., "x", "y", "lat", "lon", "color", etc).
        chart_dims: dict
            The mapping dictionary for the variables of the given chart. The entry for
            "dim" holds the variable reference (either the variable name as a string,
            or an index into the ordered variable names) followed by its (altair) type.

        Returns
        -------
        var_name: str
            The variable name referring to the dimension of interest.
        var_type_: str
            The (altair) variable type referring to the dimension of interest.
        """
        var_ref = chart_dims[dim][0]
        var_type_ = chart_dims[dim][1]

        # A variable referenced by name needs no lookup in the ordered names
        if isinstance(var_ref, str):
            return var_ref, var_type_

        # Simultaneously order the lists referring to the variables (by type, then
        # semantics, then name) and keep the resulting order of the variable names
        var_names = [
            name for _, _, name in sorted(
                zip(self.var_types, self.var_semantics, self.var_names))
        ]

        # Get the variable name from its index
        var_name = var_names[var_ref]

        # @TEMP workaround: Check for unwanted inversions
        if ("ordinal" in self.var_types) and ("quantitative" in self.var_types):
            if ("temporal" in self.var_semantics) and ("general" in self.var_semantics):
                index = 1 if var_ref == 0 else 0
                var_name = var_names[index]

        return var_name, var_type_

    def save(
        self,
        output_folder: str,
        chart_name: str,
        output_formats: Optional[list[str]] = None,
    ) -> None:
        """
        A function that saves the chart to the output folder in various formats.

        Parameters
        ----------
        output_folder: str
            A path to the output folder in which to save the chart. It is created if
            it does not exist.
        chart_name: str
            A name representing the chart object to be saved (used as the file name,
            without extension).
        output_formats: Optional[list[str]] = None
            A list of output formats for the charts. By default (None), only the
            interactive HTML chart is saved, i.e., ["html"]. Extra choices: ["pdf",
            "svg", "png"]. Note that for very large datasets the extra choices are
            too heavy to build.

        Raises
        ------
        TypeError
            If "output_formats" is empty.
        """
        # Avoid a mutable default argument: None stands for the default ["html"]
        if output_formats is None:
            output_formats = ["html"]

        # At least an output format is required
        if not output_formats:
            raise TypeError("ERROR: No output formats have been specified.")

        # Create the output folder if it does not exist
        os.makedirs(output_folder, exist_ok=True)

        # Save the chart to an HTML file in the output folder
        if "html" in output_formats:
            output_filepath = os.path.join(output_folder, chart_name + ".html")
            print(f"INFO: Saving it to the filepath: \"{output_filepath}\".")
            self.chart.save(output_filepath)

        # Save the chart to PDF, SVG, and PNG files in the output folder (in this
        # order). The SVG data is text, whereas the PDF and PNG data are bytes
        for extension, binary in (("pdf", True), ("svg", False), ("png", True)):
            if extension in output_formats:
                self._save_converted(output_folder, chart_name, extension, binary)

    def _save_converted(
        self,
        output_folder: str,
        chart_name: str,
        extension: str,
        binary: bool,
    ) -> None:
        """
        A helper that converts the chart through "vl_convert" and writes the result to
        "<output_folder>/<chart_name>.<extension>". This is best-effort: any failure
        is reported on the standard output instead of being raised.

        Parameters
        ----------
        output_folder: str
            A path to the (existing) output folder in which to save the chart.
        chart_name: str
            A name representing the chart object to be saved.
        extension: str
            The output format, i.e., "pdf", "svg", or "png".
        binary: bool
            Whether the converted data has to be written as bytes (True) or as
            text (False).
        """
        try:
            # Get the raw data from the chart (it requires "vl_convert" to be
            # installed). The converter is looked up here so that a missing one
            # only affects the format that needs it
            convert = getattr(vlc, f"vegalite_to_{extension}")
            raw_data = convert(self.chart.to_json())

            # Write the raw data to the output filepath
            output_filepath = os.path.join(output_folder, f"{chart_name}.{extension}")
            print(f"INFO: Saving it to the filepath: \"{output_filepath}\".")
            if binary:
                with open(output_filepath, "wb") as f:
                    f.write(raw_data)
            else:
                # Use an explicit encoding so that non-ASCII text does not depend
                # on the platform default
                with open(output_filepath, "wt", encoding="utf-8") as f:
                    f.write(raw_data)
        except Exception as error:
            print(f"The dataset is too big to be serialized as {extension.upper()} "
                  "efficiently. Please use the interactive HTML.")
            # Also report what actually went wrong, since the size of the dataset
            # is not the only possible cause of a failure
            print(f"Underlying error: {error!r}")