Source code for variationist.metrics.metrics

from typing import Callable, Union

from variationist.metrics import corpus_statistics
from variationist.metrics import lexical_variation
from variationist.metrics import pmi


class Metric:
    """Generic wrapper that selects a metric function and runs it.

    Parameters
    ----------
    metric: `Union[str, Callable[..., dict]]`
        Either the name of a metric natively supported by Variationist (one
        of the keys of ``Metric._BUILTIN_METRICS``), or a custom callable.
        A custom callable is invoked by `calculate_metric` with three
        positional arguments, ``(label_values_dict, subsets_of_interest,
        args)``, so it must accept all three.
    args: InspectorArgs
        The arguments selected by the user. They are stored on the instance
        and forwarded unchanged to the metric function as its third argument.

    Raises
    ------
    NotImplementedError
        If `metric` is a string that does not name a supported metric.
    ValueError
        If `metric` is neither a string nor a callable.
    """

    # Maps each supported metric name to the (module, attribute) pair that
    # implements it. Entries are stored as plain strings and resolved on
    # demand, so selecting one metric never touches the others.
    # NOTE: "lex_art" (pmi.pmi_lexical_artifacts) is currently disabled.
    _BUILTIN_METRICS = {
        "pmi": ("pmi", "pmi"),
        "n_pmi": ("pmi", "pmi_normalized"),
        "p_pmi": ("pmi", "pmi_positive"),
        "np_pmi": ("pmi", "pmi_positive_normalized"),
        "w_pmi": ("pmi", "pmi_weighted"),
        "nw_pmi": ("pmi", "pmi_normalized_weighted"),
        "pw_pmi": ("pmi", "pmi_positive_weighted"),
        "npw_pmi": ("pmi", "pmi_positive_normalized_weighted"),
        "np_relevance": ("pmi", "class_relevance_positive_normalized"),
        "nw_relevance": ("pmi", "class_relevance_normalized_weighted"),
        "npw_relevance": ("pmi", "class_relevance_positive_normalized_weighted"),
        "ttr": ("lexical_variation", "ttr"),
        "root_ttr": ("lexical_variation", "rttr"),
        "maas": ("lexical_variation", "maas"),
        "log_ttr": ("lexical_variation", "lttr"),
        "freq": ("corpus_statistics", "create_frequency_dictionary"),
        "stats": ("corpus_statistics", "compute_basic_stats"),
    }

    def __init__(
        self,
        metric: Union[str, Callable[..., dict]],
        args,
    ) -> None:
        """Store the user's choices and resolve the metric function."""
        self.metric = metric
        self.args = args

        # isinstance (rather than an exact type check) so that str subclasses
        # are treated as metric names too.
        is_name = isinstance(metric, str)

        if is_name and metric in self._BUILTIN_METRICS:
            self.metric_fn = self._resolve_builtin(metric)
        elif callable(metric):
            self.metric_fn = metric
        elif is_name:
            raise NotImplementedError(f"The metric '{self.metric}' is not implemented.")
        else:
            raise ValueError(f"The specified metric should be a callable function or a string matching an implemented metric. Got a {type(self.metric)} instead")

    @classmethod
    def _resolve_builtin(cls, name):
        """Return the function implementing the built-in metric `name`."""
        module_name, function_name = cls._BUILTIN_METRICS[name]
        modules = {
            "pmi": pmi,
            "lexical_variation": lexical_variation,
            "corpus_statistics": corpus_statistics,
        }
        return getattr(modules[module_name], function_name)

    def calculate_metric(self, label_values_dict, subsets_of_interest):
        """Calls the appropriate metric function.

        Parameters
        ----------
        label_values_dict: dict
            A dictionary containing all of the possible values each variable
            can take in the input dataset.
        subsets_of_interest: dict
            A dictionary containing a pandas series with tokenized texts for
            each variable/text column combination out of the variables and
            text columns specified by the user.

        Returns
        -------
        :dict
            A `dict` with the results of the calculated metric function.
        """
        # The user arguments are always forwarded as a third positional
        # argument, for built-in and custom metrics alike.
        return self.metric_fn(label_values_dict, subsets_of_interest, self.args)