from typing import Callable, Union
from variationist.metrics import corpus_statistics
from variationist.metrics import lexical_variation
from variationist.metrics import pmi
class Metric:
    """The Metric class, a generic class that carries out all the metric operations.

    Parameters
    ----------
    metric: `Union[str, Callable[..., dict]]`
        A metric's name (if chosen among the ones natively supported by
        Variationist), or a callable function. A callable is invoked by
        `calculate_metric` with three positional arguments, in this order:
        `label_values_dict`, `subsets_of_interest` and the `args` object given
        to this constructor. Whatever it returns is handed back to the caller.
    args: InspectorArgs
        The arguments selected by the user. They are stored on the instance and
        forwarded as the third argument of every metric function call.

    Raises
    ------
    NotImplementedError
        If `metric` is a string that does not name a supported metric.
    ValueError
        If `metric` is neither a string nor a callable.
    """

    # Natively supported metric names, grouped by the module implementing them.
    # Values are attribute names that are resolved with `getattr` only for the
    # metric that was actually requested.
    # NOTE: "lex_art" (pmi.pmi_lexical_artifacts) is disabled and therefore not
    # listed here.
    _PMI_METRICS = {
        "pmi": "pmi",
        "n_pmi": "pmi_normalized",
        "p_pmi": "pmi_positive",
        "np_pmi": "pmi_positive_normalized",
        "w_pmi": "pmi_weighted",
        "nw_pmi": "pmi_normalized_weighted",
        "pw_pmi": "pmi_positive_weighted",
        "npw_pmi": "pmi_positive_normalized_weighted",
        "np_relevance": "class_relevance_positive_normalized",
        "nw_relevance": "class_relevance_normalized_weighted",
        "npw_relevance": "class_relevance_positive_normalized_weighted",
    }
    _LEXICAL_VARIATION_METRICS = {
        "ttr": "ttr",
        "root_ttr": "rttr",
        "maas": "maas",
        "log_ttr": "lttr",
    }
    _CORPUS_STATISTICS_METRICS = {
        "freq": "create_frequency_dictionary",
        "stats": "compute_basic_stats",
    }

    def __init__(
        self,
        metric: Union[str, Callable[..., dict]],
        args
    ) -> None:
        """Store the metric and the user arguments, and resolve the metric function."""
        self.metric = metric
        self.args = args
        self.metric_fn = self._resolve_metric_fn(metric)

    def _lookup_native_metric(self, name: str):
        """Return the built-in function registered under `name`, or `None` if there is none."""
        if name in self._PMI_METRICS:
            return getattr(pmi, self._PMI_METRICS[name])
        if name in self._LEXICAL_VARIATION_METRICS:
            return getattr(lexical_variation, self._LEXICAL_VARIATION_METRICS[name])
        if name in self._CORPUS_STATISTICS_METRICS:
            return getattr(corpus_statistics, self._CORPUS_STATISTICS_METRICS[name])
        return None

    def _resolve_metric_fn(self, metric):
        """Map `metric` (a name or a callable) to the function that computes it."""
        # isinstance (rather than an exact type check) so that str subclasses
        # are treated as metric names too.
        is_name = isinstance(metric, str)
        if is_name:
            native_fn = self._lookup_native_metric(metric)
            if native_fn is not None:
                return native_fn
        # Known names take precedence; anything else that is callable is used as is.
        if callable(metric):
            return metric
        if is_name:
            raise NotImplementedError(f"The metric '{metric}' is not implemented.")
        raise ValueError(f"The specified metric should be a callable function or a string matching an implemented metric. Got a {type(metric)} instead")

    def calculate_metric(self, label_values_dict: dict, subsets_of_interest: dict) -> dict:
        """Calls the appropriate metric function.

        The function is called with `label_values_dict`, `subsets_of_interest`
        and the `args` object stored on this instance, in that order.

        Parameters
        ----------
        label_values_dict: dict
            A dictionary containing all of the possible values each variable can
            take in the input dataset.
        subsets_of_interest: dict
            A dictionary containing a pandas series with tokenized texts for each
            variable/text column combination out of the variables and text columns
            specified by the user.

        Returns
        -------
        :dict
            A `dict` with the results of the calculated metric function.
        """
        return self.metric_fn(label_values_dict, subsets_of_interest, self.args)