Source code for iguanas.metrics.unsupervised

"""Contains classes for calculating unsupervised metrics."""
from typing import Union
import iguanas.utils as utils
from iguanas.utils.types import NumpyArray, PandasDataFrame, PandasSeries, \
    KoalasDataFrame, KoalasSeries
from iguanas.utils.typing import NumpyArrayType, PandasDataFrameType, \
    PandasSeriesType, KoalasDataFrameType, KoalasSeriesType


class AlertsPerDay:
    """
    Scores binary predictor(s) by how close their daily alert volume is to
    an expected daily alert volume.

    The score is the negative squared difference between the observed
    number of alerts per day and the expected number, so a score of zero
    means the predictor matches the expectation exactly and more negative
    scores mean a larger deviation.

    Parameters
    ----------
    n_alerts_expected_per_day : int
        Expected number of alerts per day for the given rule.
    no_of_days_in_file : int
        Number of days of data provided in the file. It is used as the
        divisor when converting the total alert count into alerts per day;
        no validation is performed on it here.
    """

    def __init__(self, n_alerts_expected_per_day: int,
                 no_of_days_in_file: int):
        self.n_alerts_expected_per_day = n_alerts_expected_per_day
        self.no_of_days_in_file = no_of_days_in_file

    def fit(self,
            y_preds: Union[NumpyArrayType, PandasSeriesType,
                           KoalasSeriesType, PandasDataFrameType,
                           KoalasDataFrameType]
            ) -> Union[float, NumpyArrayType]:
        """
        Computes the negative squared difference between the observed and
        the expected number of alerts per day.

        Parameters
        ----------
        y_preds : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType, PandasDataFrameType, KoalasDataFrameType]
            The binary predictor column(s).

        Returns
        -------
        Union[float, NumpyArrayType]
            The negative squared difference(s).
        """

        allowed_types = [
            NumpyArray, PandasSeries, PandasDataFrame, KoalasSeries,
            KoalasDataFrame
        ]
        utils.check_allowed_types(y_preds, 'y_preds', allowed_types)
        # Pandas inputs are converted to NumPy before aggregating; Koalas
        # inputs are aggregated as they are.
        is_pandas = utils.is_type(y_preds, [PandasSeries, PandasDataFrame])
        values = y_preds.to_numpy() if is_pandas else y_preds
        # Total alerts, summed along axis 0.
        alert_totals = values.sum(0)
        # If the aggregation produced a Koalas Series, convert it to NumPy
        # so the arithmetic below runs on a NumPy array.
        if utils.is_type(alert_totals, [KoalasSeries]):
            alert_totals = alert_totals.to_numpy()
        daily_alerts = alert_totals / self.no_of_days_in_file
        deviation = daily_alerts - self.n_alerts_expected_per_day
        # Negated so that a smaller deviation gives a larger score.
        return -(deviation ** 2)
class PercVolume:
    """
    Scores binary predictor(s) by how close the share of the overall
    volume they flag is to an expected share.

    The score is the negative squared difference between the observed
    share flagged and the expected share, so a score of zero means the
    predictor matches the expectation exactly and more negative scores
    mean a larger deviation.

    Parameters
    ----------
    perc_vol_expected : float
        Expected percentage of the overall volume that the binary
        predictor should flag. It is compared directly against the mean
        of the predictor column(s), so it must be expressed on the same
        scale as that mean.
    """

    def __init__(self, perc_vol_expected: float):
        self.perc_vol_expected = perc_vol_expected

    def fit(self,
            y_preds: Union[NumpyArrayType, PandasSeriesType,
                           KoalasSeriesType, PandasDataFrameType,
                           KoalasDataFrameType]
            ) -> Union[float, NumpyArrayType]:
        """
        Computes the negative squared difference between the observed and
        the expected share of the overall volume that is flagged.

        Parameters
        ----------
        y_preds : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType, PandasDataFrameType, KoalasDataFrameType]
            The binary predictor column(s).

        Returns
        -------
        Union[float, NumpyArrayType]
            The negative squared difference(s).
        """

        allowed_types = [
            NumpyArray, PandasSeries, PandasDataFrame, KoalasSeries,
            KoalasDataFrame
        ]
        utils.check_allowed_types(y_preds, 'y_preds', allowed_types)
        # Pandas inputs are converted to NumPy before aggregating; Koalas
        # inputs are aggregated as they are.
        is_pandas = utils.is_type(y_preds, [PandasSeries, PandasDataFrame])
        values = y_preds.to_numpy() if is_pandas else y_preds
        # Share of records flagged, taken as the mean along axis 0.
        share_flagged = values.mean(0)
        # If the aggregation produced a Koalas Series, convert it to NumPy
        # so the arithmetic below runs on a NumPy array.
        if utils.is_type(share_flagged, [KoalasSeries]):
            share_flagged = share_flagged.to_numpy()
        deviation = share_flagged - self.perc_vol_expected
        # Negated so that a smaller deviation gives a larger score.
        return -(deviation ** 2)