"""Contains classes for calculating unsupervised metrics."""
from typing import Union
import iguanas.utils as utils
from iguanas.utils.types import NumpyArray, PandasDataFrame, PandasSeries, \
KoalasDataFrame, KoalasSeries
from iguanas.utils.typing import NumpyArrayType, PandasDataFrameType, \
PandasSeriesType, KoalasDataFrameType, KoalasSeriesType
class AlertsPerDay:
    """
    Scores binary predictor(s) by how closely their number of alerts per day
    matches an expected value.

    The score is the negative squared difference between the observed and the
    expected number of alerts per day.

    Parameters
    ----------
    n_alerts_expected_per_day : int
        Expected number of alerts per day for the given rule.
    no_of_days_in_file : int
        Number of days of data provided in the file.
    """

    def __init__(self, n_alerts_expected_per_day: int,
                 no_of_days_in_file: int):
        self.n_alerts_expected_per_day = n_alerts_expected_per_day
        self.no_of_days_in_file = no_of_days_in_file

    def fit(self,
            y_preds: Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType, PandasDataFrameType, KoalasDataFrameType]) -> Union[float, NumpyArrayType]:
        """
        Computes the negative squared difference between the observed and
        expected number of alerts per day for the binary predictor(s).

        The predictions are summed along axis 0, divided by
        `no_of_days_in_file`, and compared against
        `n_alerts_expected_per_day`.

        Parameters
        ----------
        y_preds : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType, PandasDataFrameType, KoalasDataFrameType]
            The binary predictor column(s).

        Returns
        -------
        Union[float, NumpyArrayType]
            The negative squared difference(s).
        """
        supported_types = [
            NumpyArray, PandasSeries,
            PandasDataFrame, KoalasSeries,
            KoalasDataFrame
        ]
        utils.check_allowed_types(y_preds, 'y_preds', supported_types)
        # Pandas containers are reduced through their NumPy representation.
        is_pandas = utils.is_type(y_preds, [PandasSeries, PandasDataFrame])
        values = y_preds.to_numpy() if is_pandas else y_preds
        total_alerts = values.sum(axis=0)
        # If the reduction itself produced a Koalas Series (presumably the
        # KoalasDataFrame case), bring it into NumPy before the arithmetic.
        if utils.is_type(total_alerts, [KoalasSeries]):
            total_alerts = total_alerts.to_numpy()
        alerts_per_day = total_alerts / self.no_of_days_in_file
        deviation = alerts_per_day - self.n_alerts_expected_per_day
        # Negated so that a closer match yields a larger score.
        return -(deviation ** 2)
class PercVolume:
    """
    Scores binary predictor(s) by how closely the percentage of the overall
    volume that they flag matches an expected value.

    The score is the negative squared difference between the observed and the
    expected percentage.

    Parameters
    ----------
    perc_vol_expected : float
        Expected percentage of the overall volume that the binary predictor
        should flag.
    """

    def __init__(self, perc_vol_expected: float):
        self.perc_vol_expected = perc_vol_expected

    def fit(self, y_preds: Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType, PandasDataFrameType, KoalasDataFrameType]) -> Union[float, NumpyArrayType]:
        """
        Computes the negative squared difference between the percentage of
        the overall volume that the binary predictor(s) flag and the expected
        percentage.

        The observed percentage is the mean of the predictions along axis 0,
        which is compared against `perc_vol_expected`.

        Parameters
        ----------
        y_preds : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType, PandasDataFrameType, KoalasDataFrameType]
            The binary predictor column(s).

        Returns
        -------
        Union[float, NumpyArrayType]
            The negative squared difference(s).
        """
        supported_types = [
            NumpyArray, PandasSeries,
            PandasDataFrame, KoalasSeries,
            KoalasDataFrame
        ]
        utils.check_allowed_types(y_preds, 'y_preds', supported_types)
        # Pandas containers are reduced through their NumPy representation.
        is_pandas = utils.is_type(y_preds, [PandasSeries, PandasDataFrame])
        values = y_preds.to_numpy() if is_pandas else y_preds
        flagged_fraction = values.mean(axis=0)
        # If the reduction itself produced a Koalas Series (presumably the
        # KoalasDataFrame case), bring it into NumPy before the arithmetic.
        if utils.is_type(flagged_fraction, [KoalasSeries]):
            flagged_fraction = flagged_fraction.to_numpy()
        deviation = flagged_fraction - self.perc_vol_expected
        # Negated so that a closer match yields a larger score.
        return -(deviation ** 2)