"""Contains classes for calculating classification metrics."""
import numpy as np
from typing import Union
import iguanas.utils as utils
from iguanas.utils.types import NumpyArray, PandasDataFrame, PandasSeries, \
KoalasDataFrame, KoalasSeries
from iguanas.utils.typing import NumpyArrayType, PandasDataFrameType, \
PandasSeriesType, KoalasDataFrameType, KoalasSeriesType
class Precision:
    """
    Calculates the Precision for either a single or set of binary
    predictors.
    """

    def fit(self,
            y_true: Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType],
            y_preds: Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType,
                           PandasDataFrameType, KoalasDataFrameType],
            sample_weight=None) -> Union[float, NumpyArrayType]:
        """
        Calculates the Precision for either a single or set of binary
        predictors.

        The score is the ratio of the `tps` total to the `tps_fps` total
        returned by `utils.calc_tps_fps_tns_fns`. Where the `tps_fps` total
        is zero, the score is reported as 0.

        Parameters
        ----------
        y_true : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType]
            The target column.
        y_preds : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType, PandasDataFrameType, KoalasDataFrameType]
            The binary predictor column(s).
        sample_weight : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType], optional
            Row-wise weights to apply. Defaults to None.

        Returns
        -------
        Union[float, NumpyArrayType]
            The Precision score(s).
        """
        # Validate the container type of every input before any arithmetic.
        utils.check_allowed_types(
            y_true, 'y_true', [
                NumpyArray, PandasSeries,
                KoalasSeries
            ])
        utils.check_allowed_types(
            y_preds, 'y_preds', [
                NumpyArray, PandasSeries,
                PandasDataFrame, KoalasSeries,
                KoalasDataFrame
            ])
        if sample_weight is not None:
            utils.check_allowed_types(
                sample_weight, 'sample_weight', [
                    NumpyArray, PandasSeries,
                    KoalasSeries
                ])
        # Only the first (tps) and fifth (tps_fps) returned totals are used.
        tps_sum, _, _, _, tps_fps_sum, _ = utils.calc_tps_fps_tns_fns(
            y_true=y_true, y_preds=y_preds, sample_weight=sample_weight,
            tps=True, tps_fps=True)
        # Swap zero denominators for NaN so the division yields NaN there
        # instead of dividing by zero; nan_to_num then turns those into 0.
        tps_fps_sum = np.where(tps_fps_sum == 0, np.nan, tps_fps_sum)
        precision = np.nan_to_num(np.divide(tps_sum, tps_fps_sum))
        return precision
class Recall:
    """
    Calculates the Recall for either a single or set of binary
    predictors.
    """

    def fit(self,
            y_true: Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType],
            y_preds: Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType,
                           PandasDataFrameType, KoalasDataFrameType],
            sample_weight=None) -> Union[float, NumpyArrayType]:
        """
        Calculates the Recall for either a single or set of binary
        predictors.

        The score is the ratio of the `tps` total to the `tps_fns` total
        returned by `utils.calc_tps_fps_tns_fns`. Where the `tps_fns` total
        is zero, the score is reported as 0.

        Parameters
        ----------
        y_true : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType]
            The target column.
        y_preds : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType, PandasDataFrameType, KoalasDataFrameType]
            The binary predictor column(s).
        sample_weight : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType], optional
            Row-wise weights to apply. Defaults to None.

        Returns
        -------
        Union[float, NumpyArrayType]
            The Recall score(s).
        """
        # Validate the container type of every input before any arithmetic.
        utils.check_allowed_types(
            y_true, 'y_true', [
                NumpyArray, PandasSeries,
                KoalasSeries
            ])
        utils.check_allowed_types(
            y_preds, 'y_preds', [
                NumpyArray, PandasSeries,
                PandasDataFrame, KoalasSeries,
                KoalasDataFrame
            ])
        if sample_weight is not None:
            utils.check_allowed_types(
                sample_weight, 'sample_weight', [
                    NumpyArray, PandasSeries,
                    KoalasSeries
                ])
        # Only the first (tps) and sixth (tps_fns) returned totals are used.
        tps_sum, _, _, _, _, tps_fns_sum = utils.calc_tps_fps_tns_fns(
            y_true=y_true, y_preds=y_preds, sample_weight=sample_weight,
            tps=True, tps_fns=True)
        # Swap zero denominators for NaN so the division yields NaN there
        # instead of dividing by zero; nan_to_num then turns those into 0.
        tps_fns_sum = np.where(tps_fns_sum == 0, np.nan, tps_fns_sum)
        recall = np.nan_to_num(np.divide(tps_sum, tps_fns_sum))
        return recall
class FScore:
    """
    Calculates the Fbeta score for either a single or set of binary
    predictors.

    Parameters
    ----------
    beta : float
        The beta value used to calculate the Fbeta score.
    """

    def __init__(self, beta: float):
        self.beta = beta

    def __repr__(self):
        return f'FScore with beta={self.beta}'

    def fit(self,
            y_true: Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType],
            y_preds: Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType,
                           PandasDataFrameType, KoalasDataFrameType],
            sample_weight=None) -> Union[float, NumpyArrayType]:
        """
        Calculates the Fbeta score for either a single or set of binary
        predictors.

        With precision `p`, recall `r` and `b = self.beta`, the score is
        `(1 + b**2) * (p * r) / (p * b**2 + r)`. It is 0 whenever `p` or
        `r` is 0.

        Parameters
        ----------
        y_true : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType]
            The target column.
        y_preds : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType, PandasDataFrameType, KoalasDataFrameType]
            The binary predictor column(s).
        sample_weight : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType], optional
            Row-wise weights to apply. Defaults to None.

        Returns
        -------
        Union[float, NumpyArrayType]
            The Fbeta score(s).
        """
        # Validate the container type of every input before any arithmetic.
        utils.check_allowed_types(
            y_true, 'y_true', [
                NumpyArray, PandasSeries,
                KoalasSeries
            ])
        utils.check_allowed_types(
            y_preds, 'y_preds', [
                NumpyArray, PandasSeries,
                PandasDataFrame, KoalasSeries,
                KoalasDataFrame
            ])
        if sample_weight is not None:
            utils.check_allowed_types(
                sample_weight, 'sample_weight', [
                    NumpyArray, PandasSeries,
                    KoalasSeries
                ])
        # The first (tps), fifth (tps_fps) and sixth (tps_fns) totals are
        # the ones needed for precision and recall.
        tps_sum, _, _, _, tps_fps_sum, tps_fns_sum = utils.calc_tps_fps_tns_fns(
            y_true=y_true, y_preds=y_preds, sample_weight=sample_weight,
            tps=True, tps_fps=True, tps_fns=True
        )
        # Swap zero denominators for NaN so the divisions yield NaN there
        # instead of dividing by zero; nan_to_num then turns those into 0.
        tps_fps_sum = np.where(tps_fps_sum == 0, np.nan, tps_fps_sum)
        tps_fns_sum = np.where(tps_fns_sum == 0, np.nan, tps_fns_sum)
        precisions = np.nan_to_num(np.divide(tps_sum, tps_fps_sum))
        recalls = np.nan_to_num(np.divide(tps_sum, tps_fns_sum))
        beta_sq = self.beta ** 2
        if utils.is_type(precisions, NumpyArray) and \
                utils.is_type(recalls, NumpyArray):
            # Vectorised form of the per-element formula. Entries where
            # precision or recall is 0 are skipped by `where` and keep the
            # 0.0 pre-filled in `out`, so the result is always a float array.
            defined = (precisions != 0) & (recalls != 0)
            ratio = np.divide(
                precisions * recalls,
                precisions * beta_sq + recalls,
                out=np.zeros(np.shape(defined), dtype=float),
                where=defined)
            return (1 + beta_sq) * ratio
        # Single predictor: precision and recall are scalars.
        if precisions == 0 or recalls == 0:
            return 0.0
        return (1 + beta_sq) * (
            (precisions * recalls) / ((precisions * beta_sq) + recalls))
class Revenue:
    """
    Calculates the revenue for either a single or set of binary
    predictors.

    Parameters
    ----------
    y_type : str
        Dictates whether the binary target column flags fraud (y_type =
        'Fraud') or non-fraud (y_type = 'NonFraud').
    chargeback_multiplier : int
        Multiplier to apply to chargeback transactions.

    Raises
    ------
    ValueError
        If `y_type` is neither 'Fraud' nor 'NonFraud'.
    """

    def __init__(self, y_type: str, chargeback_multiplier: int):
        if y_type not in ['Fraud', 'NonFraud']:
            raise ValueError('`y_type` must be either "Fraud" or "NonFraud"')
        self.y_type = y_type
        self.chargeback_multiplier = chargeback_multiplier

    def fit(self,
            y_true: Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType],
            y_preds: Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType,
                           PandasDataFrameType, KoalasDataFrameType],
            sample_weight: Union[NumpyArrayType, PandasSeriesType,
                                 KoalasSeriesType]
            ) -> Union[float, NumpyArrayType]:
        """
        Calculates the revenue for either a single or set of binary
        predictors.

        Using the tps/fps/tns/fns totals from
        `utils.calc_tps_fps_tns_fns`, the revenue is:

        * y_type = 'Fraud':
          `chargeback_multiplier * (tps - fns) + tns - fps`
        * y_type = 'NonFraud':
          `tps - fns + chargeback_multiplier * (tns - fps)`

        Parameters
        ----------
        y_true : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType]
            The target column.
        y_preds : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType, PandasDataFrameType, KoalasDataFrameType]
            The binary predictor column.
        sample_weight : Union[NumpyArrayType, PandasSeriesType, KoalasSeriesType]
            Row-wise transaction amounts to apply.

        Returns
        -------
        Union[float, NumpyArrayType]
            Revenue(s).

        Raises
        ------
        ValueError
            If `self.y_type` was changed after construction to something
            other than 'Fraud' or 'NonFraud'.
        """
        # Validate the container type of every input before any arithmetic.
        utils.check_allowed_types(
            y_true, 'y_true', [
                NumpyArray, PandasSeries,
                KoalasSeries
            ])
        utils.check_allowed_types(
            y_preds, 'y_preds', [
                NumpyArray, PandasSeries,
                PandasDataFrame, KoalasSeries,
                KoalasDataFrame
            ])
        # Unlike the other metrics, sample_weight is required here, so it is
        # always type-checked.
        utils.check_allowed_types(
            sample_weight, 'sample_weight', [
                NumpyArray, PandasSeries,
                KoalasSeries
            ])
        tps_sum, fps_sum, tns_sum, fns_sum, _, _ = utils.calc_tps_fps_tns_fns(
            y_true=y_true, y_preds=y_preds, sample_weight=sample_weight,
            tps=True, fps=True, tns=True, fns=True)
        # The multiplier is applied to whichever pair of totals corresponds
        # to the class flagged by the target column.
        if self.y_type == 'Fraud':
            revenue = self.chargeback_multiplier * \
                (tps_sum - fns_sum) + tns_sum - fps_sum
        elif self.y_type == 'NonFraud':
            revenue = tps_sum - fns_sum + \
                self.chargeback_multiplier * (tns_sum - fps_sum)
        else:
            # Reachable only if y_type was mutated after __init__ validated
            # it; fail with a clear message instead of an unbound local.
            raise ValueError('`y_type` must be either "Fraud" or "NonFraud"')
        return revenue