# Source code for iguanas.rule_scoring.rule_scorer

"""
Generates scores for each rule in a set. Scaling functions can also be applied
to the scores.
"""
from iguanas.rule_scoring.rule_scoring_methods import PerformanceScorer, LogRegScorer,\
    RandomForestScorer
from iguanas.utils.typing import PandasDataFrameType, PandasSeriesType
from iguanas.rule_scoring.rule_score_scalers import MinMaxScaler, ConstantScaler
from typing import Union


class RuleScorer:
    """
    Generates rule scores using the rule binary columns and the target column.

    Parameters
    ----------
    scoring_class : Union[PerformanceScorer, LogRegScorer, RandomForestScorer]
        The instantiated scoring class - this defines the method for
        generating the scores. Scoring classes are available in the
        `rule_scoring_methods` module.
    scaling_class : Union[MinMaxScaler, ConstantScaler], optional
        The instantiated scaling class - this defines the method for scaling
        the raw scores from the scoring class. Scaling classes are available
        in the `rule_score_scalers` module. Defaults to None.

    Attributes
    ----------
    rule_scores
        The generated score for each rule: whatever `scoring_class.fit`
        returns, passed through `scaling_class.fit` when a scaling class was
        given. Only set once `fit` has completed successfully.
        NOTE(review): previously documented as `Dict[str, int]`, but
        `transform` multiplies this object by `X_rules`, which a plain dict
        does not support - presumably a pandas Series indexed by rule name;
        confirm against the scoring classes.
    """

    def __init__(self,
                 scoring_class: Union[PerformanceScorer, LogRegScorer,
                                      RandomForestScorer],
                 scaling_class: Union[MinMaxScaler, ConstantScaler,
                                      None] = None):
        self.scoring_class = scoring_class
        self.scaling_class = scaling_class

    def fit(self,
            X_rules: PandasDataFrameType,
            y: PandasSeriesType,
            sample_weight=None) -> None:
        """
        Generates rule scores using the rule binary columns and the binary
        target column.

        Parameters
        ----------
        X_rules : PandasDataFrameType
            The rule binary columns.
        y : PandasSeriesType
            The binary target column.
        sample_weight : PandasSeriesType, optional
            Row-wise weights to apply in the `scoring_class`. Defaults to
            None.
        """

        rule_scores = self.scoring_class.fit(
            X_rules=X_rules, y=y, sample_weight=sample_weight
        )
        if self.scaling_class is not None:
            rule_scores = self.scaling_class.fit(rule_scores=rule_scores)
        # Assign once, after every step has succeeded, so that a failing
        # scaler cannot leave the instance holding unscaled scores.
        self.rule_scores = rule_scores

    def transform(self, X_rules: PandasDataFrameType) -> PandasDataFrameType:
        """
        Transforms the rule binary columns to show the generated scores
        applied to the dataset (i.e. replaces the 1 in `X_rules` with the
        generated score).

        Parameters
        ----------
        X_rules : PandasDataFrameType
            The rule binary columns.

        Returns
        -------
        PandasDataFrameType
            The generated scores applied to the dataset.

        Raises
        ------
        AttributeError
            If called before `fit` (or `fit_transform`), i.e. when no
            `rule_scores` attribute has been set yet.
        """

        if not hasattr(self, "rule_scores"):
            # Same exception type an unfitted instance raised before, but
            # with a message that tells the caller what to do.
            raise AttributeError(
                "RuleScorer has no `rule_scores` yet - call `fit` or "
                "`fit_transform` before `transform`."
            )
        # Element-wise product of the scores with the rule columns.
        return self.rule_scores * X_rules

    def fit_transform(self,
                      X_rules: PandasDataFrameType,
                      y: PandasSeriesType,
                      sample_weight=None) -> PandasDataFrameType:
        """
        Generates rule scores using the rule binary columns and the binary
        target column, then transforms the rule binary columns to show the
        generated scores applied to the dataset (i.e. replaces the 1 in
        `X_rules` with the generated score).

        Parameters
        ----------
        X_rules : PandasDataFrameType
            The rule binary columns.
        y : PandasSeriesType
            The binary target column.
        sample_weight : PandasSeriesType, optional
            Row-wise weights to apply in the `scoring_class`. Defaults to
            None.

        Returns
        -------
        PandasDataFrameType
            The generated scores applied to the dataset.
        """

        self.fit(X_rules=X_rules, y=y, sample_weight=sample_weight)
        return self.transform(X_rules=X_rules)