# Source code for iguanas.rule_selection.correlated_filter

"""Filters correlated rules."""
from iguanas.correlation_reduction import AgglomerativeClusteringReducer
from iguanas.utils.typing import PandasDataFrameType


class CorrelatedFilter:
    """
    Filters correlated rules based on a correlation reduction class (see the
    `correlation_reduction` sub-package).

    Parameters
    ----------
    correlation_reduction_class : AgglomerativeClusteringReducer
        Instantiated class from the `correlation_reduction` sub-package. It
        must expose a `.fit(X=..., **kwargs)` method and, once fitted, a
        `columns_to_keep` attribute.
    rule_descriptions : PandasDataFrameType, optional
        The standard performance metrics dataframe associated with the rules
        (if available). Rows are selected by label with `.loc`, so its index
        must contain the rule names. Defaults to None.

    Attributes
    ----------
    rules_to_keep : List[str]
        List of rules which remain after correlated rules have been removed.
        Set by `fit`.
    """

    def __init__(self,
                 correlation_reduction_class: AgglomerativeClusteringReducer,
                 rule_descriptions=None):
        self.correlation_reduction_class = correlation_reduction_class
        self.rule_descriptions = rule_descriptions

    def fit(self, X_rules: PandasDataFrameType, **kwargs) -> None:
        """
        Calculates the uncorrelated rules (using the correlation reduction
        class).

        Parameters
        ----------
        X_rules : PandasDataFrameType
            The binary columns of the rules applied to a dataset.
        **kwargs : dict
            Any keyword arguments to pass to the correlation reduction
            class's `.fit()` method.
        """
        self.correlation_reduction_class.fit(X=X_rules, **kwargs)
        self.rules_to_keep = self.correlation_reduction_class.columns_to_keep

    def transform(self, X_rules: PandasDataFrameType) -> PandasDataFrameType:
        """
        Keeps only the uncorrelated rules in `X_rules` and (when it was
        provided) `rule_descriptions`.

        Parameters
        ----------
        X_rules : PandasDataFrameType
            The binary columns of the rules applied to a dataset.

        Returns
        -------
        PandasDataFrameType
            The binary columns of the uncorrelated rules.
        """
        columns_to_keep = self.correlation_reduction_class.columns_to_keep
        X_rules = X_rules[columns_to_keep]
        # `rule_descriptions` is optional and defaults to None; only filter
        # it when one was supplied, otherwise `.loc` raises AttributeError.
        if self.rule_descriptions is not None:
            self.rule_descriptions = self.rule_descriptions.loc[
                columns_to_keep]
        return X_rules

    def fit_transform(self,
                      X_rules: PandasDataFrameType,
                      **kwargs) -> PandasDataFrameType:
        """
        Calculates the uncorrelated rules (using the correlation reduction
        class) then keeps only these uncorrelated rules in `X_rules` and
        (when it was provided) `rule_descriptions`.

        Parameters
        ----------
        X_rules : PandasDataFrameType
            The binary columns of the rules applied to a dataset.
        **kwargs : dict
            Any keyword arguments to pass to the correlation reduction
            class's `.fit()` method.

        Returns
        -------
        PandasDataFrameType
            The binary columns of the uncorrelated rules.
        """
        self.fit(X_rules=X_rules, **kwargs)
        return self.transform(X_rules=X_rules)