Source code for iguanas.rule_selection.correlated_filter
"""Filters correlated rules."""
from iguanas.correlation_reduction import AgglomerativeClusteringReducer
from iguanas.utils.typing import PandasDataFrameType
class CorrelatedFilter:
    """
    Filters correlated rules based on a correlation reduction class (see the
    `correlation_reduction` sub-package).

    Parameters
    ----------
    correlation_reduction_class : AgglomerativeClusteringReducer
        Instantiated class from the `correlation_reduction` sub-package. It
        must provide a `.fit(X=..., **kwargs)` method and, once fitted, a
        `columns_to_keep` attribute.
    rule_descriptions : PandasDataFrameType, optional
        The standard performance metrics dataframe associated with the rules
        (if available). It is indexed with `.loc` using the kept rule names,
        so its index is expected to hold the rule names. Defaults to None.

    Attributes
    ----------
    rules_to_keep : List[str]
        List of rules which remain after correlated rules have been removed.
        Set by `fit`.
    """

    def __init__(self,
                 correlation_reduction_class: "AgglomerativeClusteringReducer",
                 rule_descriptions=None):
        self.correlation_reduction_class = correlation_reduction_class
        self.rule_descriptions = rule_descriptions

    def fit(self, X_rules: "PandasDataFrameType", **kwargs) -> None:
        """
        Calculates the uncorrelated rules (using the correlation reduction
        class).

        Parameters
        ----------
        X_rules : PandasDataFrameType
            The binary columns of the rules applied to a dataset.
        **kwargs : dict
            Any keyword arguments to pass to the correlation reduction class's
            `.fit()` method.
        """
        self.correlation_reduction_class.fit(X=X_rules, **kwargs)
        self.rules_to_keep = self.correlation_reduction_class.columns_to_keep

    def transform(self,
                  X_rules: "PandasDataFrameType") -> "PandasDataFrameType":
        """
        Keeps only the uncorrelated rules in `X_rules` and, when it was
        provided, in `rule_descriptions` (which is updated in place on the
        instance).

        Parameters
        ----------
        X_rules : PandasDataFrameType
            The binary columns of the rules applied to a dataset.

        Returns
        -------
        PandasDataFrameType
            The binary columns of the uncorrelated rules.
        """
        # Read the kept columns from the reducer itself so that a reducer
        # fitted outside of this class can still be used for transforming.
        columns_to_keep = self.correlation_reduction_class.columns_to_keep
        X_rules = X_rules[columns_to_keep]
        # `rule_descriptions` is optional; only filter it when it was given,
        # otherwise `.loc` would be looked up on None and raise.
        if self.rule_descriptions is not None:
            self.rule_descriptions = self.rule_descriptions.loc[
                columns_to_keep]
        return X_rules

    def fit_transform(self,
                      X_rules: "PandasDataFrameType",
                      **kwargs) -> "PandasDataFrameType":
        """
        Calculates the uncorrelated rules (using the correlation reduction
        class) then keeps only these uncorrelated rules in `X_rules` and
        `rule_descriptions`.

        Parameters
        ----------
        X_rules : PandasDataFrameType
            The binary columns of the rules applied to a dataset.
        **kwargs : dict
            Any keyword arguments to pass to the correlation reduction class's
            `.fit()` method.

        Returns
        -------
        PandasDataFrameType
            The binary columns of the uncorrelated rules.
        """
        self.fit(X_rules=X_rules, **kwargs)
        return self.transform(X_rules=X_rules)