Source code for iguanas.rules.rules

"""
Defines a rule set using one of the standard Iguanas representations. This rule
set can then be reformatted into the other standard Iguanas representations 
using the class methods.
"""

from iguanas.rules._convert_rule_dicts_to_rule_strings import _ConvertRuleDictsToRuleStrings
from iguanas.rules._convert_rule_strings_to_rule_dicts import _ConvertRuleStringsToRuleDicts
from iguanas.rules._convert_rule_dicts_to_rule_lambdas import _ConvertRuleDictsToRuleLambdas
from iguanas.rules._convert_rule_lambdas_to_rule_strings import _ConvertRuleLambdasToRuleStrings
from iguanas.rules._get_rule_attributes import _GetRuleFeatures

from iguanas.rule_application import RuleApplier
from typing import List, Dict, Callable, Union

from iguanas.utils.typing import KoalasDataFrameType, PandasDataFrameType


[docs]class Rules(RuleApplier): """ Defines a set of rules using the following representations: string, dictionary, lambda expression. One of the above formats must be provided to define the rule set. The rules can then be reformatted into one of the other representations. Parameters ---------- rule_dicts : Dict[str, dict] Set of rules defined using thestandard Iguanas dictionary format (values) and their names (keys). Defaults to `None`. rule_strings : Dict[str, str] Set of rules defined using the standard Iguanas string format (values) and their names (keys). Defaults to `None`. rule_lambdas : Dict[str, Callable[[dict], str]] Set of rules defined using the standard Iguanas lambda expression format (values) and their names (keys). Must be given in conjunction with either `lambda_kwargs` or `lambda_args`. Defaults to `None`. lambda_kwargs : Dict[str, dict] For each rule (keys), a dictionary containing the features used in the rule (keys) and the current values (values). Only populates when `.as_lambda()` is used with the keyword argument `with_kwargs=True`. Defaults to `None`. lambda_args : Dict[str, list] For each rule (keys), a list containing the current values used in the rule. Only populates when `.as_lambda()` is used with the keyword argument `with_kwargs=False`. Defaults to `None`. opt_func : Callable, optional A function/method which calculates a custom metric (e.g. Fbeta score) for each rule when applying the rules to a dataset. Defaults to None. Attributes ---------- rule_dicts : Dict[str, dict] Set of rules defined using the standard Iguanas dictionary format (values) and their names (keys). rule_strings : Dict[str, str] Set of rules defined using the standard Iguanas string format (values) and their names (keys). rule_lambdas : Dict[str, Callable[[dict], str]] Set of rules defined using the standard Iguanas lambda expression format (values) and their names (keys). lambda_kwargs : Dict[str, dict] For each rule (keys), a dictionary containing the features used in the rule (keys) and the current values (values). lambda_args : Dict[str, list] For each rule (keys), a list containing the current values used in the rule. rule_features : Dict[str, set] For each rule (keys), a set containing the features used in the rule (only populates when the `.get_rules_features()` method is used). rule_descriptions : PandasDataFrameType A dataframe showing the logic of the rules and their performance metrics on the provided dataset (only populates when the `.transform()` method is used). """ def __init__(self, rule_dicts=None, rule_strings=None, rule_lambdas=None, lambda_kwargs=None, lambda_args=None, opt_func=None): if rule_dicts is None and rule_strings is None and rule_lambdas is None: raise ValueError( '`rule_dicts`, `rule_strings` or `rule_lambdas` must be given') if rule_lambdas is not None and lambda_kwargs is None and \ lambda_args is None: raise ValueError( '`lambda_kwargs` or `lambda_args` must be given when `rule_lambdas` is provided') self.rule_dicts = {} if rule_dicts is None else rule_dicts self.rule_strings = {} if rule_strings is None else rule_strings self.rule_lambdas = {} if rule_lambdas is None else rule_lambdas self.lambda_kwargs = {} if lambda_kwargs is None else lambda_kwargs self.lambda_args = {} if lambda_args is None else lambda_args self.rule_features = {} RuleApplier.__init__(self, rule_strings=self.rule_strings, opt_func=opt_func) def __repr__(self): rules_lens = [ len(self.rule_dicts), len(self.rule_strings), len(self.rule_lambdas) ] return f'Rules object containing {max(rules_lens)} rules'
[docs] def as_rule_dicts(self) -> Dict[str, dict]: """ Converts rules into the standard Iguanas dictionary format. Returns ------- Dict[str, dict] Rules in the standard Iguanas dictionary format. """ if self.rule_strings != {}: self._rule_strings_to_rule_dicts() elif self.rule_lambdas != {}: self._rule_lambdas_to_rule_strings() self._rule_strings_to_rule_dicts() return self.rule_dicts
[docs] def as_rule_strings(self, as_numpy: bool) -> Dict[str, str]: """ Converts rules into the standard Iguanas string format. Parameters ---------- as_numpy : bool If `True`, the conditions in the string format will uses Numpy rather than Pandas. These rules are generally evaluated more quickly on larger dataset stored as Pandas DataFrames. Returns ------- Dict[str, str] Rules in the standard Iguanas string format. """ if self.rule_strings != {}: self._rule_strings_to_rule_dicts() elif self.rule_lambdas != {}: self._rule_lambdas_to_rule_strings() self._rule_strings_to_rule_dicts() self._rule_dicts_to_rule_strings(as_numpy=as_numpy) return self.rule_strings
[docs] def as_rule_lambdas(self, as_numpy: bool, with_kwargs: bool) -> Dict[str, Callable[[dict], str]]: """ Converts rules into the standard Iguanas lambda expression format. Parameters ---------- as_numpy : bool If `True`, the conditions in the string format will uses Numpy rather than Pandas. These rules are generally evaluated more quickly on larger dataset stored as Pandas DataFrames. with_kwargs : bool If `True`, the string in the lambda expression is created such that the inputs are keyword arguments. If `False`, the inputs are positional arguments. Returns ------- Dict[str, Callable[[dict], str]] Rules in the standard Iguanas lambda expression format. """ if self.rule_lambdas != {}: self._rule_lambdas_to_rule_strings() self._rule_strings_to_rule_dicts() elif self.rule_strings != {}: self._rule_strings_to_rule_dicts() self._rule_dicts_to_rule_lambdas( as_numpy=as_numpy, with_kwargs=with_kwargs) return self.rule_lambdas
[docs] def transform(self, X: Union[PandasDataFrameType, KoalasDataFrameType], y=None, sample_weight=None) -> Union[PandasDataFrameType, KoalasDataFrameType]: """ Applies the set of rules to a dataset, `X`. If `y` is provided, the performance metrics for each rule will also be calculated. Parameters ---------- X : Union[PandasDataFrameType, KoalasDataFrameType] The feature set on which the rules should be applied. y : Union[PandasSeriesType, KoalasSeriesType], optional The target column. Defaults to None. sample_weight : Union[PandasSeriesType, KoalasSeriesType], optional Record-wise weights to apply. Defaults to None. Returns ------- Union[PandasDataFrameType, KoalasDataFrameType] The binary columns of the rules. """ if self.rule_strings == {}: self.rule_strings = self.as_rule_strings(as_numpy=False) X_rules = RuleApplier.transform( self, X=X, y=y, sample_weight=sample_weight ) return X_rules
[docs] def filter_rules(self, include=None, exclude=None) -> None: """ Filters the rules by their names. Parameters ---------- include : List[str], optional The list of rule names to keep. Defaults to `None`. exclude : List[str], optional The list of rule names to drop. Defaults to `None`. Raises ------ Exception `include` and `exclude` cannot contain similar values. """ if include is not None and exclude is not None: intersected = set.intersection(set(include), set(exclude)) if len(intersected) > 0: raise Exception( '`include` and `exclude` contain similar values') for d in [self.rule_strings, self.rule_dicts, self.rule_lambdas]: if d != {}: rule_names = list(d.keys()) break for rule_name in rule_names: if (include is not None and rule_name not in include) or \ (exclude is not None and rule_name in exclude): self.rule_strings.pop(rule_name, None) self.rule_dicts.pop(rule_name, None) self.rule_lambdas.pop(rule_name, None) self.lambda_kwargs.pop(rule_name, None) self.lambda_args.pop(rule_name, None) self.rule_features.pop(rule_name, None)
[docs] def get_rule_features(self) -> Dict[str, set]: """ Returns the set of unique features present in each rule. Returns ------- Dict[str, set] Set of unique features (values) in each rule (keys). """ if self.rule_dicts == {}: _ = self.as_rule_dicts() grf = _GetRuleFeatures(rule_dicts=self.rule_dicts) self.rule_feature_set = grf.get() return self.rule_feature_set
def _rule_dicts_to_rule_strings(self, as_numpy: bool) -> None: """ Converts the rules (each being represented in the standard Iguanas dictionary format) into the standard Iguanas string format. """ if self.rule_dicts == {}: raise ValueError('`rule_dicts` must be given') converter = _ConvertRuleDictsToRuleStrings( rule_dicts=self.rule_dicts) self.rule_strings = converter.convert(as_numpy=as_numpy) def _rule_strings_to_rule_dicts(self) -> None: """ Converts the rules (each being represented in the standard Iguanas string format) into the standard Iguanas dictionary format. """ if self.rule_strings == {}: raise ValueError('`rule_strings` must be given') converter = _ConvertRuleStringsToRuleDicts( rule_strings=self.rule_strings) self.rule_dicts = converter.convert() def _rule_dicts_to_rule_lambdas(self, as_numpy: bool, with_kwargs: bool) -> None: """ Converts the rules (each being represented in the standard Iguanas dictionary format) into the standard Iguanas lambda expression format. This is useful for optimising rules. """ if self.rule_dicts == {}: raise ValueError('`rule_dicts` must be given') converter = _ConvertRuleDictsToRuleLambdas(rule_dicts=self.rule_dicts) self.rule_lambdas = converter.convert( as_numpy=as_numpy, with_kwargs=with_kwargs) self.lambda_kwargs = converter.lambda_kwargs self.lambda_args = converter.lambda_args self.rule_features = converter.rule_features def _rule_lambdas_to_rule_strings(self) -> None: """ Converts the rules (each being represented in the standard Iguanas lambda expression format) into the standard Iguanas string format. """ if self.rule_lambdas == {}: raise ValueError('`rule_lambdas` must be given') if self.lambda_kwargs == {} and self.lambda_args == {}: raise ValueError('`lambda_kwargs` or `lambda_args` must be given') converter = _ConvertRuleLambdasToRuleStrings( rule_lambdas=self.rule_lambdas, lambda_kwargs=self.lambda_kwargs, lambda_args=self.lambda_args) self.rule_strings = converter.convert()