Source code for iguanas.metrics.pairwise

"""Contains classes for calculating pairwise metrics."""
from sklearn.metrics.pairwise import pairwise_distances
import pandas as pd
import iguanas.utils as utils
from iguanas.utils.types import PandasDataFrame
from iguanas.utils.typing import PandasDataFrameType


class CosineSimilarity:
    """
    Pairwise cosine similarity between the columns of a dataframe.

    The similarity is obtained as one minus the cosine distance returned
    by sklearn's `pairwise_distances`.

    Parameters
    ----------
    **kwargs : dict
        Extra keyword arguments forwarded unchanged to the sklearn
        `pairwise_distances` function. `X` and `metric` are already
        supplied by `fit`.
    """

    def __init__(self, **kwargs):
        # Stored as-is; only consumed when `fit` is called.
        self.kwargs = kwargs

    def fit(self, X: PandasDataFrameType) -> PandasDataFrameType:
        """
        Calculate the cosine similarity for every pair of columns in X.

        Parameters
        ----------
        X : PandasDataFrameType
            Dataframe whose columns are compared (documented upstream as
            containing binary columns; values are passed on uncast).

        Returns
        -------
        PandasDataFrameType
            Square dataframe of pairwise cosine similarities, with the
            columns of X as both its index and its columns.
        """
        utils.check_allowed_types(X, 'X', [PandasDataFrame])
        # pairwise_distances compares rows, so transpose to put each
        # original column on its own row.
        column_vectors = X.values.T
        distances = pairwise_distances(
            X=column_vectors, metric='cosine', **self.kwargs
        )
        labels = X.columns
        # similarity = 1 - distance
        return pd.DataFrame(1 - distances, index=labels, columns=labels)
class JaccardSimilarity:
    """
    Pairwise Jaccard similarity between the columns of a dataframe.

    The similarity is obtained as one minus the Jaccard distance returned
    by sklearn's `pairwise_distances`.

    Parameters
    ----------
    **kwargs : dict
        Extra keyword arguments forwarded unchanged to the sklearn
        `pairwise_distances` function. `X` and `metric` are already
        supplied by `fit`.
    """

    def __init__(self, **kwargs):
        # Stored as-is; only consumed when `fit` is called.
        self.kwargs = kwargs

    def fit(self, X: PandasDataFrameType) -> PandasDataFrameType:
        """
        Calculate the Jaccard similarity for every pair of columns in X.

        Parameters
        ----------
        X : PandasDataFrameType
            Dataframe containing binary columns. Values are cast to bool
            before the distances are computed.

        Returns
        -------
        PandasDataFrameType
            Square dataframe of pairwise Jaccard similarities, with the
            columns of X as both its index and its columns.
        """
        utils.check_allowed_types(X, 'X', [PandasDataFrame])
        # pairwise_distances compares rows, so transpose to put each
        # original column on its own row, then cast to boolean.
        column_flags = X.values.T.astype(bool)
        distances = pairwise_distances(
            X=column_flags, metric="jaccard", **self.kwargs
        )
        labels = X.columns
        # similarity = 1 - distance
        return pd.DataFrame(1 - distances, index=labels, columns=labels)