Skip to content

Commit

Permalink
Conform to flake8 code style
Browse files Browse the repository at this point in the history
  • Loading branch information
m-martin-j committed Apr 17, 2024
1 parent c668ac9 commit a91cfbc
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 5 deletions.
9 changes: 9 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[flake8]
max-line-length = 150
ignore =
# missing whitespace around parameter equals
E252,
# line break before binary operator
W503

per-file-ignores = __init__.py:F401
14 changes: 9 additions & 5 deletions fcbf/fcbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def prior(X: pd.Series) -> pd.Series:
n = X.size
return X.value_counts()/n


def cond_proba(X: pd.Series, y: pd.Series) -> pd.Series:
"""Calculates the conditional probability of a feature given the class.
# TODO: accept a nx2 df containing X and y
Expand All @@ -36,7 +37,8 @@ def cond_proba(X: pd.Series, y: pd.Series) -> pd.Series:
sample = pd.concat([X, y], axis=1)
return sample.groupby([X.name, y.name]).size().div(len(sample.index)).div(prior(y), axis=0, level=y.name)

def cond_entropy(X: pd.Series, y: pd.Series, base: float =np.e) -> float:

def cond_entropy(X: pd.Series, y: pd.Series, base: float = np.e) -> float:
"""Calculates the conditional entropy of a feature given the class.
Args:
Expand All @@ -52,7 +54,8 @@ def cond_entropy(X: pd.Series, y: pd.Series, base: float =np.e) -> float:
prod = cond_proba_ * logged_cond_proba_
return -1 * prod.groupby(level=y.name).sum().mul(prior(y)).sum()

def information_gain(X: pd.Series, y: pd.Series, base: float=np.e) -> float:

def information_gain(X: pd.Series, y: pd.Series, base: float = np.e) -> float:
"""Calculates the information gain IG of a feature regarding the class.
Formula: IG(X|y) = entropy(X) - cond_entropy(X|y)
Expand All @@ -74,7 +77,8 @@ def information_gain(X: pd.Series, y: pd.Series, base: float=np.e) -> float:

return entropy_ - cond_entropy_

def symmetrical_uncertainty(X: pd.Series, y: pd.Series, base: float=np.e) -> float:

def symmetrical_uncertainty(X: pd.Series, y: pd.Series, base: float = np.e) -> float:
"""Calculates the symmetrical uncertainty SU of a feature regarding the class.
Formula: SU(X,y) = 2 * IG(X|y) / ( entropy(X) + entropy(y) )
Expand All @@ -92,8 +96,9 @@ def symmetrical_uncertainty(X: pd.Series, y: pd.Series, base: float=np.e) -> flo

return 2 * information_gain_ / (entropy_X + entropy_y)


def fcbf(X: pd.DataFrame, y: pd.Series,
su_threshold: float=0.0, base: float=np.e) -> Tuple[list, list, dict]:
su_threshold: float = 0.0, base: float = np.e) -> Tuple[list, list, dict]:
"""Fast correlation-based filter algorithm introduced by Yu and Liu.
@inproceedings{inproceedings,
author = {Yu, Lei and Liu, Huan},
Expand Down Expand Up @@ -184,7 +189,6 @@ def fcbf(X: pd.DataFrame, y: pd.Series,

from fcbf import data


dataset = data.lung_cancer
X = dataset[dataset.columns[1:]]
y = dataset[dataset.columns[0]].astype(int)
Expand Down

0 comments on commit a91cfbc

Please sign in to comment.