diff --git a/README.md b/README.md
index cc44247..f0ba2d0 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,24 @@ journal = {Proceedings, Twentieth International Conference on Machine Learning}
 
 Data for testing is taken from the [UCI Machine Learning Repository](http://archive.ics.uci.edu/ml)
 
+## Example
+
+```py
+from fcbf import fcbf, data
+
+
+dataset = data.lung_cancer
+X = dataset[dataset.columns[1:]]
+y = dataset[dataset.columns[0]].astype(int)
+print(X)
+print(y)
+
+relevant_features, irrelevant_features, correlations = fcbf(X, y, su_threshold=0.1, base=2)
+print('relevant_features:', relevant_features, '(count:', len(relevant_features), ')')
+print('irrelevant_features:', irrelevant_features, '(count:', len(irrelevant_features), ')')
+print('correlations:', correlations)
+```
+
 ## Setup
 
 Using pip, execute the following
diff --git a/fcbf/fcbf.py b/fcbf/fcbf.py
index bfaae98..f157ec8 100644
--- a/fcbf/fcbf.py
+++ b/fcbf/fcbf.py
@@ -178,3 +178,20 @@ def fcbf(X: pd.DataFrame, y: pd.Series,
     irrelevant_feature_names = [f_name for (f_name, _) in S_ord_removed + S_removed]
 
     return relevant_feature_names_sorted, irrelevant_feature_names, correlation_values
+
+
+if __name__ == '__main__':
+
+    from fcbf import data
+
+
+    dataset = data.lung_cancer
+    X = dataset[dataset.columns[1:]]
+    y = dataset[dataset.columns[0]].astype(int)
+    print(X)
+    print(y)
+
+    relevant_features, irrelevant_features, correlations = fcbf(X, y, su_threshold=0.1, base=2)
+    print('relevant_features:', relevant_features, '(count:', len(relevant_features), ')')
+    print('irrelevant_features:', irrelevant_features, '(count:', len(irrelevant_features), ')')
+    print('correlations:', correlations)