diff --git a/CHANGELOG.md b/CHANGELOG.md index fa3bc0e..c5088b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `performance` in `toad.utils` for test code performance - Added `pickletracer` in `toad.utils` for infer requirements in pickle object +### Fixed +- Fixed `ValueError` in `select` and `drop_corr` methods when using `pandas >= 2.0.x` + ## [0.1.2] - 2023-04-09 ### Add diff --git a/toad/__init__.py b/toad/__init__.py index 500dfa0..572e326 100644 --- a/toad/__init__.py +++ b/toad/__init__.py @@ -10,6 +10,7 @@ from .detector import detect from .metrics import KS, KS_bucket, F1 from .stats import quality, IV, VIF, WOE, entropy, entropy_cond, gini, gini_cond +from .transform import Combiner, WOETransformer from .selection import select from .scorecard import ScoreCard from .utils import Progress, performance diff --git a/toad/selection.py b/toad/selection.py index 2253406..8e707f6 100644 --- a/toad/selection.py +++ b/toad/selection.py @@ -326,7 +326,7 @@ def drop_corr(frame, target = None, threshold = 0.7, by = 'IV', f, t = split_target(frame[cols], target) - corr = f.corr().abs() + corr = f.corr(numeric_only = True).abs() drops = [] diff --git a/toad/selection_test.py b/toad/selection_test.py index 90cd323..8c1a245 100644 --- a/toad/selection_test.py +++ b/toad/selection_test.py @@ -48,7 +48,14 @@ def test_drop_var_exclude(): def test_drop_corr(): df = drop_corr(frame, target = 'target') - assert ['D', 'E', 'F', 'target'] == df.columns.tolist() + assert set(['D', 'E', 'F', 'target']) == set(df.columns.tolist()) + +def test_drop_corr_with_string(): + ab = np.array(list('ABCDEFG')) + str_feat = pd.Series(ab[np.random.choice(7, 500)]) + + df = drop_corr(pd.concat((frame, str_feat.rename('str_f')), axis = 1), target = 'target') + assert set(['D', 'E', 'F', 'target', 'str_f']) == set(df.columns.tolist()) def test_drop_iv(): df = drop_iv(frame, target = 'target', 
threshold = 0.25)