Skip to content

Commit

Permalink
add flat_mean utility function
Browse files Browse the repository at this point in the history
  • Loading branch information
paulbkoch committed Jan 15, 2025
1 parent f402539 commit ad0ebc1
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 7 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,7 @@ We also build on top of many great packages. Please check them out!
- [Revealing the Galaxy-Halo Connection Through Machine Learning](https://arxiv.org/pdf/2204.10332.pdf)
- [How the Galaxy–Halo Connection Depends on Large-Scale Environment](https://arxiv.org/pdf/2402.07995.pdf)
- [Explainable Artificial Intelligence for COVID-19 Diagnosis Through Blood Test Variables](https://link.springer.com/content/pdf/10.1007/s40313-021-00858-y.pdf)
- [A diagnostic support system based on interpretable machine learning and oscillometry for accurate diagnosis of respiratory dysfunction in silicosis](https://www.biorxiv.org/content/10.1101/2025.01.08.632001v1.full.pdf)
- [Using Explainable Boosting Machines (EBMs) to Detect Common Flaws in Data](https://link.springer.com/chapter/10.1007/978-3-030-93736-2_40)
- [Differentially Private Gradient Boosting on Linear Learners for Tabular Data Analysis](https://assets.amazon.science/fa/3a/a62ba73f4bbda1d880b678c39193/differentially-private-gradient-boosting-on-linear-learners-for-tabular-data-analysis.pdf)
- [Differentially private and explainable boosting machine with enhanced utility](https://www.sciencedirect.com/science/article/abs/pii/S0925231224011950)
Expand All @@ -659,6 +660,7 @@ We also build on top of many great packages. Please check them out!
- [Towards Cleaner Cities: Estimating Vehicle-Induced PM2.5 with Hybrid EBM-CMA-ES Modeling](https://www.mdpi.com/2305-6304/12/11/827)
- [Using machine learning to assist decision making in the assessment of mental health patients presenting to emergency departments](https://journals.sagepub.com/doi/full/10.1177/20552076241287364)
- [Proposing an inherently interpretable machine learning model for shear strength prediction of reinforced concrete beams with stirrups](https://www.sciencedirect.com/science/article/pii/S2214509524005011)
- [A hybrid machine learning approach for predicting fiber-reinforced polymer-concrete interface bond strength](https://download.ssrn.com/eaai/e646e179-ec4a-4987-80b5-8d6bbf43ceda-meca.pdf?response-content-disposition=inline&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEBMaCXVzLWVhc3QtMSJHMEUCIFVH%2Ba5TT2NOEqgCl7GMhXBXBZWE9VzzcRFT6kYXzdxYAiEA4yvXsrzNQnNq%2BkJRB0rw1d2p35f418pIO%2FT3PHKoZ%2BoqxgUI%2B%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARAEGgwzMDg0NzUzMDEyNTciDCD0kCrKAqamcwb9LCqaBb4zlqjDhNBhf%2Frbe%2FX3lzSjvS58HiJQtbOHmzaM7putg93e7Wk8nPesoiupTH8uB5ejDC7stGJElRZp5ulT5M6CokoMu82ERn15kMpkgptj3MVEmsY9VTCP%2BCbROJ6v4YcAttOOAEzOc2M6li6o0w4IsF8DNXEIJr%2FJvjB3IDYPkrmpIiHl25h3AzfxPuOF01E2rgucLnY0xTyKGnPBBDZ%2FPtcuqlk2NKun3Q9HbcKj8EPJP%2FPupMW3IQvMnhcdJqqLHXs6wL1P42NTw5vtZO2W5WiEC1CNGDFUTSFRdb9hjhpH4JsYl8X%2BSFT6mZ31K2HTWeuigs5nXp1JN8r8r4O021yiVxHAJ6Chnddr0Z19iM5yOZA4H1EhO1rxxL0VF%2F%2F8Ac3GxuEfkBiug5wuL7aNlBNX6720pYfHH%2FgyrqdU5KSDIp8VYw3KgEij0LkizBHQIoolC48VAEMNc%2F8iWOdZpAVYprhEbABbff8%2BW6c4y1N9vmLTkjZkJtZODpzpQVjrHkL9hAOvmXZocEEN6maRoVJx3DlcTHrfQr8%2BQnPQnmajb5x0FHo44xxBIUt7UB4FOc6beDprle%2F7BO2SNEPLw6rJ9e3WJeVaYch46iqk2tiWFroNHDXlQ73CbzV59AEVtLAR29eIf7uyz%2BU0fOAXG5oAsJyB7YXUjH%2Bh79sxJgBq3%2FoqkEja06CFPRhWeqxixc8y9bEU%2FvvjhfbcWcxGY%2Be%2FwnXbemUbSyr26Y5xvADyicKIMexZNjeHBJ9MKMifQ9oh%2FjmudjxtMLbTpA6EAxMelLjhWcoURF0XeTttMEzEuTjO1OXUwMeXSPZ9roJqH3DB4PHi%2B8UIUG1JoVocv7wDu5ZVlMzgmDr0ti1BShKr9szxagq34jCEkJe8BjqxAbm7bsef33J3AImECx0GZeL0R2tFJZ7ctogL261zP7RqJ4T71rDMbpyfX6HfGuNEbWVROKHUexpuH8FZBodmn%2FjDjZSviK1oxQ1L5TDA2rwMsodnThreIad8vSXqxAzx9qng%2BeN2llXkNdIB7WEnkttzcJ24pZqwYnPI%2FsOznTq%2BDJ88mdNPtzph%2FGdVQcR99tV3waapotTEnUjjoqTTSh9aMgi1jIYMGMrJj6Jb4N%2FhWA%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20250114T024208Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAUPUUPRWEXKDDLJZE%2F20250114%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=11c6f325f84736d5324ab155663c94231696de52be8910a03bb5e9c18f0d1689&abstractId=5055231)
- [Using explainable machine learning and fitbit data to investigate predictors of adolescent obesity](https://www.nature.com/articles/s41598-024-60811-2)
- [Interpretable Predictive Value of Including HDL-2b and HDL-3 in an Explainable Boosting Machine Model for Multiclass Classification of Coronary Artery Stenosis Severity in Acute Myocardial Infarction Patients](https://watermark.silverchair.com/ztae100.pdf?token=AQECAHi208BE49Ooan9kkhW_Ercy7Dm3ZL_9Cf3qfKAc485ysgAAA2owggNmBgkqhkiG9w0BBwagggNXMIIDUwIBADCCA0wGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMnDqoUBnqG9Zyr0dAAgEQgIIDHT2M3owEzTRAV3KZzrOpzyqOYgClio-CQrzB5731fvsEe9ZWO_QfqQAKdaPyyOsEKjacd25hWs-_OvgXCqc36R4yFWu46PFOCApII2s3hbHYI1XEQozWfdyosgaQf_e7_5RIqIfwTEHt19LoYZuaDYjCqq2vmWOMZb6dNI6mz-h3Zd6BgbyYAFgRHiJfU94NU0Crf_AbbTx2jW3HqMBLYPn-ysUiyQYILNmqlKAAlw81ZjBwzusaQFsiJMCxwGyFHks7nwtnUQ8J5PU5Jelp8_fQ8x5_dlZvzvdkI9MR87zUkk4hm2XL0uyfvH92-7VV_2gMe-rU3aJZhbHJu2hENPDh_OmoDe7SOC-5EwPsgIDoDr_dgSgyhBMIbOk_TrSM4oEN6dbtvfLSDXQUWDV4semLuPjqz7WyiQz4PPt1mXuaf12X5xyVsf1Mms4UpGAKLyoCdJ-zDJ9csOPCefIsV2Bzs-KzaD63HWFLJuCU0hWIaK0QOcJATnpQb1PhFiAF6YZ_cCYTxkuAcrQyHS-WCEefNy8hB8PQXhNljtw0J499qdnLcNOM1gAQ3-o21KaTrEFs-DyvZwWmaGn8Zw1bK1CG8yVxWOh6_wjJpGjMMenstzrKFcLbJADs1yf3PuNGZds0g-Qf4NDcgsturcr0V1nLHVRFazWZhUKSeRnLjPzA5i3lVKnmwKjKa_50i0LMSIXNFS-dmvHs-qVUb8FO0_aKZ6egckXkoGG8w3Jox4MhhY2-B28Z0wbJOj8_DojCCtAmAPC0T5emRsuk1rkuRXIoMtFDWN0l7fr7RVkuy1TEd3mpa5UuU7Qo-wu_yqi6ibwLupjGeVN__7SeteoBSh8yFJgYN4BEiYmdkEX7DgKaMC90h5GakNJ7zeAPR9PFnQVRORoof04qMWK4aGod2igso1-qsCup-kVWmPy8zrQKlqxE4OCeqUpKQgZMUUAlFu643iuRnQuLnahXhui45TY8lS56XGCLqkwSG594lMoAXAYZ9tVFM4fAVwQJ3EWkJfHRRCWWGZfLwBPsdUnNEziGg4QIdrKhe-Fu7nLF)
- [Estimate Deformation Capacity of Non-Ductile RC Shear Walls Using Explainable Boosting Machine](https://arxiv.org/pdf/2301.04652.pdf)
Expand All @@ -682,6 +684,8 @@ We also build on top of many great packages. Please check them out!
- [Binary ECG Classification Using Explainable Boosting Machines for IoT Edge Devices](https://ieeexplore.ieee.org/document/9970834)
- [Explainable artificial intelligence toward usable and trustworthy computer-aided diagnosis of multiple sclerosis from Optical Coherence Tomography](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10406231/)
- [An Interpretable Machine Learning Model with Deep Learning-based Imaging Biomarkers for Diagnosis of Alzheimer’s Disease](https://arxiv.org/pdf/2308.07778.pdf)
- [Prediction of Alzheimer Disease on the DARWIN Dataset with Dimensionality Reduction and Explainability Techniques](https://www.scitepress.org/Papers/2024/130174/130174.pdf)
- [Explainable Boosting Machine for Predicting Alzheimer’s Disease from MRI Hippocampal Subfields](https://link.springer.com/chapter/10.1007/978-3-030-86993-9_31)
- [Comparing explainable machine learning approaches with traditional statistical methods for evaluating stroke risk models: retrospective cohort study](https://pureadmin.qub.ac.uk/ws/portalfiles/portal/495863198/JMIR_Cardio.pdf)
- [Explainable Artificial Intelligence for Cotton Yield Prediction With Multisource Data](https://ieeexplore.ieee.org/document/10214067)
- [Preoperative detection of extraprostatic tumor extension in patients with primary prostate cancer utilizing](https://insightsimaging.springeropen.com/articles/10.1186/s13244-024-01876-5)
Expand Down Expand Up @@ -737,7 +741,6 @@ We also build on top of many great packages. Please check them out!
- [Death by Round Numbers and Sharp Thresholds: How to Avoid Dangerous AI EHR Recommendations](https://www.medrxiv.org/content/10.1101/2022.04.30.22274520v1.full.pdf)
- [Building a predictive model to identify clinical indicators for COVID-19 using machine learning method](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9037972/pdf/11517_2022_Article_2568.pdf)
- [Using Innovative Machine Learning Methods to Screen and Identify Predictors of Congenital Heart Diseases](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8777022/pdf/fcvm-08-797002.pdf)
- [Explainable Boosting Machine for Predicting Alzheimer’s Disease from MRI Hippocampal Subfields](https://link.springer.com/chapter/10.1007/978-3-030-86993-9_31)
- [Impact of Accuracy on Model Interpretations](https://arxiv.org/pdf/2011.09903.pdf)
- [Machine Learning Algorithms for Identifying Dependencies in OT Protocols](https://www.mdpi.com/1996-1073/16/10/4056)
- [Causal Understanding of Why Users Share Hate Speech on Social Media](https://arxiv.org/pdf/2310.15772.pdf)
Expand Down
9 changes: 6 additions & 3 deletions python/interpret-core/interpret/glassbox/_ebm/_ebm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1011,8 +1011,11 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
* bag[include_samples]
)

bagged_intercept[idx, :] = np.average(
y_local, weights=sample_weight_local
bagged_intercept[idx, :] = native.flat_mean(
y_local,
None
if sample_weight_local is None
else np.asarray(sample_weight_local, np.float64),
)
elif init_score is None:
if (
Expand Down Expand Up @@ -1472,7 +1475,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None):
if objective_code == Native.Objective_MonoClassification:
pass
elif objective_code == Native.Objective_Rmse:
correction = np.average(y - scores, weights=sample_weight)
correction = native.flat_mean(y - scores, sample_weight)
intercept += correction
bagged_intercept += correction
else:
Expand Down
5 changes: 3 additions & 2 deletions python/interpret-core/interpret/glassbox/_ebm/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def _create_proportional_tensor(axis_weights):


def process_bag_terms(intercept, term_scores, bin_weights):
native = Native.get_native_singleton()
for scores, weights in zip(term_scores, bin_weights):
if develop.get_option("purify_result"):
new_scores, add_impurities, add_intercept = purify(scores, weights)
Expand All @@ -165,7 +166,7 @@ def process_bag_terms(intercept, term_scores, bin_weights):
temp_weights[ignored] = 0.0

if temp_weights.sum() != 0:
mean = np.average(temp_scores, 0, temp_weights)
mean = native.flat_mean(temp_scores, temp_weights)
intercept += mean
scores -= mean
else:
Expand All @@ -178,7 +179,7 @@ def process_bag_terms(intercept, term_scores, bin_weights):
temp_weights[ignored] = 0.0

if temp_weights.sum() != 0:
mean = np.average(temp_scores, 0, temp_weights)
mean = native.flat_mean(temp_scores, temp_weights)
intercept[i] += mean
scores[..., i] -= mean

Expand Down
23 changes: 23 additions & 0 deletions python/interpret-core/interpret/utils/_native.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import platform
import struct
import sys
import math
from contextlib import AbstractContextManager

import numpy as np
Expand Down Expand Up @@ -236,6 +237,28 @@ def clean_float(self, val):
)
return val_array[0]

def flat_mean(self, vals, weights=None):
    """Return the (optionally weighted) mean over all elements of ``vals``.

    The work is delegated to the native ``SafeMean`` routine, which is
    given the total element count of ``vals`` and writes one double result.

    Args:
        vals: Array exposing ``.shape``; handed to the native code through
            ``Native._make_pointer`` with a float64 dtype.
        weights: Optional array that must have exactly the same shape as
            ``vals``. ``None`` is forwarded unchanged (presumably selecting
            an unweighted mean; confirm against the SafeMean contract).

    Returns:
        The mean as a plain Python ``float``.

    Raises:
        Exception: If ``weights`` is given and its shape differs from that
            of ``vals``. A non-zero native return code raises whatever
            ``Native._get_native_exception`` constructs.
    """
    if weights is not None and vals.shape != weights.shape:
        msg = "vals and weights must have the same shape to call flat_mean."
        raise Exception(msg)

    # Every element participates, regardless of how many dimensions vals has.
    n_tensor_bins = math.prod(vals.shape)

    # Pre-filled with NaN so an unwritten result is recognisable.
    mean_result = ct.c_double(np.nan)

    # NOTE(review): the literal 1 presumably tells SafeMean to produce a
    # single mean over the flattened data; confirm against its signature.
    return_code = self._unsafe.SafeMean(
        n_tensor_bins,
        1,
        Native._make_pointer(vals, np.float64, None),
        Native._make_pointer(weights, np.float64, None, True),
        ct.byref(mean_result),
    )
    if return_code:  # pragma: no cover
        raise Native._get_native_exception(return_code, "SafeMean")

    # Unwrap the ctypes object: callers do arithmetic with the result
    # (e.g. ``intercept += mean``), which a c_double does not support.
    return mean_result.value

def safe_mean(self, tensor, weights=None):
n_bags = tensor.shape[0]
if weights is not None:
Expand Down
2 changes: 1 addition & 1 deletion python/interpret-core/tests/glassbox/ebm/test_ebm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1254,7 +1254,7 @@ def test_identical_classification():
original = get_option("acceleration")
set_option("acceleration", 0)

for iteration in range(3):
for iteration in range(1):
total = 0.0
seed = 0
for i in range(10):
Expand Down

0 comments on commit ad0ebc1

Please sign in to comment.