Differentially Private EBMs
Links to API References: DPExplainableBoostingClassifier, DPExplainableBoostingRegressor
See the reference paper for full details [1]. Link
Code Example
The following code trains a DPEBM classifier on the Adult income dataset. The visualizations that follow cover both global and local explanations.
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
# Register an InlineProvider instance as interpret's visualization provider.
# NOTE(review): presumably this makes show() render inline in the notebook
# output rather than through another provider -- confirm against interpret docs.
set_visualize_provider(InlineProvider())
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from interpret.privacy import DPExplainableBoostingClassifier
from interpret import show
# Source of the raw Adult income data. It is read with header=None, so the
# column names are assigned explicitly afterwards.
ADULT_DATA_URL = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
)
COLUMN_NAMES = [
    "Age",
    "WorkClass",
    "fnlwgt",
    "Education",
    "EducationNum",
    "MaritalStatus",
    "Occupation",
    "Relationship",
    "Race",
    "Gender",
    "CapitalGain",
    "CapitalLoss",
    "HoursPerWeek",
    "NativeCountry",
    "Income",
]

df = pd.read_csv(ADULT_DATA_URL, header=None)
df.columns = COLUMN_NAMES

# Every column but the last is a feature; the last column ("Income") is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Per-feature bounds handed to the DP-EBM.
# NOTE(review): presumably (min, max) value ranges for each continuous
# feature -- confirm against the DPExplainableBoostingClassifier API reference.
privacy_bounds = {
    "Age": (17, 90),
    "fnlwgt": (12285, 1484705),
    "EducationNum": (1, 16),
    "CapitalGain": (0, 99999),
    "CapitalLoss": (0, 4356),
    "HoursPerWeek": (1, 99),
}

# The features that carry privacy bounds are exactly the continuous ones;
# every other feature column is treated as nominal.
feature_types = [
    "continuous" if column in privacy_bounds else "nominal"
    for column in COLUMN_NAMES[:-1]
]

# Hold out 20% of the rows for evaluation. No seed is passed, so the split
# (and therefore the reported AUC) varies from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

dpebm = DPExplainableBoostingClassifier(
    random_state=None,
    epsilon=1.0,
    delta=1e-5,
    feature_types=feature_types,
    privacy_bounds=privacy_bounds,
)
dpebm.fit(X_train, y_train)

# Score the held-out rows using the probability column at index 1.
positive_scores = dpebm.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, positive_scores)
print("AUC: {:.3f}".format(auc))
AUC: 0.884
# Display the global explanation produced by the fitted DP-EBM.
show(dpebm.explain_global())