Explain Blackbox Classifiers
In this notebook we use the `interpret` package to explain blackbox classifiers with four techniques: LIME, SHAP, Morris sensitivity analysis, and partial dependence.
# install interpret if not already installed
import interpret
except ModuleNotFoundError:
!pip install --quiet interpret pandas scikit-learn lime
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from interpret import show
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
df = pd.read_csv(
# Name the 15 columns of the loaded frame; the last one ("Income") is the label.
df.columns = [
    "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
    "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
    "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income",
]
seed = 42

# Features are every column except the last. sklearn models need numeric input,
# so the categorical columns are one-hot encoded and everything is cast to float.
X = pd.get_dummies(df.iloc[:, :-1], prefix_sep='.').astype(float)

# Binary target: 1 where the last column equals " >50K" (leading space included), else 0.
y = df.iloc[:, -1].eq(" >50K").astype(int)

# Hold out 20% of the rows for evaluation; the split is reproducible via `seed`.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=seed,
)
Train a blackbox classification system
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
# The "blackbox" is the entire pipeline (PCA preprocessing followed by a
# random forest), not only the final classifier.
pca = PCA()
rf = RandomForestClassifier(random_state=seed)
blackbox_model = Pipeline(
    steps=[
        ('pca', pca),
        ('rf', rf),
    ]
)
# Kept as the cell's final expression so the notebook displays the fitted pipeline.
blackbox_model.fit(X_train, y_train)
Pipeline(steps=[('pca', PCA()), ('rf', RandomForestClassifier(random_state=42))])
Pipeline(steps=[('pca', PCA()), ('rf', RandomForestClassifier(random_state=42))])
Show blackbox model performance
from interpret.perf import ROC

# Evaluate the fitted pipeline on the held-out split and render the result;
# without the show() call this section would display nothing.
blackbox_perf = ROC(blackbox_model).explain_perf(X_test, y_test, name='Blackbox')
show(blackbox_perf)
Local Explanations: How an individual prediction was made
from interpret.blackbox import LimeTabular

# A blackbox explainer is built from the model plus (optionally) a dataset.
lime = LimeTabular(blackbox_model, X_train, random_state=1)

# Explain the first five test rows; the matching labels are optional.
lime_local = lime.explain_local(
    X_test.iloc[:5],
    y_test.iloc[:5],
    name='LIME',
)

show(lime_local, 0)
from interpret.blackbox import ShapKernel

# Background data for SHAP: a single row holding the per-feature median of the
# training set, labelled with the training feature names.
background_val = pd.DataFrame(
    [np.median(X_train, axis=0)],
    columns=X_train.columns,
)
shap = ShapKernel(blackbox_model, background_val)

# Explain the same five test rows that the LIME section uses.
shap_local = shap.explain_local(
    X_test.iloc[:5],
    y_test.iloc[:5],
    name='SHAP',
)

show(shap_local, 0)
Global Explanations: How the model behaves overall
from interpret.blackbox import MorrisSensitivity

# Global explanation built from the model and the training data.
sensitivity = MorrisSensitivity(blackbox_model, X_train)
sensitivity_global = sensitivity.explain_global(name="Global Sensitivity")

# Render the explanation; previously it was computed but never displayed.
show(sensitivity_global)
from interpret.blackbox import PartialDependence

# Partial-dependence explainer built from the model and the training data.
pdp = PartialDependence(
    blackbox_model,
    X_train,
)
pdp_global = pdp.explain_global(
    name='Partial Dependence',
)

# Display the entry selected by key 0.
show(pdp_global, 0)