Explain Blackbox Classifiers
In this notebook we use the interpret package to explain blackbox classifiers with SHAP, LIME, Morris sensitivity, and partial dependence.
This notebook can be found in our examples folder on GitHub.
# install interpret if not already installed
try:
    import interpret
except ModuleNotFoundError:
    !pip install --quiet interpret pandas scikit-learn lime
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from interpret import show
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
set_visualize_provider(InlineProvider())
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
    header=None)
df.columns = [
    "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
    "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
    "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income"
]
X = df.iloc[:, :-1]
y = (df.iloc[:, -1] == " >50K").astype(int)
# We have to transform categorical variables to use sklearn models
X = pd.get_dummies(X, prefix_sep='.').astype(float)
seed = 42
np.random.seed(seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)
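Before training, an optional sanity check (plain pandas, nothing interpret-specific) confirms the one-hot encoding produced the expected shape and shows how imbalanced the target is:
# Optional sanity check on the prepared data
print(f"Encoded feature matrix: {X.shape[0]} rows x {X.shape[1]} columns")
print(f"Positive class rate (>50K): {y.mean():.3f}")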
Train a blackbox classification system
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
# A blackbox system can include preprocessing, not just a classifier!
pca = PCA()
rf = RandomForestClassifier(random_state=seed)
blackbox_model = Pipeline([('pca', pca), ('rf', rf)])
blackbox_model.fit(X_train, y_train)
Pipeline(steps=[('pca', PCA()), ('rf', RandomForestClassifier(random_state=42))])
Show blackbox model performance
from interpret.perf import ROC
blackbox_perf = ROC(blackbox_model).explain_perf(X_test, y_test, name='Blackbox')
show(blackbox_perf)
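As a cross-check on the dashboard above, the same ROC AUC can be computed directly with scikit-learn from the model's predicted probabilities (a minimal sketch, not part of the interpret API):
# Cross-check the dashboard's AUC with scikit-learn
from sklearn.metrics import roc_auc_score
test_probs = blackbox_model.predict_proba(X_test)[:, 1]
print(f"Test ROC AUC: {roc_auc_score(y_test, test_probs):.3f}")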
Local Explanations: How an individual prediction was made
from interpret.blackbox import LimeTabular
# Blackbox explainers need a model or a predict function, and optionally a dataset
# (a predict-function variant is sketched after this cell)
lime = LimeTabular(blackbox_model, X_train, random_state=1)
# Pick the instances to explain; optionally pass in labels if you have them
lime_local = lime.explain_local(X_test[:5], y_test[:5], name='LIME')
show(lime_local, 0)
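The comment above notes that blackbox explainers also accept a predict function; a minimal sketch of that form is below, assuming LimeTabular takes predict_proba in place of the fitted pipeline (swap the model object back in if your installed version expects it):
# Hedged: pass the prediction function instead of the model object
lime_fn = LimeTabular(blackbox_model.predict_proba, X_train, random_state=1)
lime_fn_local = lime_fn.explain_local(X_test[:5], y_test[:5], name='LIME (predict_proba)')
show(lime_fn_local, 0)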
from interpret.blackbox import ShapKernel
background_val = pd.DataFrame(np.median(X_train, axis=0).reshape(1, -1), columns=X.columns)
shap = ShapKernel(blackbox_model, background_val)
shap_local = shap.explain_local(X_test[:5], y_test[:5], name='SHAP')
show(shap_local, 0)
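The single median row above is a fast choice of SHAP background; a small random sample of the training data captures more of the feature distribution at the cost of a slower explanation. A sketch, assuming ShapKernel accepts any DataFrame of background rows (as the call above suggests):
# Hedged alternative: a sampled background instead of one median row
background_sample = X_train.sample(100, random_state=seed)
shap_sampled = ShapKernel(blackbox_model, background_sample)
shap_sampled_local = shap_sampled.explain_local(X_test[:1], y_test[:1], name='SHAP (sampled background)')
show(shap_sampled_local, 0)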
Global Explanations: How the model behaves overall
from interpret.blackbox import MorrisSensitivity
sensitivity = MorrisSensitivity(blackbox_model, X_train)
sensitivity_global = sensitivity.explain_global(name="Global Sensitivity")
show(sensitivity_global)
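If you want the numbers behind the dashboard, interpret explanations expose a data() accessor; a defensive sketch (the exact structure of the returned object may vary by version):
# Hedged: inspect the raw sensitivity data backing the visualization
raw = sensitivity_global.data()
print(list(raw.keys()) if isinstance(raw, dict) else raw)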
from interpret.blackbox import PartialDependence
pdp = PartialDependence(blackbox_model, X_train)
pdp_global = pdp.explain_global(name='Partial Dependence')
show(pdp_global, 0)
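The second argument to show selects which feature's partial dependence plot to render; omit it to get the selector over all features, the same usage pattern as the dashboards above:
# Render the full partial dependence dashboard with a feature selector
show(pdp_global)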