Explain Blackbox Classifiers#
In this notebook we will use the interpret package to explain blackbox classifiers using SHAP, Lime, MorrisSensitivity, and PartialDependence.
This notebook can be found in our examples folder on GitHub.
# install interpret if not already installed
try:
import interpret
except ModuleNotFoundError:
!pip install --quiet interpret pandas scikit-learn lime
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from interpret import show
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
set_visualize_provider(InlineProvider())
df = pd.read_csv(
"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
header=None)
df.columns = [
"Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
"MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
"CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income"
]
X = df.iloc[:, :-1]
y = (df.iloc[:, -1] == " >50K").astype(int)
# We have to transform categorical variables to use sklearn models
X = pd.get_dummies(X, prefix_sep='.').astype(float)
seed = 42
np.random.seed(seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)
Train a blackbox classification system
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
#Blackbox system can include preprocessing, not just a classifier!
pca = PCA()
rf = RandomForestClassifier(random_state=seed)
blackbox_model = Pipeline([('pca', pca), ('rf', rf)])
blackbox_model.fit(X_train, y_train)
Pipeline(steps=[('pca', PCA()), ('rf', RandomForestClassifier(random_state=42))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('pca', PCA()), ('rf', RandomForestClassifier(random_state=42))])
PCA()
RandomForestClassifier(random_state=42)
Show blackbox model performance
from interpret.perf import ROC
blackbox_perf = ROC(blackbox_model).explain_perf(X_test, y_test, name='Blackbox')
show(blackbox_perf)