Custom Interactions
In this notebook we will disable the automatic interaction detection built into the interpret API, detect the interactions ourselves, and then incorporate them into the EBM. We will also detect and use 3-way interactions, which are typically unnecessary but can sometimes be useful.
This notebook can be found in our examples folder on GitHub.
# install interpret if not already installed
try:
    import interpret
except ModuleNotFoundError:
    !pip install --quiet interpret pandas scikit-learn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
set_visualize_provider(InlineProvider())
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
    header=None)
df.columns = [
    "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
    "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
    "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income"
]
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
seed = 42
np.random.seed(seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)
Build a mains model
ebm1 = ExplainableBoostingClassifier(random_state=seed, interactions=0)
ebm1.fit(X_train, y_train)
ExplainableBoostingClassifier(interactions=0)
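As an optional sanity check, a fitted EBM lists its terms via the term_names_ attribute; with interactions=0 we expect one main-effect term per feature and no pairs:
# optional: a mains-only model should have exactly one term per feature
print(len(ebm1.term_names_))  # expect 14, the number of features
print(ebm1.term_names_[:5])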
Determine pairs
from interpret.utils import measure_interactions
from itertools import combinations
n_features = X_train.shape[1]
pairs = measure_interactions(X_train, y_train, interactions=combinations(range(n_features), 2), init_score=ebm1)
pairs = [interaction for interaction, strength in pairs[:10]] # select the top 10 pairs
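The raw index tuples are hard to read. A small optional sketch maps them back to column names, relying on measure_interactions returning its results strongest-first (the same ordering the slicing above depends on):
# optional: show the selected pairs by feature name rather than index
for idxs in pairs:
    print(tuple(X_train.columns[i] for i in idxs))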
Build a pure pair model
ebm2 = ExplainableBoostingClassifier(random_state=seed, exclude="mains", interactions=pairs)
ebm2.fit(X_train, y_train, init_score=ebm1)
# ebm2 contains only pair terms, so some features are left with empty bin
# lists; fill those in with the bin definitions from the mains model ebm1
ebm2.bins_ = [l1 if len(l2) == 0 else l2 for l1, l2 in zip(ebm1.bins_, ebm2.bins_)]
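If you want to confirm the patch worked, a one-line check (a sketch, assuming bins_ holds one list of bin definitions per feature) is that no entry is left empty:
# optional check: every feature should now have at least one bin definition
assert all(len(b) != 0 for b in ebm2.bins_)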
Merge the mains and pure pairs into a single model
from interpret.glassbox import merge_ebms
ebm_pairs = merge_ebms([ebm1, ebm2])
# merge_ebms averages the models, and since these EBMs share no terms,
# each term is averaged with an implicit zero from the other model and
# ends up halved. Undo this by scaling the scores by 2.0, and halve the
# bin_weights_ since we're merging the same underlying features.
for i in range(len(ebm_pairs.term_features_)):
    ebm_pairs.scale(i, 2.0)
    ebm_pairs.bin_weights_[i] *= 0.5
# add intercepts since we're not trying to average the models
ebm_pairs.intercept_ = ebm1.intercept_ + ebm2.intercept_
ebm_pairs.bagged_intercept_ = None
ebm_pairs.bagged_scores_ = None
ebm_pairs.standard_deviations_ = None
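Because ebm2 was boosted on top of ebm1 via init_score, the merged model's log-odds should equal the sum of the two stages' log-odds plus the combined intercept we just set. A sketch of that check, built only on predict_proba; agreement should hold up to floating-point error and any probability clipping:
# optional sanity check: merged log-odds ~= stage-1 log-odds + stage-2 log-odds
def logit(p):
    return np.log(p / (1.0 - p))

p1 = ebm1.predict_proba(X_test)[:, 1]
p2 = ebm2.predict_proba(X_test)[:, 1]
pm = ebm_pairs.predict_proba(X_test)[:, 1]
print(np.max(np.abs(logit(pm) - (logit(p1) + logit(p2)))))  # expect ~0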
Determine triples
triples = measure_interactions(X_train, y_train, interactions=combinations(range(n_features), 3), init_score=ebm_pairs)
triples = [interaction for interaction, strength in triples[:10]] # select the top 10 triples
Build a pure triple EBM
ebm3 = ExplainableBoostingClassifier(random_state=seed, exclude="mains", interactions=triples)
ebm3.fit(X_train, y_train, init_score=ebm_pairs)
# as before, fill in any empty bin lists in ebm3 with the bins from ebm1
ebm3.bins_ = [l1 if len(l3) == 0 else l3 for l1, l3 in zip(ebm1.bins_, ebm3.bins_)]
UserWarning: Interactions with 3 or more terms are not graphed in global explanations. Local explanations are still available and exact.
Merge the mains, pairs, and triples into a single model
ebm_triples = merge_ebms([ebm1, ebm2, ebm3])
# merge_ebms averages the models, and since these three EBMs share no
# terms, each term is averaged with implicit zeros from the other two
# models and ends up at a third of its value. Undo this by scaling the
# scores by 3.0, and reduce the bin_weights_ since we're merging the
# same underlying features.
for i in range(len(ebm_triples.term_features_)):
    ebm_triples.scale(i, 3.0)
    ebm_triples.bin_weights_[i] *= 1.0 / 3.0
# add intercepts since we're not trying to average the models
ebm_triples.intercept_ = ebm1.intercept_ + ebm2.intercept_ + ebm3.intercept_
ebm_triples.bagged_intercept_ = None
ebm_triples.bagged_scores_ = None
ebm_triples.standard_deviations_ = None
Evaluate the EBMs
from sklearn.metrics import log_loss
loss1 = log_loss(y_test, ebm1.predict_proba(X_test))
print(loss1)
0.2722501404306922
loss2 = log_loss(y_test, ebm_pairs.predict_proba(X_test))
print(loss2)
0.27182605059178233
# compare our custom pair EBM with an EBM built to auto-discover the pairs
ebm_default = ExplainableBoostingClassifier(random_state=seed, interactions=10)
ebm_default.fit(X_train, y_train)
loss2_default = log_loss(y_test, ebm_default.predict_proba(X_test))
print(loss2_default)
0.27168905106614594
loss3 = log_loss(y_test, ebm_triples.predict_proba(X_test))
print(loss3)
0.2715301997241265
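Log loss is the metric optimized above; as an optional complement, ROC AUC gives a ranking-based view of the same comparison. This sketch assumes the positive class is classes_[1], the class that the second column of predict_proba refers to:
from sklearn.metrics import roc_auc_score

# optional: compare the three models on AUC as well
y_true = (y_test == ebm1.classes_[1]).astype(int)
for name, model in [("mains", ebm1), ("pairs", ebm_pairs), ("triples", ebm_triples)]:
    print(name, roc_auc_score(y_true, model.predict_proba(X_test)[:, 1]))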