Custom Interactions

In this notebook we disable the automatic interaction detection built into the interpret API, detect the interactions ourselves, and then incorporate them into the EBM. We also detect and use 3-way interactions, which are typically not needed but can sometimes be useful.

This notebook can be found in our examples folder on GitHub.

# install interpret if not already installed
try:
    import interpret
except ModuleNotFoundError:
    !pip install --quiet interpret pandas scikit-learn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from interpret.glassbox import ExplainableBoostingClassifier

from interpret import set_visualize_provider
from interpret.provider import InlineProvider
set_visualize_provider(InlineProvider())

df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
    header=None)
df.columns = [
    "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
    "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
    "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income"
]
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

seed = 42
np.random.seed(seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)

Build a mains model

ebm1 = ExplainableBoostingClassifier(random_state=seed, interactions=0)
ebm1.fit(X_train, y_train)
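With interactions=0 the fitted model contains only main-effect terms, one per feature. A quick check (an inspection snippet added here, not part of the original notebook):

print(len(ebm1.term_features_))                             # 14, one term per feature
print(all(len(term) == 1 for term in ebm1.term_features_))  # True: mains only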

Determine pairs

from interpret.utils import measure_interactions
from itertools import combinations

n_features = X_train.shape[1]

pairs = measure_interactions(X_train, y_train, interactions=combinations(range(n_features), 2), init_score=ebm1)
pairs = [interaction for interaction, strength in pairs[:10]]  # keep the 10 strongest pairs (measure_interactions returns them sorted by strength, descending)
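To see which pairs were selected, the feature indices can be mapped back to column names (a small inspection snippet, not in the original notebook):

for f1, f2 in pairs:
    print(X_train.columns[f1], "x", X_train.columns[f2])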

Build a pure pair model

ebm2 = ExplainableBoostingClassifier(random_state=seed, exclude="mains", interactions=pairs)
ebm2.fit(X_train, y_train, init_score=ebm1)

# ebm2 excluded mains, so features that appear in no pair are left with empty
# bin definitions; copy the bin definitions for those features over from ebm1
ebm2.bins_ = [l1 if len(l2) == 0 else l2 for l1, l2 in zip(ebm1.bins_, ebm2.bins_)]
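Because mains were excluded and ebm2 was boosted on the residuals of ebm1 via init_score, it should contain only pure pair terms; an assertion makes that explicit (an added check, not in the original notebook):

assert all(len(term) == 2 for term in ebm2.term_features_)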

Merge the mains and pure pairs into a single model

from interpret.glassbox import merge_ebms

ebm_pairs = merge_ebms([ebm1, ebm2])

# There is no overlap between these EBMs, so merge_ebms will consider
# the non-overlapping terms as having zeros for scores in the other model. 
# Undo this by multiplying the scores by 2.0. Also reduce the bin_weights_ 
# since we're merging the same underlying features.
for i in range(len(ebm_pairs.term_features_)):
    ebm_pairs.scale(i, 2.0)
    ebm_pairs.bin_weights_[i] *= 0.5

# add intercepts since we're not trying to average the models
ebm_pairs.intercept_ = ebm1.intercept_ + ebm2.intercept_

ebm_pairs.bagged_intercept_ = None
ebm_pairs.bagged_scores_ = None
ebm_pairs.standard_deviations_ = None
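If the rescaling above is correct, the merged model's logits should match the sum of the two component models' logits. A sanity check, assuming the scikit-learn-style decision_function that interpret's EBMs expose, and allowing for small numeric differences introduced by the merge:

combined = ebm1.decision_function(X_test) + ebm2.decision_function(X_test)
merged = ebm_pairs.decision_function(X_test)
print(np.allclose(combined, merged, atol=1e-6))  # expected to print True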

Determine triples

triples = measure_interactions(X_train, y_train, interactions=combinations(range(n_features), 3), init_score=ebm_pairs)
triples = [interaction for interaction, strength in triples[:10]]  # keep the 10 strongest triples

Build a pure triple EBM

ebm3 = ExplainableBoostingClassifier(random_state=seed, exclude="mains", interactions=triples)
ebm3.fit(X_train, y_train, init_score=ebm_pairs)

# ebm3 excluded mains, so features that appear in no triple are left with empty
# bin definitions; copy the bin definitions for those features over from ebm1
ebm3.bins_ = [l1 if len(l3) == 0 else l3 for l1, l3 in zip(ebm1.bins_, ebm3.bins_)]
UserWarning: Interactions with 3 or more terms are not graphed in global explanations. Local explanations are still available and exact.

Merge the mains, pairs, and triples into a single model

ebm_triples = merge_ebms([ebm1, ebm2, ebm3])

# There is no overlap between these EBMs, so merge_ebms will consider
# the non-overlapping terms as having zeros for scores in the other models.
# Undo this by multiplying the scores by 3.0. Also reduce the bin_weights_
# since we're merging the same underlying features.
for i in range(len(ebm_triples.term_features_)):
    ebm_triples.scale(i, 3.0)
    ebm_triples.bin_weights_[i] *= 1.0/3.0

# add intercepts since we're not trying to average the models
ebm_triples.intercept_ = ebm1.intercept_ + ebm2.intercept_ + ebm3.intercept_

ebm_triples.bagged_intercept_ = None
ebm_triples.bagged_scores_ = None
ebm_triples.standard_deviations_ = None
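As the warning above notes, 3-way terms are not shown in global explanation graphs, but local explanations still include them exactly. A usage sketch (not in the original notebook):

from interpret import show

# local explanations include the 3-way term contributions for each sample
show(ebm_triples.explain_local(X_test[:5], y_test[:5]))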

Evaluate the EBMs

from sklearn.metrics import log_loss

loss1 = log_loss(y_test, ebm1.predict_proba(X_test))
print(loss1)

loss2 = log_loss(y_test, ebm_pairs.predict_proba(X_test))
print(loss2)

# compare our custom pair EBM with an EBM built to auto-discover the pairs
ebm_default = ExplainableBoostingClassifier(random_state=seed, interactions=10)
ebm_default.fit(X_train, y_train)
loss2_default = log_loss(y_test, ebm_default.predict_proba(X_test))
print(loss2_default)

loss3 = log_loss(y_test, ebm_triples.predict_proba(X_test))
print(loss3)
0.27204571071606864
0.2716838669794162
0.2718129833423553
0.27166359994734074
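Adding the pairs reduces the test log loss relative to the mains-only model, and the custom pair selection slightly edges out the auto-discovered pairs here; the triples yield only a marginal further improvement.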