TP9 - Model Explainability with LIME and SHAP
Practical Lab 75 min Intermediate
Objectives
By the end of this lab, you will be able to:
- Load a trained model and apply LIME to explain individual predictions
- Apply SHAP for both local and global explanations
- Generate visualizations: force plots, summary plots, waterfall plots, dependence plots
- Compare LIME and SHAP outputs for the same prediction
- Prepare a presentation-ready explainability report
Prerequisites
- Python 3.10+ with pip
- Trained model `models/model_v1.joblib` from Module 2
- Training data `data/X_train.npy` and `data/X_test.npy`
No Model or Data?
If you don't have files from previous modules, run this setup script first:
# setup_explainability.py
#
# Builds a synthetic loan-style dataset, trains a RandomForest classifier,
# and saves the model plus train/test splits where the lab expects them.
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import joblib
import numpy as np
from pathlib import Path

# Well-separated binary classification problem with 5 features.
features, labels = make_classification(
    n_samples=1000,
    n_features=5,
    n_informative=4,
    n_redundant=1,
    random_state=42,
    class_sep=1.5,
)

X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
print(f"Model accuracy: {clf.score(X_test, y_test):.4f}")

# Ensure the directory layout used by the rest of the lab exists.
for folder in ("models", "data", "outputs"):
    Path(folder).mkdir(exist_ok=True)

joblib.dump(clf, "models/model_v1.joblib")
for stem, arr in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    np.save(f"data/{stem}.npy", arr)
print("Setup complete — model, train/test data saved")
Lab Overview
Step 1 — Setup and Load Model
1.1 Install Dependencies
pip install lime shap matplotlib numpy joblib scikit-learn
1.2 Create the Explainability Script
Create a file explainability_lab.py:
# explainability_lab.py — Step 1: Load model and data
import joblib
import numpy as np
import matplotlib
matplotlib.use("Agg")  # headless backend: figures are saved to files, never shown
import matplotlib.pyplot as plt
from pathlib import Path

# Human-readable labels for the synthetic dataset's 5 columns and 2 classes.
FEATURE_NAMES = [
    "credit_score",
    "annual_income",
    "debt_ratio",
    "employment_years",
    "loan_amount",
]
CLASS_NAMES = ["Rejected", "Approved"]

# All plots and reports produced by this lab are written here.
OUTPUT_DIR = Path("outputs")
OUTPUT_DIR.mkdir(exist_ok=True)

# Load the artifacts produced by the training/setup step.
model = joblib.load("models/model_v1.joblib")
X_train = np.load("data/X_train.npy")
X_test = np.load("data/X_test.npy")
y_test = np.load("data/y_test.npy")

print(f"Model type: {type(model).__name__}")
print(f"Training samples: {X_train.shape[0]}")
print(f"Test samples: {X_test.shape[0]}")
print(f"Features: {X_train.shape[1]}")
print(f"Model accuracy: {model.score(X_test, y_test):.4f}")

# Select a single test instance that is explained throughout the lab.
instance_idx = 0
instance = X_test[instance_idx]
true_label = y_test[instance_idx]
prediction = model.predict([instance])[0]
probas = model.predict_proba([instance])[0]

print(f"\n--- Instance #{instance_idx} ---")
print(f"Features: {dict(zip(FEATURE_NAMES, instance.round(3)))}")
print(f"True label: {CLASS_NAMES[true_label]} ({true_label})")
print(f"Prediction: {CLASS_NAMES[prediction]} ({prediction})")
print(f"Probabilities: {CLASS_NAMES[0]}={probas[0]:.4f}, {CLASS_NAMES[1]}={probas[1]:.4f}")
Run it:
python explainability_lab.py
Expected output:
Model type: RandomForestClassifier
Training samples: 800
Test samples: 200
Features: 5
Model accuracy: 0.9250
--- Instance #0 ---
Features: {'credit_score': 1.234, 'annual_income': -0.567, ...}
True label: Approved (1)
Prediction: Approved (1)
Probabilities: Rejected=0.1200, Approved=0.8800
Step 2 — LIME Explanations
2.1 Create the LIME Explainer
Add to your script:
# Step 2: LIME explanations
import lime
import lime.lime_tabular

# The tabular explainer learns per-feature statistics from the training data;
# these drive the perturbations used to fit the local surrogate model.
# discretize_continuous=True turns numeric features into interpretable
# range rules (e.g. "credit_score > 0.53") in the explanation output.
lime_explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=X_train,
    feature_names=FEATURE_NAMES,
    class_names=CLASS_NAMES,
    mode="classification",
    random_state=42,  # fixed seed so explanations are reproducible
    discretize_continuous=True,
)
print("\nLIME Explainer created successfully")
2.2 Explain a Single Prediction
# Fit a local surrogate around the selected instance; num_samples controls
# how many perturbed points LIME generates to learn the linear approximation.
lime_explanation = lime_explainer.explain_instance(
    data_row=instance,
    predict_fn=model.predict_proba,
    num_features=5,
    num_samples=5000,
)

print(f"\n--- LIME Explanation for Instance #{instance_idx} ---")
print(f"Predicted class: {CLASS_NAMES[prediction]}")
print(f"Local prediction (LIME): {lime_explanation.local_pred[0]:.4f}")
print(f"\nFeature contributions (toward {CLASS_NAMES[1]}):")

# Each entry is (rule string, signed weight); render a simple text bar chart.
for feature_rule, weight in lime_explanation.as_list():
    sign = "+" if weight > 0 else ""
    magnitude = "█" * int(abs(weight) * 50)
    print(f" {feature_rule:40s} {sign}{weight:.4f} {magnitude}")
2.3 Save LIME Visualization
# Persist the LIME explanation both as a static PNG and an interactive HTML page.
lime_png_path = OUTPUT_DIR / "lime_explanation_instance_0.png"
lime_html_path = OUTPUT_DIR / "lime_explanation_instance_0.html"

fig = lime_explanation.as_pyplot_figure()
fig.set_size_inches(10, 6)
fig.tight_layout()
fig.savefig(lime_png_path, dpi=150, bbox_inches="tight")
plt.close(fig)
print(f"\nLIME plot saved to {lime_png_path}")

lime_explanation.save_to_file(str(lime_html_path))
print(f"LIME HTML saved to {lime_html_path}")
2.4 Explain Multiple Instances
print("\n--- LIME Explanations for Multiple Instances ---")
lime_results = []
for idx in range(5):
inst = X_test[idx]
pred = model.predict([inst])[0]
exp = lime_explainer.explain_instance(
data_row=inst,
predict_fn=model.predict_proba,
num_features=5,
num_samples=3000,
)
lime_results.append({
"index": idx,
"prediction": CLASS_NAMES[pred],
"contributions": exp.as_list(),
})
print(f" Instance #{idx}: {CLASS_NAMES[pred]:10s} | "
f"Top feature: {exp.as_list()[0][0]} ({exp.as_list()[0][1]:+.4f})")
Step 3 — SHAP Explanations
3.1 Create the SHAP Explainer
# Step 3: SHAP explanations
import shap

# TreeExplainer computes exact SHAP values for tree ensembles (no sampling).
shap_explainer = shap.TreeExplainer(model)

# Older shap releases return a list of per-class arrays for classifiers;
# newer releases return a single 3-D array of shape
# (n_samples, n_features, n_classes). Normalize to the list layout that the
# rest of this lab indexes as shap_values[class][sample][feature].
_raw_values = shap_explainer.shap_values(X_test)
if isinstance(_raw_values, np.ndarray) and _raw_values.ndim == 3:
    shap_values = [_raw_values[:, :, k] for k in range(_raw_values.shape[2])]
else:
    shap_values = _raw_values

print(f"\n--- SHAP Analysis ---")
print(f"SHAP values shape: {np.array(shap_values).shape}")
# NOTE(review): expected_value is assumed to be a length-2 array (one base
# value per class); confirm this holds for the installed shap version.
print(f"Base value (Class 0): {shap_explainer.expected_value[0]:.4f}")
print(f"Base value (Class 1): {shap_explainer.expected_value[1]:.4f}")
3.2 Explain a Single Prediction with SHAP
print(f"\n--- SHAP Explanation for Instance #{instance_idx} ---")
print(f"Base value: {shap_explainer.expected_value[1]:.4f}")
print(f"Prediction probability (Class 1): {probas[1]:.4f}")
print(f"\nFeature SHAP values (toward {CLASS_NAMES[1]}):")

# Row of per-feature SHAP values (class 1) for the chosen instance.
instance_shap = shap_values[1][instance_idx]
for i, fname in enumerate(FEATURE_NAMES):
    sv = instance_shap[i]
    fv = instance[i]
    sign = "+" if sv > 0 else ""
    strength = "█" * int(abs(sv) * 100)
    print(f" {fname:20s} (value={fv:+.3f}): {sign}{sv:.4f} {strength}")

# Local accuracy check: base value + sum of SHAP values ≈ predicted probability.
shap_sum = instance_shap.sum()
reconstructed = shap_explainer.expected_value[1] + shap_sum
print(f"\nSum of SHAP values: {shap_sum:.4f}")
print(f"Base + SHAP sum: {reconstructed:.4f}")
print(f"Actual prediction: {probas[1]:.4f}")
print(f"Difference: {abs(reconstructed - probas[1]):.6f}")
SHAP Additivity Property
Notice how base_value + sum(SHAP values) ≈ prediction probability. This is the local accuracy property of SHAP — the explanation perfectly accounts for the prediction.
Step 4 — Visualizations
4.1 SHAP Force Plot
# 4.1 — Force plot: shows how each feature pushes the prediction away from
# the base value for this single instance.
shap.initjs()
plt.figure()
shap.force_plot(
    base_value=shap_explainer.expected_value[1],
    shap_values=shap_values[1][instance_idx],
    features=instance,
    feature_names=FEATURE_NAMES,
    matplotlib=True,
    show=False,
)
force_path = OUTPUT_DIR / "shap_force_plot.png"
plt.savefig(force_path, dpi=150, bbox_inches="tight")
plt.close()
print(f"Force plot saved to {force_path}")
4.2 SHAP Summary Plot (Global)
# 4.2 — Global beeswarm summary: feature importance plus value distribution
# over the whole test set.
plt.figure(figsize=(10, 6))
shap.summary_plot(
    shap_values[1],
    features=X_test,
    feature_names=FEATURE_NAMES,
    show=False,
)
plt.tight_layout()
summary_path = OUTPUT_DIR / "shap_summary_plot.png"
plt.savefig(summary_path, dpi=150, bbox_inches="tight")
plt.close()
print(f"Summary plot saved to {summary_path}")
4.3 SHAP Bar Plot (Mean Importance)
# 4.3 — Bar chart of mean absolute SHAP values (global feature importance).
plt.figure(figsize=(10, 6))
shap.summary_plot(
    shap_values[1],
    features=X_test,
    feature_names=FEATURE_NAMES,
    plot_type="bar",
    show=False,
)
plt.tight_layout()
bar_path = OUTPUT_DIR / "shap_bar_plot.png"
plt.savefig(bar_path, dpi=150, bbox_inches="tight")
plt.close()
print(f"Bar plot saved to {bar_path}")
4.4 SHAP Waterfall Plot
# 4.4 — Waterfall plot: step-by-step additive decomposition from the base
# value to the final prediction for the chosen instance.
waterfall_exp = shap.Explanation(
    values=shap_values[1][instance_idx],
    base_values=shap_explainer.expected_value[1],
    data=instance,
    feature_names=FEATURE_NAMES,
)
plt.figure(figsize=(10, 6))
shap.waterfall_plot(waterfall_exp, show=False)
plt.tight_layout()
waterfall_path = OUTPUT_DIR / "shap_waterfall_plot.png"
plt.savefig(waterfall_path, dpi=150, bbox_inches="tight")
plt.close()
print(f"Waterfall plot saved to {waterfall_path}")
4.5 SHAP Dependence Plots
# 4.5 — Dependence plots for the first three features: SHAP value vs feature
# value, automatically colored by the strongest interacting feature.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
for ax, feature_idx in zip(axes, (0, 1, 2)):
    shap.dependence_plot(
        ind=feature_idx,
        shap_values=shap_values[1],
        features=X_test,
        feature_names=FEATURE_NAMES,
        ax=ax,
        show=False,
    )
plt.tight_layout()
dependence_path = OUTPUT_DIR / "shap_dependence_plots.png"
plt.savefig(dependence_path, dpi=150, bbox_inches="tight")
plt.close()
print(f"Dependence plots saved to {dependence_path}")
4.6 Multi-Instance Waterfall Comparison
# 4.6 — Waterfall decompositions for three different test instances rendered
# side by side in a single figure for comparison.
fig, axes = plt.subplots(1, 3, figsize=(24, 6))
for plot_idx, inst_idx in enumerate([0, 5, 10]):
    exp = shap.Explanation(
        values=shap_values[1][inst_idx],
        base_values=shap_explainer.expected_value[1],
        data=X_test[inst_idx],
        feature_names=FEATURE_NAMES,
    )
    # Select the target subplot before drawing; waterfall_plot renders on the
    # current axes. NOTE(review): some shap versions create their own figure
    # inside waterfall_plot — confirm subplots render correctly with the
    # installed version.
    plt.sca(axes[plot_idx])
    shap.waterfall_plot(exp, show=False)
    axes[plot_idx].set_title(
        f"Instance #{inst_idx} — {CLASS_NAMES[model.predict([X_test[inst_idx]])[0]]}"
    )
plt.tight_layout()
plt.savefig(OUTPUT_DIR / "shap_multi_waterfall.png", dpi=150, bbox_inches="tight")
plt.close()
print(f"Multi-waterfall saved to {OUTPUT_DIR / 'shap_multi_waterfall.png'}")
Step 5 — Compare LIME vs SHAP
# Step 5: Side-by-side comparison
# Step 5: Side-by-side comparison of LIME and SHAP attributions.
print("\n" + "=" * 70)
print("LIME vs SHAP Comparison — Instance #0")
print("=" * 70)


def _lime_rule_feature(rule):
    """Map a LIME rule string (e.g. '0.12 < credit_score <= 0.80') to its feature name."""
    for fname in FEATURE_NAMES:
        if fname in rule:
            return fname
    return rule  # fallback: keep the raw rule if no known feature matches


# Key LIME weights by feature name so each feature is compared against its own
# SHAP value. Fixes two defects in the original: (1) `rule.split(" ")[0]`
# returns a number, not a feature name, for range rules; (2) lime_list[i] was
# paired with FEATURE_NAMES[i], but LIME sorts its list by |weight|, so the
# two columns compared unrelated features.
lime_contributions = {
    _lime_rule_feature(rule): weight for rule, weight in lime_explanation.as_list()
}
shap_contributions = {
    FEATURE_NAMES[i]: shap_values[1][instance_idx][i]
    for i in range(len(FEATURE_NAMES))
}

print(f"\n{'Feature':<25s} {'LIME':>10s} {'SHAP':>10s} {'Agreement':>12s}")
print("-" * 60)
for fname in FEATURE_NAMES:
    shap_val = shap_contributions[fname]
    lime_val = lime_contributions.get(fname, 0.0)
    agree = "✅ Same" if (lime_val > 0 and shap_val > 0) or (lime_val < 0 and shap_val < 0) else "⚠️ Differ"
    print(f" {fname:<23s} {lime_val:>+10.4f} {shap_val:>+10.4f} {agree:>12s}")

# Rank comparison (by absolute contribution, descending).
lime_ranked = [_lime_rule_feature(rule) for rule, _ in sorted(
    lime_explanation.as_list(), key=lambda x: abs(x[1]), reverse=True
)]
shap_ranked = [FEATURE_NAMES[i] for i in np.argsort(np.abs(shap_values[1][instance_idx]))[::-1]]
print(f"\nFeature ranking (by importance):")
print(f" {'Rank':<6s} {'LIME':>25s} {'SHAP':>25s}")
for rank in range(min(5, len(FEATURE_NAMES))):
    lime_f = lime_ranked[rank] if rank < len(lime_ranked) else "—"
    shap_f = shap_ranked[rank]
    print(f" #{rank+1:<5d} {lime_f:>25s} {shap_f:>25s}")
Step 6 — Explainability Report
6.1 Generate Report Data
# Step 6: Generate explainability report
import json
from datetime import datetime

# Global importance: mean magnitude of each feature's SHAP value across the
# whole test set, ranked from most to least influential.
mean_abs_shap = np.mean(np.abs(shap_values[1]), axis=0)
global_ranking = np.argsort(mean_abs_shap)[::-1]

report = {
    "report_date": datetime.now().isoformat(),
    "model_type": type(model).__name__,
    "model_accuracy": float(model.score(X_test, y_test)),
    "num_test_samples": int(X_test.shape[0]),
    "feature_names": FEATURE_NAMES,
    "class_names": CLASS_NAMES,
    "global_feature_importance": {
        FEATURE_NAMES[feat]: {
            "rank": position + 1,
            "mean_abs_shap": float(mean_abs_shap[feat]),
        }
        for position, feat in enumerate(global_ranking)
    },
    "sample_explanations": [],
}

# Attach detailed local explanations for the first three test instances.
for idx in range(3):
    row = X_test[idx]
    predicted = model.predict([row])[0]
    proba = model.predict_proba([row])[0]
    report["sample_explanations"].append({
        "instance_index": idx,
        "prediction": CLASS_NAMES[predicted],
        "confidence": float(max(proba)),
        "true_label": CLASS_NAMES[int(y_test[idx])],
        "shap_values": dict(zip(FEATURE_NAMES, (float(v) for v in shap_values[1][idx]))),
        "feature_values": dict(zip(FEATURE_NAMES, (float(v) for v in row))),
    })

json_path = OUTPUT_DIR / "explainability_report.json"
with open(json_path, "w") as f:
    json.dump(report, f, indent=2)
print(f"\nJSON report saved to {json_path}")
6.2 Generate a Markdown Report
# 6.2 — Build a presentation-ready Markdown report from the SHAP results.
md_lines = [
    "# Model Explainability Report",
    f"\n**Date**: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
    f"\n**Model**: {type(model).__name__}",
    f"\n**Accuracy**: {model.score(X_test, y_test):.2%}",
    f"\n**Test samples analyzed**: {X_test.shape[0]}",
    "\n---\n",
    "## Global Feature Importance (SHAP)\n",
    # Pipes inside a table cell must be escaped (\|) or Markdown splits the
    # row into extra columns; the unescaped "Mean |SHAP|" broke the table.
    "| Rank | Feature | Mean \\|SHAP\\| | Relative Impact |",
    "|------|---------|-------------|-----------------|",
]
max_shap = mean_abs_shap.max()
for rank, i in enumerate(global_ranking):
    # Scale each bar relative to the most important feature (max 20 chars).
    bar_len = int(mean_abs_shap[i] / max_shap * 20)
    bar = "█" * bar_len
    md_lines.append(
        f"| {rank+1} | {FEATURE_NAMES[i]} | {mean_abs_shap[i]:.4f} | {bar} |"
    )

md_lines.extend([
    "\n---\n",
    "## Sample Explanations\n",
])
for idx in range(3):
    inst = X_test[idx]
    pred = model.predict([inst])[0]
    prob = model.predict_proba([inst])[0]
    md_lines.append(f"\n### Instance #{idx}\n")
    md_lines.append(f"- **Prediction**: {CLASS_NAMES[pred]} (confidence: {max(prob):.2%})")
    md_lines.append(f"- **True label**: {CLASS_NAMES[int(y_test[idx])]}")
    # Plain strings: these two rows contain no interpolation (f-prefix removed).
    md_lines.append("\n| Feature | Value | SHAP Value | Direction |")
    md_lines.append("|---------|-------|------------|-----------|")
    for i in range(len(FEATURE_NAMES)):
        sv = shap_values[1][idx][i]
        direction = f"→ {CLASS_NAMES[1]}" if sv > 0 else f"→ {CLASS_NAMES[0]}"
        md_lines.append(
            f"| {FEATURE_NAMES[i]} | {inst[i]:.3f} | {sv:+.4f} | {direction} |"
        )

md_lines.extend([
    "\n---\n",
    "## Visualizations\n",
    "- `shap_summary_plot.png` — Global feature importance with value distribution",
    "- `shap_bar_plot.png` — Mean absolute SHAP values per feature",
    "- `shap_force_plot.png` — Force plot for Instance #0",
    "- `shap_waterfall_plot.png` — Waterfall plot for Instance #0",
    "- `shap_dependence_plots.png` — Feature interaction plots",
    "- `lime_explanation_instance_0.png` — LIME explanation for Instance #0",
    "\n---\n",
    "## Conclusions\n",
    f"The most influential feature is **{FEATURE_NAMES[global_ranking[0]]}**, "
    f"with a mean |SHAP| of {mean_abs_shap[global_ranking[0]]:.4f}.\n",
    f"The least influential feature is **{FEATURE_NAMES[global_ranking[-1]]}**, "
    f"suggesting it could potentially be removed without significant accuracy loss.\n",
    "LIME and SHAP generally agree on feature direction but may rank features differently "
    "due to their different methodologies (local linear approximation vs. game-theoretic attribution).",
])

report_path = OUTPUT_DIR / "explainability_report.md"
with open(report_path, "w", encoding="utf-8") as f:
    f.write("\n".join(md_lines))
print(f"Markdown report saved to {report_path}")
6.3 Summary of Generated Files
ls outputs/
Expected:
outputs/
├── explainability_report.json
├── explainability_report.md
├── lime_explanation_instance_0.html
├── lime_explanation_instance_0.png
├── shap_bar_plot.png
├── shap_dependence_plots.png
├── shap_force_plot.png
├── shap_multi_waterfall.png
├── shap_summary_plot.png
└── shap_waterfall_plot.png
Validation Checklist
Before moving to the quiz, verify:
- Model is loaded and working (accuracy displayed)
- LIME explains an individual prediction (contributions displayed)
- LIME generates a PNG plot and an HTML file
- SHAP computes SHAP values for the entire test set
- SHAP additivity property is verified (base + sum ≈ prediction)
- 6 SHAP visualizations generated (force, summary, bar, waterfall, dependence, multi-waterfall)
- LIME vs SHAP comparison displayed
- JSON and Markdown reports generated in `outputs/`
Going Further
Challenge: Integrate SHAP into the FastAPI API
Add an endpoint /api/v1/explain to your API:
@app.post("/api/v1/explain")
def explain_prediction(request: PredictionRequest):
    """Return the model's prediction plus per-feature SHAP attributions.

    Response fields: predicted class index, the SHAP base value for class 1,
    and for each feature its input value, SHAP value, and contribution sign.
    """
    # Wrap in a 2-D array: the model and explainer expect (n_samples, n_features).
    features = np.array([request.features])
    prediction = model.predict(features)[0]
    # NOTE(review): shap_vals[1][0] assumes the legacy shap API that returns a
    # per-class list of arrays — confirm against the installed shap version.
    shap_vals = shap_explainer.shap_values(features)
    explanation = {
        "prediction": int(prediction),
        # Base value toward class 1 ("Approved" in this lab's convention).
        "base_value": float(shap_explainer.expected_value[1]),
        "feature_contributions": {
            FEATURE_NAMES[i]: {
                "value": float(request.features[i]),
                "shap_value": float(shap_vals[1][0][i]),
                "direction": "positive" if shap_vals[1][0][i] > 0 else "negative",
            }
            for i in range(len(FEATURE_NAMES))
        },
    }
    return explanation
Challenge: Detect bias with SHAP
Check if a "sensitive" feature (e.g., gender, age) has a disproportionate impact:
# Inspect whether a proxy-sensitive feature dominates the model's decisions.
sensitive_feature_idx = 3  # employment_years as proxy
sensitive_shap = shap_values[1][:, sensitive_feature_idx]

print(f"Mean SHAP for {FEATURE_NAMES[sensitive_feature_idx]}:")
for cls in (0, 1):
    print(f" Class {cls} predictions: {sensitive_shap[y_test == cls].mean():.4f}")
overall_impact = sensitive_shap.mean()
print(f" Overall: {overall_impact:.4f}")

if abs(overall_impact) > 0.1:
    print("⚠️ This feature has significant impact — review for fairness")
else:
    print("✅ Impact is moderate — no immediate fairness concern")
Well done!
You now know how to explain your model's predictions with LIME and SHAP. These skills are essential for regulatory compliance (EU AI Act), model debugging, and communication with stakeholders.