import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))
from library.pipeline.pipeline import Pipeline
from library.pipeline.analysis.neuralNets.neuralNetsPlots import NeuralNetsPlots
from library.utils.miscellaneous.save_or_store_plot import save_or_store_plot
from library.utils.miscellaneous.eliminate_unsued_plots import eliminate_unused_plots
import yaml
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.metrics import classification_report
from sklearn.inspection import permutation_importance
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.model_selection import train_test_split
import lime
import lime.lime_tabular
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import math
import pandas as pd
import numpy as np
class PipelinesAnalysis:
def __init__(self, pipelines: dict[str, dict[str, Pipeline]]):
self.pipelines = pipelines
self.encoded_map = None
self.phase = None
self.best_performing_model = None
self.neural_nets_plots = None
with open("library/configurations.yaml") as config_file:
    self.variables = yaml.load(config_file, Loader=yaml.FullLoader)
# The two attributes below store the results of the analysis for each tuning phase.
self.merged_report_per_phase = {
"pre": None,
"in": None,
"post": None
}
self.results_per_phase = {
"pre": {
"classification_report": None,
"classification_report_train": None,
"metrics_df": None
},
"in": {
"classification_report": None,
"classification_report_train": None,
"metrics_df": None
},
"post": {
"classification_report": None,
"classification_report_train": None,
"metrics_df": None
}
}
def _create_report_dataframe(self, report: dict, modelName: str, include_training: bool = False):
"""_summary_
Args:
report (dict): _description_
modelName (str): _description_
include_training (bool, optional): _description_. Defaults to False.
Returns:
_type_: _description_
"""
accuracy = report.pop('accuracy')
report['modelName'] = modelName + ("_train" if include_training else "")
df = pd.DataFrame(report)
df.loc['accuracy'] = accuracy
df.loc['accuracy', 'modelName'] = modelName + ("_train" if include_training else "")
return df
def _add_additional_metrics_to_report(self, df: pd.DataFrame, modelName: str, additional_metrics: dict, include_training: bool = False):
"""_summary_
Args:
df (pd.DataFrame): _description_
modelName (str): _description_
additional_metrics (dict): _description_
include_training (bool, optional): _description_. Defaults to False.
Returns:
_type_: _description_
"""
if not include_training:
for key, value in additional_metrics["not_train"].items():
df.loc[key] = value
df.loc[key, "modelName"] = modelName
else:
for key, value in additional_metrics["train"].items():
key = key.split("_")[0]  # keep only the base metric name (drop the suffix after the first underscore)
df.loc[key] = value
df.loc[key, "modelName"] = modelName + "_train"
return df
def _compute_classification_report(self, include_training: bool = False):
"""
Computes the classification report for each model in the pipelines for the current phase.
Optionally includes training data evaluation and maps encoded class labels.
Parameters
----------
include_training : bool, optional
Whether to include classification reports for training data (default is False).
Returns
-------
pd.DataFrame
Merged classification report for all evaluated models in the current phase.
"""
assert self.phase in ["pre", "in", "post"], "Phase must be either pre, in or post"
classification_reports = []
for category in self.pipelines:
if self.phase == "in" and category == "baseline": # We do not evaluate the baseline models while tuning (cause they are not tuned)
continue
for pipeline in self.pipelines[category]:
for modelName in self.pipelines[category][pipeline].modelling.list_of_models:
if self.phase == "post" and category == "not_baseline" and self.best_performing_model["modelName"] != modelName: # Only select the model that is the best if pipeline is in post mode
continue
if modelName not in self.pipelines[category][pipeline].modelling.models_to_exclude:  # Skip models that are marked as excluded
additional_metrics = self.pipelines[category][pipeline].modelling.list_of_models[modelName].tuning_states[self.phase].assesment["metrics"]["additional_metrics"]
if self.phase != "post":
y_pred = self.pipelines[category][pipeline].modelling.list_of_models[modelName].tuning_states[self.phase].assesment["predictions_val"]
y_true = self.pipelines[category][pipeline].modelling.dataset.y_val
assert y_pred is not None, f"Predictions are None for model: {modelName}. Phase: {self.phase}, Category: {category}, Pipeline: {pipeline}"
assert y_true is not None, f"Actual is None for model: {modelName}"
not_train_report = classification_report(y_true, y_pred, output_dict=True, zero_division=0)
df_not_train = self._create_report_dataframe(not_train_report, modelName)
df_not_train = self._add_additional_metrics_to_report(df_not_train, modelName, additional_metrics)
if include_training:  # intra-model evaluation: compare training vs. validation performance to gauge overfitting
y_pred_train = self.pipelines[category][pipeline].modelling.list_of_models[modelName].tuning_states[self.phase].assesment["predictions_train"]
y_true_train = self.pipelines[category][pipeline].modelling.dataset.y_train
training_report = classification_report(y_true_train, y_pred_train, output_dict=True, zero_division=0)
df_training_report = self._create_report_dataframe(training_report, modelName, include_training=True)
df_training_report = self._add_additional_metrics_to_report(df_training_report, modelName, additional_metrics, include_training=True)
else:
y_pred = self.pipelines[category][pipeline].modelling.list_of_models[modelName].tuning_states[self.phase].assesment["predictions_test"]
y_true = self.pipelines[category][pipeline].modelling.dataset.y_test
not_train_report = classification_report(y_true, y_pred, output_dict=True, zero_division=0)
df_not_train = self._create_report_dataframe(not_train_report, modelName)
df_not_train = self._add_additional_metrics_to_report(df_not_train, modelName, additional_metrics)
if include_training:
y_pred_train = self.pipelines[category][pipeline].modelling.list_of_models[modelName].tuning_states[self.phase].assesment["predictions_train"]
train = self.pipelines[category][pipeline].modelling.dataset.y_train
val = self.pipelines[category][pipeline].modelling.dataset.y_val
y_true_train = np.concatenate([train, val])
training_report = classification_report(y_true_train, y_pred_train, output_dict=True, zero_division=0)
df_training_report = self._create_report_dataframe(training_report, modelName, include_training=True)
df_training_report = self._add_additional_metrics_to_report(df_training_report, modelName, additional_metrics, include_training=True)
classification_reports.append(df_not_train)
if include_training:
classification_reports.append(df_training_report)
self.merged_report_per_phase[self.phase] = pd.concat(classification_reports).T  # Merge the reports of every model across all pipelines
# If an encoding map is available, translate encoded target values in the index back to their original class names.
if self.encoded_map is not None:
    reverse_map = {str(v): k for k, v in self.encoded_map.items()}  # {encoded value: class name}
    index = self.merged_report_per_phase[self.phase].index.tolist()
    new_index = [reverse_map.get(idx, idx) for idx in index]
    self.merged_report_per_phase[self.phase].index = new_index
return self.merged_report_per_phase[self.phase]
def plot_cross_model_comparison(self, metrics: list[str] = None, cols: int = 2, save_plots: bool = False, save_path: str = None):
"""
Plots a comparison of classification metrics across different models for the current phase.
Generates subplots for each selected metric and optionally saves the result.
Parameters
----------
metrics : list of str, optional
List of metric names to include in the plots. If None, default classification metrics are used.
cols : int, optional
Number of columns in the subplot grid (default is 2).
save_plots : bool, optional
Whether to save the generated plots to disk (default is False).
save_path : str, optional
Directory path where plots should be saved if save_plots is True.
Returns
-------
None
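Examples
--------
A minimal usage sketch, not a prescribed workflow: it assumes `analysis` is an
initialised `PipelinesAnalysis` with `phase` already set, and `"results/plots"`
is just a placeholder output directory.

>>> analysis.phase = "pre"
>>> analysis.plot_cross_model_comparison(metrics=["precision", "recall"],
...                                      cols=2,
...                                      save_plots=True,
...                                      save_path="results/plots")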
"""
assert self.phase in ["pre", "in", "post"], "Phase must be either pre, in or post"
if not metrics:
metrics = self.variables["dataset_runner"]["metrics_to_evaluate"]["classification"]
print(f"PLOTTING CROSS MODEL COMPARISON FOR {self.phase} PHASE")
# Compute the classification report DataFrame.
class_report_df = self._compute_classification_report()
self.results_per_phase[self.phase]["classification_report"] = class_report_df
num_metrics = len(metrics)
rows = math.ceil(num_metrics / cols)
fig, axes = plt.subplots(rows, cols, figsize=(cols * 8, rows * 7))
axes = axes.flatten()
for i, metric_key in enumerate(metrics):
print(f"Plotting: {metric_key}")
class_report_cols = class_report_df.columns
assert metric_key in class_report_cols, f"Metric not present in {class_report_cols}"
ax = axes[i]
metric_df = class_report_df[metric_key]
df_numeric = metric_df.iloc[:-1].astype(float)
model_names = metric_df.loc["modelName"]
if isinstance(model_names, str):  # single model: the metric column is a Series
    model_names = [model_names]
    ax.plot(df_numeric.index, df_numeric, marker='o', label=model_names[0])
else:
    model_names = model_names.values
    # Metrics with a single value per model (e.g. accuracy) are shown as bars
    isConstantMetric = len(set(df_numeric.iloc[:, 0])) == 1
    if isConstantMetric:
bars = ax.bar(model_names, df_numeric.iloc[0, :])
ax.bar_label(bars, fmt='%.4f')
else:
for k, model_name in enumerate(model_names):  # separate loop variable so `i` keeps indexing the current metric/axis
    ax.plot(df_numeric.index, df_numeric.iloc[:, k], marker='o', label=model_name)
ax.set_title(f'{metric_key} by Model')
ax.set_xlabel('Class Index')
ax.set_ylabel(metric_key)
ax.set_ylim(0, 1)
ax.tick_params(axis='x', rotation=45)
ax.legend()
ax.grid(True)
eliminate_unused_plots(fig, axes, i)
plt.tight_layout()
plt.suptitle(f"Cross-model Performance Comparison - {self.phase} phase")
plt.tight_layout(rect=[0, 0, 1, 0.96])
save_or_store_plot(fig, save_plots, directory_path=save_path + f"/{self.phase}/model_performance", filename=f"cross_model_comparison_{self.phase}.png")
def plot_intra_model_comparison(self, metrics: list[str] = None, save_plots: bool = False, save_path: str = None):
"""
Plots training vs validation/test performance for each model across selected metrics.
One row per model, each with side-by-side metric trends for comparison.
Parameters
----------
metrics : list of str, optional
List of metric names to plot. If None, uses default classification metrics.
save_plots : bool, optional
Whether to save the generated plots to disk (default is False).
save_path : str, optional
Directory path where plots should be saved if save_plots is True.
Returns
-------
None
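Examples
--------
A minimal sketch; it assumes `analysis` is an initialised `PipelinesAnalysis`
with `phase` set and that the requested metrics appear in the classification
report. `"results/plots"` is a placeholder output directory.

>>> analysis.plot_intra_model_comparison(metrics=["precision", "f1-score"],
...                                      save_plots=True,
...                                      save_path="results/plots")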
"""
print(f"METRICS IS {metrics}")
if not metrics:
metrics = self.variables["dataset_runner"]["metrics_to_evaluate"]["classification"]
class_report_df = self._compute_classification_report(include_training=True)
self.results_per_phase[self.phase]["classification_report_train"] = class_report_df
models = class_report_df.T["modelName"].unique()
models = {model.split("_")[0] for model in models}
num_metrics = len(metrics)
cols = num_metrics
rows = len(models)
fig, axes = plt.subplots(rows, cols, figsize=(cols * 6, rows * 5))
colors = ["red", "blue", "green", "purple", "orange", "brown", "pink", "gray", "cyan", "magenta"]
colors_length = len(colors)
for i, model in enumerate(models):
color_train = colors[i % colors_length]
color_no_train = colors[(i + 1) % colors_length]
for j, metric in enumerate(metrics):
class_report_cols = class_report_df.columns
assert metric in class_report_cols, f"Metric not present in {class_report_cols}"
model_filter = class_report_df.T["modelName"].str.startswith(model)
model_df = class_report_df.T[model_filter]
ax = axes[i, j]
metric_df = model_df.T[metric]
df_numeric = metric_df.iloc[:-1].astype(float)
model_names = metric_df.loc["modelName"].values
isConstantMetric = len(set(df_numeric.iloc[:, 0])) == 1
if isConstantMetric:
bars = ax.bar(model_names, df_numeric.iloc[0, :])
ax.bar_label(bars, fmt='%.4f')
else:
ax.plot(df_numeric.index, df_numeric.iloc[:, 0], marker="o", label=model_names[0], color=color_train)
ax.plot(df_numeric.index, df_numeric.iloc[:, 1], marker="s", label=model_names[1], color=color_no_train)
ax.set_title(f'{metric} - {model}')
ax.set_xlabel('Class Index')
ax.set_ylabel(metric)
ax.set_ylim(0, 1)
ax.tick_params(axis='x', rotation=45)
if metric != "accuracy":
ax.legend()
ax.grid(True)
plt.suptitle(f"Intra-model Performance Comparison - {self.phase} phase")
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()
save_or_store_plot(fig, save_plots, directory_path=save_path + f"/{self.phase}/model_performance", filename=f"intra_model_comparison_{self.phase}.png")
def plot_results_df(self, metrics: list[str], save_plots: bool = False, save_path: str = None):
"""
Plots general and time-based performance metrics (e.g., fit/predict time) for all models in the current phase.
Displays bar charts per metric and optionally saves the results.
Parameters
----------
metrics : list of str
List of metrics to visualize (e.g., accuracy, time_to_fit).
save_plots : bool, optional
Whether to save the generated plots to disk (default is False).
save_path : str, optional
Directory path where plots should be saved if save_plots is True.
Returns
-------
pd.DataFrame
Concatenated dataframe with the selected metrics for all models.
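Examples
--------
A minimal sketch; the metric names must match columns of the per-phase results
dataframes (`timeToFit` and `timeToPredict` are the ones referenced elsewhere in
this class), and `"results/plots"` is a placeholder output directory.

>>> metrics_df = analysis.plot_results_df(metrics=["timeToFit", "timeToPredict"],
...                                       save_plots=True,
...                                       save_path="results/plots")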
"""
assert self.phase in ["pre", "in", "post"], "Phase must be either pre, in or post"
dataframes = []
for category in self.pipelines:
for pipeline in self.pipelines[category]:
df = self.pipelines[category][pipeline].modelling.results_analysis[self.phase].phase_results_df
dataframes.append(df)
metrics_df = pd.concat(dataframes)
self.results_per_phase[self.phase]["metrics_df"] = metrics_df
print(f"Metrics df: {metrics_df.head(1)}")
num_metrics = len(metrics)
cols = 2
rows = math.ceil(num_metrics / cols)
fig, axes = plt.subplots(rows, cols, figsize=(cols * 6, rows * 5))
axes = axes.flatten() # Flatten to iterate easily, even if 1 row
for i, metric in enumerate(metrics):
ax = axes[i]
sns.barplot(data=metrics_df, x='modelName', y=metric, ax=ax, palette="viridis")
ax.set_title(f'{metric} by Model')
ax.set_xlabel('Model Name')
ax.set_ylabel(metric)
ax.tick_params(axis='x', rotation=45)
# Annotate values
for container in ax.containers:
ax.bar_label(container, fmt='%.4f', label_type='edge')
# Hide any unused subplots
for j in range(i + 1, len(axes)):
fig.delaxes(axes[j])
plt.tight_layout()
plt.suptitle(f"Model Performance - {self.phase} phase")
plt.tight_layout(rect=[0, 0, 1, 0.96])
save_or_store_plot(fig, save_plots, directory_path=save_path + f"/{self.phase}/model_performance", filename=f"time_based_model_performance_{self.phase}.png")
return metrics_df
def plot_feature_importance(self, save_plots: bool = False, save_path: str = None):
"""
Plots feature importance for each model in the current phase. Uses built-in importance attributes or permutation importance.
Only plots top features and optionally saves the results to disk.
Parameters
----------
save_plots : bool, optional
Whether to save the generated plots to disk (default is False).
save_path : str, optional
Directory path where plots should be saved if save_plots is True.
Returns
-------
None
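Examples
--------
A minimal sketch, assuming the models of the current phase have already been
fitted and evaluated; `"results/plots"` is a placeholder output directory.

>>> analysis.plot_feature_importance(save_plots=True, save_path="results/plots")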
"""
assert self.phase in ["pre", "in", "post"], "Phase must be either pre, in or post"
importances_dfs = {}
for pipeline in self.pipelines["not_baseline"]:
models = self.pipelines["not_baseline"][pipeline].modelling
for modelName in models.list_of_models:
# only keep the best model in post-phase
if self.phase == "post" and modelName != self.best_performing_model["modelName"]:
continue
# skip excluded models
if modelName in models.models_to_exclude:
continue
model = models.list_of_models[modelName]
ds = self.pipelines["not_baseline"][pipeline].dataset
# pick the right split
if self.phase == "in":
X, y = ds.X_val, ds.y_val
elif self.phase == "post":
X, y = ds.X_test, ds.y_test
else:
X, y = ds.X_train, ds.y_train
# compute importances
if hasattr(model, "feature_importances_"):
importances = model.feature_importances_
elif hasattr(model, "coef_"):
importances = np.abs(model.coef_).ravel()
else:
    # Fall back to permutation importance; for large datasets, subsample by position (not by label) to keep it tractable.
    if len(X) > 1000:
        # draw 1000 random row positions
        pos = np.random.RandomState(42).choice(len(X), size=1000, replace=False)
        X_sub = X.iloc[pos]  # .iloc slices by position
        y_sub = y.iloc[pos] if hasattr(y, "iloc") else y[pos]
        result = permutation_importance(
            model,
            X_sub, y_sub,
            n_repeats=3,
            random_state=42,
            n_jobs=-1  # use all available cores
        )
else:
result = permutation_importance(
model, X, y,
n_repeats=3,
random_state=42,
n_jobs=-1
)
importances = result.importances_mean
# sort
idx = np.argsort(importances)[::-1]
feats_sorted = X.columns.values[idx]
imps_sorted = importances[idx]
importances_dfs[(pipeline, modelName)] = (feats_sorted, imps_sorted)
# only plot top_n bars
top_n = 30
feats_plot = feats_sorted[:top_n]
imps_plot = imps_sorted[:top_n]
# cap the figure height
height = min(12, max(4, len(feats_plot) * 0.3))
fig, ax = plt.subplots(figsize=(8, height))
y_pos = np.arange(len(feats_plot))
ax.barh(y_pos, imps_plot)
ax.set_yticks(y_pos)
ax.set_yticklabels(feats_plot)
ax.invert_yaxis()
ax.set_xlabel("Importance")
ax.set_title(f"Feature Importances for {modelName} ({pipeline})")
plt.tight_layout()
if save_plots:
save_or_store_plot(
fig,
save_plots,
directory_path=save_path + f"/{self.phase}/feature_importance",
filename=f"feature_importance_{self.phase}_{pipeline}_{modelName}.png"
)
plt.close(fig)
return None
def lime_feature_importance(self, save_plots: bool = False, save_path: str = None):
"""
Computes and plots LIME-based feature importances for ensembled models in the current phase.
Generates barplots of the top contributing features for a single sample.
Parameters
----------
save_plots : bool, optional
Whether to save the generated LIME plots to disk (default is False).
save_path : str, optional
Directory path where plots should be saved if save_plots is True.
Returns
-------
dict
Dictionary mapping each pipeline to its LIME feature importance DataFrame.
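Examples
--------
A minimal sketch; only the "ensembled" pipeline is explained, so that is the key
to inspect in the returned dictionary. `"results/plots"` is a placeholder path.

>>> lime_dfs = analysis.lime_feature_importance(save_plots=True,
...                                             save_path="results/plots")
>>> top_features = lime_dfs["ensembled"].head()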
"""
assert self.phase in ["pre", "in", "post"], "Phase must be either pre, in or post"
lime_importances_dfs = {}
for pipeline in self.pipelines["not_baseline"]:
if pipeline not in ["ensembled"]:
continue
for modelName in self.pipelines["not_baseline"][pipeline].modelling.list_of_models:
if self.phase == "post" and modelName != self.best_performing_model["modelName"]:
continue
if modelName not in self.pipelines["not_baseline"][pipeline].modelling.models_to_exclude:
model = self.pipelines["not_baseline"][pipeline].modelling.list_of_models[modelName]
X_train = self.pipelines["not_baseline"][pipeline].dataset.X_train
X_sample = X_train.iloc[0]
explainer = lime.lime_tabular.LimeTabularExplainer(
training_data=X_train.values,
feature_names=X_train.columns.tolist(),
mode = "classification" if len(set(model.predict_default(X_train))) > 2 else "regression"
)
explanation = explainer.explain_instance(X_sample.values, model.predict_proba)
explanation_list = explanation.as_list()
feature_importances = {feature: weight for feature, weight in explanation_list}
feature_importance_df = pd.DataFrame({
'Feature': list(feature_importances.keys()),
'Importance': list(feature_importances.values())
}).sort_values(by='Importance', ascending=False)
lime_importances_dfs[pipeline] = feature_importance_df
for pipeline in lime_importances_dfs:
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(
x="Importance",
y="Feature",
data=lime_importances_dfs[pipeline],
ax=ax
)
ax.set_title(f"LIME explanation for {pipeline} model")
plt.tight_layout()
plt.tight_layout(rect=[0, 0, 1, 0.96])
save_or_store_plot(fig, save_plots, directory_path=save_path + f"/{self.phase}/modelName/lime_feature_importance", filename=f"lime_feature_importance_{self.phase}.png")
return lime_importances_dfs
def plot_multiclass_reliability_diagram(self, save_plots: bool = False, save_path: str = None):
"""
Plots multiclass reliability diagrams (one-vs-rest) for ensembled or tree-based models.
Each class's calibration curve is displayed to assess probabilistic calibration quality.
Parameters
----------
save_plots : bool, optional
Whether to save the generated plots to disk (default is False).
save_path : str, optional
Directory path where plots should be saved if save_plots is True.
Returns
-------
None
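Examples
--------
A minimal sketch; every evaluated model must expose `predict_proba`, and
`"results/plots"` is a placeholder output directory.

>>> analysis.plot_multiclass_reliability_diagram(save_plots=True,
...                                              save_path="results/plots")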
"""
assert self.phase in ["pre", "in", "post"], "Phase must be either pre, in or post"
# Only iterate over non-baseline pipelines
for pipeline_name, pipeline_obj in self.pipelines.get("not_baseline", {}).items():
m = pipeline_obj.modelling
ds = pipeline_obj.dataset
for model_name, model in m.list_of_models.items():
# Exclude unwanted models
if model_name in m.models_to_exclude:
continue
if self.phase == "post" and model_name != self.best_performing_model["modelName"]:
continue
# Grab train + (optional) calib splits
X_train, y_train = ds.X_train, ds.y_train
X_calib = getattr(ds, "X_calib", None)
y_calib = getattr(ds, "y_calib", None)
if X_calib is None or y_calib is None:
X_train, X_calib, y_train, y_calib = train_test_split(
X_train, y_train, test_size=0.2, random_state=42
)
# Ensure predict_proba exists
if not hasattr(model, "predict_proba"):
raise RuntimeError(f"Model {model_name!r} has no predict_proba—cannot plot reliability.")
# Get raw probabilities on calibration set
y_probs = model.predict_proba(X_calib)
# Plot one curve per class
n_classes = y_probs.shape[1]
class_labels = getattr(ds, "class_labels", list(range(n_classes)))
fig, ax = plt.subplots(figsize=(8, 6))
for i in range(n_classes):
y_true_bin = (y_calib == i).astype(int)
prob_true, prob_pred = calibration_curve(y_true_bin, y_probs[:, i], n_bins=10)
ax.plot(prob_pred, prob_true, marker="o", label=f"Class {class_labels[i]}")
ax.plot([0, 1], [0, 1], linestyle="--", color="gray", label="Perfectly Calibrated")
ax.set_xlabel("Mean Predicted Probability")
ax.set_ylabel("True Fraction of Positives")
ax.set_title(f"Reliability Diagram — {model_name} ({pipeline_name}) — {self.phase}")
ax.legend(loc="best")
ax.grid(True)
plt.tight_layout(rect=[0, 0, 1, 0.96])
# Build output directory and filename
if save_path:
out_dir = os.path.join(save_path, self.phase, "model_calibration")
else:
out_dir = None
filename = f"model_calibration_{model_name}_{self.phase}.png"
save_or_store_plot(fig, save_plots, directory_path=out_dir, filename=filename)
plt.close(fig)
return None
def plot_confusion_matrix(self, save_plots: bool = False, save_path: str = None):
"""
Plots both absolute and relative confusion matrices for all models in the current phase.
For each applicable model, this function computes and displays:
- An absolute confusion matrix (raw counts).
- A relative confusion matrix (normalized by actual class totals, in %).
Conditions such as model exclusions, phase-specific logic, and baseline filtering
are handled internally.
Parameters
----------
save_plots : bool, optional
Whether to save the generated plot to disk. Default is False.
save_path : str, optional
Path to the directory where plots will be saved (if save_plots is True).
Returns
-------
residuals : dict
Dictionary mapping each pipeline to its residuals (misclassified examples).
confusion_matrices : dict
Dictionary mapping each model name to its absolute and relative confusion matrices.
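Examples
--------
A minimal sketch; the model names keying `matrices` depend on the pipelines of
the current run, and `"results/plots"` is a placeholder output directory.

>>> residuals, matrices = analysis.plot_confusion_matrix(save_plots=True,
...                                                      save_path="results/plots")
>>> evaluated_models = list(matrices)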
"""
assert self.phase in ["pre", "in", "post"], "Phase must be either pre, in or post"
confusion_matrices = {}
residuals = {}
for category in self.pipelines:
for pipeline in self.pipelines[category]:
for modelName in self.pipelines[category][pipeline].modelling.list_of_models:
if modelName not in self.pipelines[category][pipeline].modelling.models_to_exclude:
if category == "not_baseline" and self.phase == "post" and modelName != self.best_performing_model["modelName"]:
continue
if self.phase == "in" and category == "baseline":
continue
if self.phase != "post":
pred = self.pipelines[category][pipeline].modelling.list_of_models[modelName].tuning_states[self.phase].assesment["predictions_val"]
actual = self.pipelines[category][pipeline].modelling.dataset.y_val
residuals[pipeline] = self.pipelines[category][pipeline].modelling.dataset.y_val[pred != actual]
else:
pred = self.pipelines[category][pipeline].modelling.list_of_models[modelName].tuning_states[self.phase].assesment["predictions_test"]
actual = self.pipelines[category][pipeline].modelling.dataset.y_test
residuals[pipeline] = self.pipelines[category][pipeline].modelling.dataset.y_test[pred != actual]
assert pred is not None, "Predictions are None"
assert actual is not None, "Actual is None"
assert len(pred) == len(actual), "Predictions and actual must be of the same length"
cm = confusion_matrix(actual, pred)
confusion_matrices[modelName] = {
"absolute": cm,
"relative": cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100
}
fig, axes = plt.subplots(len(confusion_matrices), 2, figsize=(15, 5* len(confusion_matrices)))
# Convert axes to 2D array if there's only one model
if len(confusion_matrices) == 1:
axes = np.array([axes])
# Get category labels if encoded_map exists
labels = None
if self.encoded_map is not None:
# Sort class names by their encoded value so they line up with the confusion-matrix axes
labels = sorted(self.encoded_map, key=self.encoded_map.get)
assert labels is not None, "Labels are None"
for i, (modelName, cm_data) in enumerate(confusion_matrices.items()):
print(f"Plotting: {modelName}")
# Absolute confusion matrix (raw counts rather than class-wise percentages)
sns.heatmap(cm_data["absolute"],
annot=True,
fmt='d',
cmap='Blues',
ax=axes[i, 0],
xticklabels=labels,
yticklabels=labels)
axes[i, 0].set_title(f"Absolute Confusion Matrix for model: {modelName}")
axes[i, 0].set_xlabel("Predicted")
axes[i, 0].set_ylabel("Actual")
# Relative Confusion Matrix
sns.heatmap(cm_data["relative"],
annot=True,
fmt='.1f',
cmap='Blues',
ax=axes[i, 1],
xticklabels=labels,
yticklabels=labels)
axes[i, 1].set_title(f"Relative Confusion Matrix for model: {modelName}")
axes[i, 1].set_xlabel("Predicted")
axes[i, 1].set_ylabel("Actual")
plt.tight_layout()
plt.suptitle(f"Confusion Matrix - {self.phase} phase")
plt.tight_layout(rect=[0, 0, 1, 0.96])
save_or_store_plot(fig, save_plots, directory_path=save_path + f"/{self.phase}/model_performance", filename=f"confusion_matrices_{self.phase}.png")
return residuals, confusion_matrices
def plot_residuals(self, save_plots: bool = False, save_path: str = None):
"""
Generates diagnostic plots of residuals for each model in the current phase.
For each applicable model, this function computes residuals and produces a 2x2 grid of:
1. Residuals vs. Predicted values
2. Residuals vs. Observed values
3. Histogram of residuals with KDE
4. QQ-plot of residuals to assess normality
Titles each figure as: “Residual plots for {modelName} in {phase} phase”.
Filters models according to phase, category, and exclusion rules.
Saves plots if `save_plots` is True.
Parameters
----------
save_plots : bool, optional
Whether to save the generated plots to disk. Default is False.
save_path : str, optional
Directory path where plots should be saved (used if save_plots is True).
Returns
-------
None
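Examples
--------
A minimal sketch; `"results/plots"` is a placeholder output directory.

>>> analysis.plot_residuals(save_plots=True, save_path="results/plots")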
"""
assert self.phase in ["pre", "in", "post"], "Phase must be pre, in or post"
residuals = {}
for category in self.pipelines:
for pipeline in self.pipelines[category]:
m = self.pipelines[category][pipeline].modelling
for modelName in m.list_of_models:
# same include/exclude logic as plot_confusion_matrix
if modelName in m.models_to_exclude:
continue
if category == "not_baseline" and self.phase == "post" \
and modelName != self.best_performing_model["modelName"]:
continue
if self.phase == "in" and category == "baseline":
continue
# exactly like plot_confusion_matrix:
model_obj = m.list_of_models[modelName]
if self.phase != "post":
preds = model_obj.tuning_states[self.phase].assesment["predictions_val"]
actuals = m.dataset.y_val
else:
preds = model_obj.tuning_states[self.phase].assesment["predictions_test"]
actuals = m.dataset.y_test
assert preds is not None, f"No predictions for {modelName}"
assert actuals is not None, f"No actuals for {modelName}"
assert len(preds) == len(actuals)
res = actuals - preds
residuals[modelName] = res
# build 2×2 figure
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()
fig.suptitle(f"Residual plots for {modelName} in {self.phase} phase")
# 1) vs Predicted
axes[0].scatter(preds, res, alpha=0.6)
axes[0].axhline(0, linestyle="--")
axes[0].set_xlabel("Predicted")
axes[0].set_ylabel("Residual")
axes[0].set_title("Residuals vs Predicted")
# 2) vs Observed
axes[1].scatter(actuals, res, alpha=0.6)
axes[1].axhline(0, linestyle="--")
axes[1].set_xlabel("Observed")
axes[1].set_ylabel("Residual")
axes[1].set_title("Residuals vs Observed")
# 3) Histogram
sns.histplot(res, kde=True, ax=axes[2])
axes[2].set_title("Histogram of Residuals")
# 4) QQ-Plot
stats.probplot(res, dist="norm", plot=axes[3])
axes[3].set_title("QQ-Plot of Residuals")
plt.tight_layout(rect=[0,0,1,0.95])
# save using same structure as confusion_matrix
save_or_store_plot(
fig,
save_plots,
directory_path=save_path + f"/{self.phase}/model_performance",
filename=f"residuals_{modelName}_{self.phase}.png"
)
plt.close(fig)
return None
def plot_results_summary(self, training_metric: str, performance_metric: str, save_plots: bool = False, save_path: str = None):
"""
Generates a scatterplot relating a training or prediction time metric
to a classification performance metric for models in the current phase.
The x-axis represents the time metric ("timeToFit" or "timeToPredict") on a log scale,
and the y-axis shows the classification performance metric, adjusted based on the phase
("pre", "in", or "post") to use either validation or test evaluation.
Each point represents a model and is labeled with its name.
Parameters
----------
training_metric : str
Time metric for the x-axis. Must be either "timeToFit" or "timeToPredict".
performance_metric : str
Performance metric for the y-axis. Must be a valid classification metric.
save_plots : bool, optional
Whether to save the plot to disk. Default is False.
save_path : str, optional
Directory path where plots will be saved if `save_plots` is True.
Returns
-------
None
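Examples
--------
A minimal sketch; `plot_results_df` must have been called first so that
`results_per_phase[phase]["metrics_df"]` is populated, and "accuracy" is assumed
to be listed under the configuration's classification metrics.

>>> analysis.plot_results_summary(training_metric="timeToFit",
...                               performance_metric="accuracy",
...                               save_plots=True,
...                               save_path="results/plots")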
"""
assert training_metric in ["timeToFit", "timeToPredict"], "training_metric must be either timeToFit or timeToPredict"
assert performance_metric in self.variables["dataset_runner"]["metrics_to_evaluate"]["classification"], "performance_metric must be a classification metric"
if self.phase == "pre" or self.phase == "in":
performance_metric += "_val"
else:
performance_metric += "_test"
metrics_df = self.results_per_phase[self.phase]["metrics_df"]
fig, ax = plt.subplots(figsize=(15, 8))
# draw the scatterplot without legend
sns.scatterplot(
data=metrics_df,
x=training_metric,
y=performance_metric,
hue="modelName",
legend=False,
s=150,
alpha=0.7,
ax=ax
)
for _, row in metrics_df.iterrows():
plt.annotate(
row["modelName"],
(row[training_metric], row[performance_metric]),
textcoords="offset points",
xytext=(5, 5),
ha='left',
va='bottom',
fontsize=9
)
plt.xlabel(f"{training_metric} (log scale)")
plt.ylabel(performance_metric)
plt.title(f"Model Performance: {training_metric} vs. {performance_metric}")
plt.tight_layout()
plt.ylim(0, 1)
plt.grid(True)
plt.xscale("log")
save_or_store_plot(fig, save_plots, directory_path=save_path + f"/{self.phase}/model_performance", filename=f"results_summary_{self.phase}.png")
[docs]
def plot_per_epoch_progress(self, metrics: list[str], save_plots: bool = False, save_path: str = None):
"""
Plots the progression of specified metrics over training epochs for a neural network model.
This function initializes a NeuralNetsPlots object for the feed-forward neural network model
corresponding to the current phase, and delegates the plotting of per-epoch metric progress
to that object.
Parameters
----------
metrics : list of str
List of metric names to plot over epochs.
save_plots : bool, optional
Whether to save the generated plots. Default is False.
save_path : str, optional
Directory path where plots will be saved if `save_plots` is True.
Returns
-------
None
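Examples
--------
A minimal sketch; the metric names must match those tracked by the underlying
`NeuralNetsPlots` history ("loss" and "accuracy" are assumptions here).

>>> analysis.plot_per_epoch_progress(metrics=["loss", "accuracy"],
...                                  save_plots=True,
...                                  save_path="results/plots")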
"""
self.neural_nets_plots = NeuralNetsPlots(self.pipelines["not_baseline"]["feed_forward_neural_network"].modelling.list_of_models["Feed Forward Neural Network"].tuning_states[self.phase].assesment["model_sklearn"])
self.neural_nets_plots.plot_per_epoch_progress(metrics, phase=self.phase, save_plots=save_plots, save_path=save_path)
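# A minimal end-to-end usage sketch (illustrative only): `build_pipelines` is a
# hypothetical factory and the encoding map below is made up; how the pipelines
# dict is actually assembled lives elsewhere in the project.
if __name__ == "__main__":
    # from library.pipeline.pipeline_factory import build_pipelines  # hypothetical helper
    # analysis = PipelinesAnalysis(pipelines=build_pipelines())
    # analysis.encoded_map = {"class_a": 0, "class_b": 1}
    # analysis.phase = "pre"
    # analysis.plot_cross_model_comparison(save_plots=True, save_path="results/plots")
    # analysis.plot_confusion_matrix(save_plots=True, save_path="results/plots")
    pass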