Source code for library.phases.phases_implementation.modelling.shallow.model_definition.model_states.model_state

from abc import ABC, abstractmethod
import numpy as np
import time
from library.phases.phases_implementation.dataset.dataset import Dataset
from library.phases.phases_implementation.modelling.shallow.model_optimization.model_optimization import Optimizer

from library.utils.ownModels.neuralNets.feedForward import FeedForwardNeuralNetwork

from library.utils.decorators.timer import timer


"""

The `assesment` dictionary currently has the following structure:
- `id`: NoneType
- `timeStamp`: NoneType
- `comments`: NoneType
- `modelName`: str
- `status`: str
- `features_used`: NoneType
- `hyperParameters`: NoneType
- `timeToFit`: float
- `timeToPredict`: float
- `accuracy`: float
- `precision`: float
- `recall`: float
- `f1-score`: float
- `predictions_val`: numpy.ndarray
- `predictions_train`: numpy.ndarray
- `predictions_test`: numpy.ndarray
- `model_sklearn`: sklearn
"""


class ModelState(ABC):
    """
    Abstract base class for all the model **states**.

    A state keeps an estimator together with the dataset it works on and an
    ``assesment`` dictionary in which results are recorded. Concrete states
    decide which data is used for fitting and for predicting.
    """

    def __init__(self, model_sklearn: object, modelName: str, model_type: str, dataset: Dataset, results_header: list[str]):
        """
        Store the estimator, its metadata and an empty assessment record.

        Parameters
        ----------
        model_sklearn : object
            The model to be used
        modelName : str
            Name of the model; also stored under ``assesment["modelName"]``.
        model_type : str
            Kind of model; stored unchanged on the instance.
        dataset : Dataset
            Dataset object; stored unchanged on the instance.
        results_header : list[str]
            Column names. Each one becomes a key of ``assesment`` whose
            value starts as ``None``.
        """
        self.model_sklearn = model_sklearn
        self.modelName = modelName
        self.model_type = model_type
        self.dataset = dataset

        # Every header column starts out empty; only the model name is known
        # at construction time.
        assessment_record = dict.fromkeys(results_header)
        assessment_record["modelName"] = modelName
        self.assesment = assessment_record

    @abstractmethod
    def get_fit_data(self):
        """
        Return the data used for fitting.

        Varies over each state (in post its training + val for instance).
        """

    @abstractmethod
    def get_predict_data(self):
        """Return the data used for predicting; defined by each state."""

    @abstractmethod
    def fit(self):
        """Fit the model; defined by each state."""

    @abstractmethod
    def predict(self, is_training: bool = False):
        """Run predictions with the model; defined by each state."""
class PreTuningState(ModelState):
    """
    State used before hyper-parameter tuning.

    Fits on the training split and predicts on the training and validation
    splits of the dataset.
    """

    def __init__(self, model_sklearn: object, modelName: str, model_type: str, dataset: Dataset, results_header: list[str]):
        super().__init__(model_sklearn, modelName, model_type, dataset, results_header)

    def get_fit_data(self):
        """
        Return the data to fit on.

        Returns
        -------
        tuple
            ``(X_train, y_train)``; for ``model_type == "neural_network"`` the
            validation split is appended: ``(X_train, y_train, X_val, y_val)``.
        """
        splits = self.dataset
        if self.model_type != "neural_network":
            return splits.X_train, splits.y_train
        return splits.X_train, splits.y_train, splits.X_val, splits.y_val

    def get_predict_data(self):
        """Return the feature sets to predict on, keyed ``"training"`` / ``"not-training"``."""
        predict_inputs = {}
        predict_inputs["training"] = self.dataset.X_train
        predict_inputs["not-training"] = self.dataset.X_val
        return predict_inputs

    def fit(self):
        """
        Fit the model and record the result and elapsed time in ``assesment``.

        The return value of the model's ``fit`` call is stored under
        ``"model_sklearn"`` and the wall-clock duration under ``"timeToFit"``.
        """
        print(f"Sklearn model: {self.model_sklearn}")
        fit_started_at = time.time()
        print(f"!> Started fitting {self.modelName}")

        # Neural networks additionally receive the validation split and a
        # flag marking this as the non-optimized version.
        if self.model_type == "neural_network":
            features, targets, X_val, y_val = self.get_fit_data()
            extra_fit_kwargs = {"X_val": X_val, "y_val": y_val, "isOptimizedVersion": False}
        else:
            features, targets = self.get_fit_data()
            extra_fit_kwargs = {}

        print(f"Lenght of X_data: {features.shape[0]}")
        self.assesment["model_sklearn"] = self.model_sklearn.fit(features, targets, **extra_fit_kwargs)

        elapsed = time.time() - fit_started_at
        self.assesment["timeToFit"] = elapsed
        print(f"\t\t => Fitted {self.modelName}. Took {elapsed} seconds")

    def predict(self):
        """
        Predict on the training and validation features.

        Results are stored under ``"predictions_train"`` and
        ``"predictions_val"``; the total duration under ``"timeToPredict"``.
        """
        predict_inputs = self.get_predict_data()
        predict_started_at = time.time()
        print(f"!> Started predicting {self.modelName}")

        # Training split
        seen_features = predict_inputs["training"]
        print(f"Training data: {seen_features.shape}")
        self.assesment["predictions_train"] = self.model_sklearn.predict(seen_features)

        # Validation split
        unseen_features = predict_inputs["not-training"]
        print(f"Not training data: {unseen_features.shape}")
        self.assesment["predictions_val"] = self.model_sklearn.predict(unseen_features)

        elapsed = time.time() - predict_started_at
        self.assesment["timeToPredict"] = elapsed
        print(f"\t\t => Predicted {self.modelName}. Took {elapsed} seconds")
class InTuningState(ModelState):
    """
    State used while hyper-parameters are being tuned.

    ``fit`` runs an ``Optimizer`` search on the dataset and replaces
    ``model_sklearn`` with the best model found; ``predict`` then evaluates
    that model on the training and validation features.
    """

    def __init__(self, model_sklearn: object, modelName: str, dataset: Dataset, results_header: list[str], model_type: str = "classical"):
        # This constructor's parameter order differs from the base class
        # (which takes model_type before dataset), so arguments are passed by
        # keyword to make sure each value reaches the matching attribute.
        super().__init__(
            model_sklearn=model_sklearn,
            modelName=modelName,
            model_type=model_type,
            dataset=dataset,
            results_header=results_header,
        )

    def get_fit_data(self):
        """Return ``(X_train, y_train)``."""
        return self.dataset.X_train, self.dataset.y_train

    def get_predict_data(self):
        """Return the feature sets to predict on, keyed ``"training"`` / ``"not-training"``."""
        return {
            "training": self.dataset.X_train,
            "not-training": self.dataset.X_val,
        }

    def fit(self, **kwargs):
        """
        Run the hyper-parameter search and keep the best model.

        Parameters
        ----------
        param_grid : optional
            Search space forwarded to ``Optimizer``.
        max_iter : required
            Forwarded to ``Optimizer``.
        optimizer_type : str
            One of ``"grid"``, ``"random"``, ``"bayes"``, ``"bayes_nn"``.
        model_object : required
            Forwarded to ``Optimizer``.
        epochs : optional
            Only read (and forwarded) when ``model_type == "neural_network"``.

        Raises
        ------
        AssertionError
            If ``model_type`` is ``None``, ``optimizer_type`` is not one of the
            accepted values, or ``max_iter`` / ``model_object`` is missing.
        """
        param_grid = kwargs.get("param_grid", None)
        max_iter = kwargs.get("max_iter", None)
        optimizer_type = kwargs.get("optimizer_type", None)
        model_object = kwargs.get("model_object", None)
        print(f"Model object: {model_object} for {self.modelName}")

        assert self.model_type is not None, f"Model object must have a model_type. {self.modelName}. Model object: {model_object}"
        is_neural_network = self.model_type == "neural_network"

        assert optimizer_type in ["grid", "random", "bayes", "bayes_nn"], "Optimizer type must be one of the following: grid, random, bayes, bayes_nn"
        assert max_iter is not None, "Max iter must be provided"
        assert model_object is not None, "Model object must be provided"
        print(f"Model object: {model_object}")

        optimizer_kwargs = {
            "model_sklearn": self.model_sklearn,
            "modelName": self.modelName,
            "model_object": model_object,
            "dataset": self.dataset,
            "optimizer_type": optimizer_type,
            "param_grid": param_grid,
            "max_iter": max_iter,
        }
        if is_neural_network:
            # Only neural networks hand an epoch count to the optimizer.
            optimizer_kwargs["epochs"] = kwargs.get("epochs", None)
        self.optimizer = Optimizer(**optimizer_kwargs)

        time_start = time.time()
        self.optimizer.fit()
        self.assesment["timeToFit"] = time.time() - time_start

        search = self.optimizer.optimizer
        if optimizer_type != "bayes_nn":
            self.model_sklearn = search.best_estimator_
            self.assesment["model_sklearn"] = self.model_sklearn
        else:
            best_model = search.get_best_models(num_models=1)[0]
            best_params = search.get_best_hyperparameters(num_trials=1)[0].values

            n_layers = best_params["n_layers"]
            learning_rate = best_params["learning_rate"]
            # Per-layer hyper-parameters are stored under indexed keys.
            units_per_layers = [best_params[f"units_{i}"] for i in range(n_layers)]
            activations = [best_params[f"act_{i}"] for i in range(n_layers)]

            print(f"Best params: {best_params}")
            # Rebuild the project's network wrapper with the winning
            # architecture, then attach the already-trained best model to it.
            self.model_sklearn = FeedForwardNeuralNetwork(
                num_features=self.dataset.X_train.shape[1],
                num_classes=self.dataset.y_train.value_counts().shape[0],
                n_layers=n_layers,
                units_per_layer=units_per_layers,
                activations=activations,
                learning_rate=learning_rate,
            )
            self.model_sklearn.model = best_model
            self.assesment["model_sklearn"] = self.model_sklearn
            self.model_sklearn.is_fitted_ = True

    def predict(self):
        """
        Predict on the training and validation features.

        Results are stored under ``"predictions_train"`` and
        ``"predictions_val"``; the total duration under ``"timeToPredict"``.
        """
        if self.model_type == "stacking":
            print(f"ESTIMTORS AT PREDICTION ARE: {self.model_sklearn.estimators}")

        data = self.get_predict_data()
        start_time = time.time()
        print(f"!> Started predicting {self.modelName}")

        # Predict training data
        print("Predicting training data")
        self.assesment["predictions_train"] = self.model_sklearn.predict(data["training"])

        # Predict not training data
        print("Predicting not training data")
        self.assesment["predictions_val"] = self.model_sklearn.predict(data["not-training"])

        time_taken = time.time() - start_time
        self.assesment["timeToPredict"] = time_taken
        print(f"\t\t => Predicted {self.modelName}. Took {time_taken} seconds")

    def plot_convergence(self):
        """Delegate to the optimizer's ``plot_convergence``; requires ``fit`` to have run."""
        self.optimizer.plot_convergence()
class PostTuningState(ModelState):
    """
    State used after hyper-parameter tuning.

    Fits on the training and validation splits stacked together and predicts
    on that combined data plus the test features.
    """

    def __init__(self, model_sklearn: object, modelName: str, dataset: Dataset, results_header: list[str], model_type: str = "classical"):
        # This constructor's parameter order differs from the base class
        # (which takes model_type before dataset), so arguments are passed by
        # keyword to make sure each value reaches the matching attribute.
        super().__init__(
            model_sklearn=model_sklearn,
            modelName=modelName,
            model_type=model_type,
            dataset=dataset,
            results_header=results_header,
        )

    def get_fit_data(self):
        """
        Build and return the combined training + validation data.

        The stacked arrays are also cached on the instance as
        ``X_train_combined`` / ``y_train_combined``.

        Returns
        -------
        tuple
            ``(X_train_combined, y_train_combined)``.
        """
        self.X_train_combined = np.vstack([self.dataset.X_train, self.dataset.X_val])
        self.y_train_combined = np.concatenate([self.dataset.y_train, self.dataset.y_val])
        print(f"X_train_combined: {self.X_train_combined.shape}")
        return self.X_train_combined, self.y_train_combined

    def get_predict_data(self):
        """
        Return the feature sets to predict on, keyed ``"training"`` / ``"not-training"``.

        ``"training"`` is the combined train + validation features and
        ``"not-training"`` the test features.
        """
        # The combined arrays are created by get_fit_data(); build them on
        # demand so predicting before fit() on this instance does not fail
        # with an AttributeError.
        if not (hasattr(self, "X_train_combined") and hasattr(self, "y_train_combined")):
            self.get_fit_data()
        return {
            "training": self.X_train_combined,
            "not-training": self.dataset.X_test,
        }

    def fit(self, **kwargs):
        """
        Fit the model on the combined data and record the result and timing.

        The return value of the model's ``fit`` call is stored under
        ``"model_sklearn"`` and the duration under ``"timeToFit"``.
        ``kwargs`` is accepted but not used.
        """
        print(f"Sklearn model: {self.model_sklearn}")
        start_time = time.time()
        print(f"!> Started fitting {self.modelName}")

        X_data, y_data = self.get_fit_data()
        print(f"Lenght of X_data: {X_data.shape[0]}")
        self.assesment["model_sklearn"] = self.model_sklearn.fit(X_data, y_data)

        time_taken = time.time() - start_time
        self.assesment["timeToFit"] = time_taken
        print(f"\t\t => Fitted {self.modelName}. Took {time_taken} seconds")

    def predict(self):
        """
        Predict on the combined training data and on the test features.

        Results are stored under ``"predictions_train"`` and
        ``"predictions_test"``; the total duration under ``"timeToPredict"``.

        Raises
        ------
        AssertionError
            If the row counts of the training features, the combined targets
            and the training predictions do not all match.
        """
        start_time = time.time()
        print(f"!> Started predicting {self.modelName}")
        data = self.get_predict_data()

        # Predict training data
        training_data = data["training"]
        assert training_data.shape[0] == self.X_train_combined.shape[0] == self.y_train_combined.shape[0], f"Training data shape: {training_data.shape} does not match X_train_combined shape: {self.X_train_combined.shape} or y_train_combined shape: {self.y_train_combined.shape}"
        prediction_train = self.model_sklearn.predict(training_data)
        assert len(prediction_train) == self.y_train_combined.shape[0], f"Prediction train shape: {prediction_train.shape} does not match y_train_combined shape: {self.y_train_combined.shape}"
        self.assesment["predictions_train"] = prediction_train

        # Predict not training data
        self.assesment["predictions_test"] = self.model_sklearn.predict(data["not-training"])

        time_taken = time.time() - start_time
        self.assesment["timeToPredict"] = time_taken
        print(f"\t\t => Predicted {self.modelName}. Took {time_taken} seconds")