Source code for library.phases.phases_implementation.feature_analysis.feature_transformation.strategies.main

from abc import ABC, abstractmethod
from library.phases.phases_implementation.dataset.dataset import Dataset
import numpy as np
import matplotlib.pyplot as plt

class BaseStrategy(ABC):
    """Abstract interface for target-transformation strategies.

    A strategy is bound to one dataset object at construction time and
    exposes a forward transformation and its inverse. Concrete subclasses
    must implement both methods; this class cannot be instantiated directly.
    """

    def __init__(self, dataset: Dataset) -> None:
        """Keep a reference to the dataset the strategy will operate on.

        Args:
            dataset: Object holding the target data. It is stored as-is
                (no copy is made).
        """
        self.dataset = dataset

    @abstractmethod
    def transform_target(self, plot: bool = False):
        """Apply the forward transformation to the dataset's target.

        Args:
            plot: Flag that implementations may use to enable diagnostic
                plotting. Defaults to ``False``.
        """

    @abstractmethod
    def inverse_transform_target(self):
        """Undo the transformation applied by :meth:`transform_target`."""
class LogStrategy(BaseStrategy):
    """Target transformation based on the natural logarithm.

    ``transform_target`` replaces ``y_train``, ``y_val`` and ``y_test`` on the
    bound dataset with ``np.log`` of their current values, and
    ``inverse_transform_target`` replaces them with ``np.exp`` of their
    current values. Both methods mutate the dataset in place and return
    ``None``.

    NOTE(review): no positivity check is performed. Per NumPy semantics,
    ``np.log`` yields ``-inf`` for zeros and ``nan`` for negative numbers
    (with a ``RuntimeWarning``); confirm that callers guarantee strictly
    positive targets.
    """

    # (dataset attribute name, label used in subplot titles), in column order.
    _SPLITS = (
        ("y_train", "Train"),
        ("y_val", "Validation"),
        ("y_test", "Test"),
    )

    def __init__(self, dataset: Dataset) -> None:
        """Bind the strategy to ``dataset`` (stored by the base class)."""
        super().__init__(dataset)

    def _apply_to_targets(self, func) -> None:
        """Replace every target split with ``func(split)``.

        All results are computed before any attribute is reassigned, so an
        exception raised by ``func`` on one split cannot leave the dataset
        with only some of its splits transformed.
        """
        results = [func(getattr(self.dataset, attr)) for attr, _ in self._SPLITS]
        for (attr, _), values in zip(self._SPLITS, results):
            setattr(self.dataset, attr, values)

    def _plot_histograms(self, axes_row, color, title_prefix, xlabel) -> None:
        """Draw one 100-bin histogram per target split on a row of axes."""
        for axis, (attr, label) in zip(axes_row, self._SPLITS):
            axis.hist(
                getattr(self.dataset, attr),
                bins=100,
                edgecolor=color,
                color=color,
                alpha=0.7,
            )
            axis.set_title(f"{title_prefix} ({label})")
            axis.set_xlabel(xlabel)
            axis.set_ylabel("Frequency")

    def transform_target(self, plot: bool = False):
        """Log-transform the train, validation and test targets in place.

        Args:
            plot: When true, show a 2x3 grid of histograms: the top row holds
                the distributions before the transformation, the bottom row
                the distributions after it (columns: train, validation,
                test).
        """
        fig = axes = None
        if plot:
            fig, axes = plt.subplots(2, 3, figsize=(15, 8))
            # The "before" row has to be drawn prior to mutating the dataset.
            self._plot_histograms(
                axes[0],
                color="#1f77b4",
                title_prefix="Distribution of Target Variable",
                xlabel="Target Value",
            )

        self._apply_to_targets(np.log)

        if plot:
            self._plot_histograms(
                axes[1],
                color="#2ca02c",
                title_prefix="Log-Transformed Distribution",
                xlabel="Log(Target Value)",
            )
            # tight_layout only accounts for artists that already exist, so it
            # must run after titles and axis labels have been set.
            fig.tight_layout(h_pad=2, w_pad=3)
            plt.show()

    def inverse_transform_target(self):
        """Exponentiate the train, validation and test targets in place.

        This applies ``np.exp`` unconditionally; it does not check that
        ``transform_target`` was called beforehand.
        """
        self._apply_to_targets(np.exp)