Source code for library.phases.phases_implementation.feature_analysis.feature_transformation.strategies.main

from abc import ABC, abstractmethod
from library.phases.phases_implementation.dataset.dataset import Dataset
import numpy as np
import matplotlib.pyplot as plt


[docs]
class BaseStrategy(ABC):
    """Abstract base class for strategies that transform a dataset's target.

    Concrete strategies implement ``transform_target`` and
    ``inverse_transform_target``.
    """

    def __init__(self, dataset: Dataset) -> None:
        """Store the dataset this strategy works on.

        Args:
            dataset: Dataset kept on the instance as ``self.dataset``.
        """
        self.dataset = dataset


[docs]
    @abstractmethod
    def transform_target(self, plot: bool = False):
        pass

    

[docs]
    @abstractmethod
    def inverse_transform_target(self):
        pass




[docs]
class LogStrategy(BaseStrategy):
    """Strategy that log-transforms the dataset's target values.

    ``transform_target`` replaces ``y_train``, ``y_val`` and ``y_test`` with
    ``np.log`` of their values; ``inverse_transform_target`` applies
    ``np.exp`` to the same three attributes.
    """

    def __init__(self, dataset: Dataset) -> None:
        """Initialise the strategy by delegating to ``BaseStrategy.__init__``.

        Args:
            dataset: Dataset forwarded to the base class constructor.
        """
        super().__init__(dataset)


[docs]
    def transform_target(self, plot: bool = False):
        if plot:
            fig, ax = plt.subplots(2, 3, figsize=(15, 8))
            plt.tight_layout(h_pad=2, w_pad=3)  # Add padding between subplots
            
            # Before transformation plots
            ax[0, 0].hist(self.dataset.y_train, bins=100, edgecolor='#1f77b4', color='#1f77b4', alpha=0.7)
            ax[0, 0].set_title('Distribution of Target Variable (Train)')
            ax[0, 0].set_xlabel('Target Value')
            ax[0, 0].set_ylabel('Frequency')
            
            ax[0, 1].hist(self.dataset.y_val, bins=100, edgecolor='#1f77b4', color='#1f77b4', alpha=0.7)
            ax[0, 1].set_title('Distribution of Target Variable (Validation)')
            ax[0, 1].set_xlabel('Target Value')
            ax[0, 1].set_ylabel('Frequency')
            
            ax[0, 2].hist(self.dataset.y_test, bins=100, edgecolor='#1f77b4', color='#1f77b4', alpha=0.7)
            ax[0, 2].set_title('Distribution of Target Variable (Test)')
            ax[0, 2].set_xlabel('Target Value')
            ax[0, 2].set_ylabel('Frequency')

        # Apply log transformation
        self.dataset.y_train = np.log(self.dataset.y_train)
        self.dataset.y_val = np.log(self.dataset.y_val)
        self.dataset.y_test = np.log(self.dataset.y_test)

        if plot:
            ax[1, 0].hist(self.dataset.y_train, bins=100, edgecolor='#2ca02c', color='#2ca02c', alpha=0.7)
            ax[1, 0].set_title('Log-Transformed Distribution (Train)')
            ax[1, 0].set_xlabel('Log(Target Value)')
            ax[1, 0].set_ylabel('Frequency')
            
            ax[1, 1].hist(self.dataset.y_val, bins=100, edgecolor='#2ca02c', color='#2ca02c', alpha=0.7)
            ax[1, 1].set_title('Log-Transformed Distribution (Validation)')
            ax[1, 1].set_xlabel('Log(Target Value)')
            ax[1, 1].set_ylabel('Frequency')
            
            ax[1, 2].hist(self.dataset.y_test, bins=100, edgecolor='#2ca02c', color='#2ca02c', alpha=0.7)
            ax[1, 2].set_title('Log-Transformed Distribution (Test)')
            ax[1, 2].set_xlabel('Log(Target Value)')
            ax[1, 2].set_ylabel('Frequency')
            
            plt.show() 



[docs]
    def inverse_transform_target(self):
        self.dataset.y_train = np.exp(self.dataset.y_train)
        self.dataset.y_val = np.exp(self.dataset.y_val)
        self.dataset.y_test = np.exp(self.dataset.y_test)
Source code for library.phases.phases_implementation.feature_analysis.feature_transformation.strategies.main

Efficient Malware Classifier

Navigation

Related Topics