Source code for library.phases.phases_implementation.feature_analysis.feature_engineering.feature_engineering


from library.phases.phases_implementation.dataset.dataset import Dataset
from sklearn.preprocessing import PolynomialFeatures

from library.phases.phases_implementation.data_preprocessing.data_preprocessing import Preprocessing

import pandas as pd


[docs]
class FeatureEngineering:
      """
      """
      def __init__(self, dataset: Dataset) -> None:
            self.dataset = dataset


[docs]
      def polynomial_interaction_effects(self, 
                                         degree: int = 2, 
                                         interaction_only: bool = False,
                                         standarize: bool = False, 
                                         scaler: str = "standard"):
        """
        Generates polynomial and interaction features for the dataset.
        
        Parameters
        ----------
        degree : int, optional (default=2)
            The degree of the polynomial features.
        interaction_only : bool, optional (default=False)
            If True, only interaction features are produced: features that are products of 
            at most `degree` distinct input features, without powers of single features.
        standarize : bool, optional (default=False)
            Whether to standardize the resulting features after transformation.
        scaler : str, optional (default="standard")
            Type of scaler to use if `standarize` is True (e.g., "standard" for StandardScaler).
        
        Returns
        -------
        None.  
        Transforms the training, validation, and test feature sets by adding polynomial and interaction terms,
        and optionally scales these new features. Updates the dataset's feature DataFrames in place.
        
        Prints the number of new features added.
        """
        original_number_of_features = self.dataset.X_train.shape[1]
        pol_obj = PolynomialFeatures(degree=degree, interaction_only=interaction_only)
        
        x_arr = pol_obj.fit_transform(self.dataset.X_train)
        new_features_name = pol_obj.get_feature_names_out()
        self.dataset.X_train = pd.DataFrame(x_arr, columns=new_features_name)
        x_arr = pol_obj.fit_transform(self.dataset.X_val)
        self.dataset.X_val = pd.DataFrame(x_arr, columns=new_features_name)
        x_arr = pol_obj.fit_transform(self.dataset.X_test)
        self.dataset.X_test = pd.DataFrame(x_arr, columns=new_features_name)

        if standarize:
            preprocessing = Preprocessing(self.dataset)
            preprocessing.scale_features(scaler=scaler, columnsToScale=new_features_name)
        print(f"Added {self.dataset.X_train.shape[1] - original_number_of_features} features")
Source code for library.phases.phases_implementation.feature_analysis.feature_engineering.feature_engineering

Efficient Malware Classfier

Navigation

Related Topics