Source code for library.phases.runners.featureAnalysis_runner



from library.utils.phase_runner_definition.phase_runner import PhaseRunner
from library.pipeline.pipeline_manager import PipelineManager
from library.pipeline.pipeline import Pipeline

[docs] class FeatureAnalysisRunner(PhaseRunner): def __init__(self, pipeline_manager: PipelineManager, include_plots: bool = False, save_path: str = "") -> None: super().__init__(pipeline_manager, include_plots, save_path) def _run_feature_transformation(self) -> None: # Do not add encoding, this is added in _feature_encoding_helper() in dataPreprocessing_runner.py pass def _update_pipelines_datasets(self, reference_pipeline: Pipeline) -> None: """ Updates the dataset with the new features. """ # Not baseline for pipeline in self.pipeline_manager.pipelines["not_baseline"]: pipeline.dataset.X_train = reference_pipeline.dataset.X_train pipeline.dataset.X_val = reference_pipeline.dataset.X_val pipeline.dataset.X_test = reference_pipeline.dataset.X_test # Baseline for pipeline in self.pipeline_manager.pipelines["baseline"]: pipeline.dataset.df = reference_pipeline.dataset.df def _run_manual_feature_selection(self) -> None: """ A pipeline-specific procedure is then applied to all other pipelines. Think of a brief convergence after divergence of the pipelines occured. """ reference_category = "not_baseline" reference_pipeline = "tree_based" # 1) Mutual Information self.pipeline_manager.pipelines[reference_category][reference_pipeline].feature_analysis.feature_selection.manual_feature_selection.fit( type="MutualInformation", threshold=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["mutual_information"]["threshold"], delete_features=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["mutual_information"]["delete_features"], save_plots=self.include_plots, save_path=self.save_path ) # 2) Low Variances self.pipeline_manager.pipelines[reference_category][reference_pipeline].feature_analysis.feature_selection.manual_feature_selection.fit( type="LowVariances", threshold=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["low_variances"]["threshold"], delete_features=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["low_variances"]["delete_features"], save_plots=self.include_plots, save_path=self.save_path ) # 3) VIF self.pipeline_manager.pipelines[reference_category][reference_pipeline].feature_analysis.feature_selection.manual_feature_selection.fit( type="VIF", threshold=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["vif"]["threshold"], delete_features=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["vif"]["delete_features"], save_plots=self.include_plots, save_path=self.save_path ) # # 4) PCA # self.pipeline_manager.pipelines["not_baseline"]["tree_based"].feature_analysis.feature_selection.manual_feature_selection.fit( # type="PCA", # threshold=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["pca"]["threshold"], # delete_features=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["pca"]["delete_features"], # save_plots=self.include_plots, # save_path=self.save_path # ) # self.pipeline_manager.all_pipelines_execute(methodName="feature_analysis.feature_selection.manual_feature_selection.fit", # verbose=True, # type="MutualInformation", # threshold=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["mutual_information"]["threshold"], # delete_features=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["mutual_information"]["delete_features"], # save_plots=self.include_plots, # save_path=self.save_path, # exclude_pipeline_names=["support_vector_machine"] # ) # # 2) Low Variances # self.pipeline_manager.all_pipelines_execute(methodName="feature_analysis.feature_selection.manual_feature_selection.fit", # verbose=True, # type="LowVariances", # threshold=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["low_variances"]["threshold"], # delete_features=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["low_variances"]["delete_features"], # save_plots=self.include_plots, # save_path=self.save_path, # exclude_pipeline_names=["support_vector_machine"] # ) # # 3) VIF # self.pipeline_manager.all_pipelines_execute(methodName="feature_analysis.feature_selection.manual_feature_selection.fit", # verbose=True, # type="VIF", # threshold=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["vif"]["threshold"], # delete_features=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["vif"]["delete_features"], # save_plots=self.include_plots, # save_path=self.save_path, # exclude_pipeline_names=["support_vector_machine"] # ) # 4) PCA # self.pipeline_manager.all_pipelines_execute(methodName="feature_analysis.feature_selection.manual_feature_selection.fit", # verbose=True, # type="PCA", # threshold=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["pca"]["threshold"], # delete_features=self.pipeline_manager.variables["feature_analysis_runner"]["manual_feature_selection"]["pca"]["delete_features"], # save_plots=self.include_plots, # save_path=self.save_path # exclude_pipeline_names=["support_vector_machine"] # ) return None def _run_automatic_feature_selection(self) -> None: # # 1) L1 # predictivePowerFeatures, excludedFeatures, coefficients = self.pipeline_manager.all_pipelines_execute(methodName="feature_analysis.feature_selection.automatic_feature_selection.fit", # verbose=False, # type="L1", # max_iter=self.pipeline_manager.variables["feature_analysis_runner"]["automatic_feature_selection"]["l1"]["max_iter"], # delete_features=self.pipeline_manager.variables["feature_analysis_runner"]["automatic_feature_selection"]["l1"]["delete_features"], # ) # 2) Boruta selected_features, excludedFeatures = self.pipeline_manager.all_pipelines_execute(methodName="feature_analysis.feature_selection.automatic_feature_selection.fit", verbose=True, type="Boruta", max_iter=self.pipeline_manager.variables["feature_analysis_runner"]["automatic_feature_selection"]["boruta"]["max_iter"], delete_features=self.pipeline_manager.variables["feature_analysis_runner"]["automatic_feature_selection"]["boruta"]["delete_features"], exclude_pipeline_names=["support_vector_machine"] ) return {"L1": {"predictivePowerFeatures": None, "excludedFeatures": None, "coefficients": None}, "Boruta": {"selected_features": selected_features, "excludedFeatures": excludedFeatures}} def _run_feature_engineering_after_split(self) -> None: # FEDE (expand here) # after split ...
[docs] def run(self) -> None: feature_transformation_results = self._run_feature_transformation() manual_feature_selection_results = self._run_manual_feature_selection() # Comment out cause it goes too slow #automatic_feature_selection_results = self._run_automatic_feature_selection() # Comment out cause it goes too slow self._run_feature_engineering_after_split() return { "feature_transformation_results": feature_transformation_results, "manual_feature_selection_results": None, "automatic_feature_selection_results": None }