Source code for hifis_surveyval.core.preprocess

# hifis-surveyval
# Framework to help developing analysis scripts for the HIFIS Software survey.
#
# SPDX-FileCopyrightText: 2021 HIFIS Software <support@hifis.net>
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""This module starts a preprocessing script, if it exists."""
import importlib.util
import logging
import traceback

from hifis_surveyval.core.settings import Settings
from hifis_surveyval.data_container import DataContainer


class Preprocessor(object):
    """Provides running a preprocessing script."""

    @classmethod
    def preprocess(
        cls, settings: Settings, data: DataContainer
    ) -> DataContainer:
        """
        Run the preprocessing script, if it exists.

        The script is loaded from ``settings.PREPROCESSING_FILENAME`` and its
        ``preprocess(data=...)`` function is called with the given data.

        An ImportError while loading the script, or an AttributeError while
        calling its ``preprocess`` function, is logged as an error and the
        unmodified data is returned. Any other exception raised by the
        script is not handled here and propagates to the caller.

        Args:
            settings (Settings):
                The settings of the run.
            data (DataContainer):
                The data to preprocess.

        Returns:
            DataContainer:
                Whatever the script's ``preprocess`` function returned, or
                the unmodified input data if there is no script or it could
                not be loaded or called.
        """
        script_path = settings.PREPROCESSING_FILENAME

        if not script_path.exists():
            logging.info(
                "No preprocessing script found - skipping preprocessing"
            )
            return data

        logging.info("Running preprocessing script.")
        module_name = script_path.stem

        try:
            spec = importlib.util.spec_from_file_location(
                module_name, script_path
            )
            # spec_from_file_location() yields None when no loader matches
            # the file; treat that like any other import failure instead of
            # letting module_from_spec() fail on the None.
            if spec is None or spec.loader is None:
                raise ImportError(f"No loader available for {script_path}")
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
        except ImportError as error:
            logging.error(f"Failed to load module {module_name}. {error}")
            # Without a loaded module there is nothing to call; hand back
            # the data untouched rather than continuing with an unbound name.
            return data

        try:
            preprocessed_data: DataContainer = module.preprocess(data=data)
            return preprocessed_data
        except AttributeError as error:
            traceback.print_exc()
            logging.error(
                f"Module {module_name}: "
                f"Error when calling preprocess() - method: "
                f"{error}."
            )
            return data