Source code for esgprep.checkvocab.context

# -*- coding: utf-8 -*-

"""
    :platform: Unix
    :synopsis: Processing context used in this module.

"""

from constants import *
from esgprep.utils.collectors import PathCollector, DatasetCollector, Collector
from esgprep.utils.context import MultiprocessingContext
from esgprep.utils.custom_print import *


class ProcessingContext(MultiprocessingContext):
    """
    Encapsulates the processing context/information for main process.

    On entering the context, the DRS facet keys are extracted from the
    configured patterns and a source collector is built from whichever
    input was supplied: directories, incoming files, a single dataset ID
    or a file listing dataset IDs.

    :param ArgumentParser args: The command-line arguments parser
    :returns: The processing context
    :rtype: *ProcessingContext*

    """

    def __init__(self, args):
        super(ProcessingContext, self).__init__(args)
        # True if undeclared facets
        self.any_undeclared = False
        # Source values are kept in a manager-backed dictionary when a pool is
        # used (presumably so they can be shared between processes -- confirm
        # against MultiprocessingContext), in a plain dictionary otherwise.
        if self.use_pool:
            self.source_values = self.manager.dict({})
        else:
            self.source_values = dict()

    def __enter__(self):
        """
        Prepares the facet keys, the source collector and the matching pattern.

        :returns: The processing context itself
        :rtype: *ProcessingContext*

        """
        super(ProcessingContext, self).__enter__()
        # Get the DRS facet keys from the named groups of both patterns.
        # The set is built straight from the ``groupindex`` mappings instead of
        # calling ``.keys().extend(...)``, which only works when ``keys()``
        # returns a list (Python 2) and fails on a Python 3 keys view.
        directory_regex = re.compile(self.cfg.translate('directory_format', add_ending_filename=True))
        dataset_regex = re.compile(self.cfg.translate('dataset_id'))
        facets = set(directory_regex.groupindex)
        facets.update(dataset_regex.groupindex)
        self.facets = facets.difference(set(IGNORED_KEYS))
        # Init data collector
        if self.directory:
            # The source is a list of directories
            self.source_type = 'file'
            self.sources = PathCollector(sources=self.directory)
            self._init_filters()
            # Translate directory format (including the ending filename)
            self.pattern = self.cfg.translate('directory_format', add_ending_filename=True)
        elif self.incoming:
            # The source is a set of incoming files
            self.source_type = 'file'
            self.sources = Collector(sources=self.incoming)
            self._init_filters()
            # Translate filename format
            self.pattern = self.cfg.translate('filename_format')
        elif self.dataset_id:
            # The source is a dataset ID (potentially from stdin)
            self.source_type = 'dataset'
            self.sources = DatasetCollector(sources=[self.dataset_id],
                                            versioned=False)
            # Translate dataset_id format
            self.pattern = self.cfg.translate('dataset_id')
        else:
            # The source is a list of files (i.e., several dataset lists).
            # Has to be tested at the end: per the original note,
            # args.dataset_list is never None.
            self.source_type = 'dataset'
            # Blank lines are skipped and surrounding whitespace is removed.
            dataset_ids = [x.strip() for x in self.dataset_list.readlines() if x.strip()]
            self.sources = DatasetCollector(sources=dataset_ids,
                                            versioned=False)
            # Translate dataset_id format
            self.pattern = self.cfg.translate('dataset_id')
        # Get number of sources
        self.nbsources = len(self.sources)
        return self

    def _init_filters(self):
        """
        Registers the file and directory filters on the current collector.

        """
        # Init file filter
        for regex, inclusive in self.file_filter:
            self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
        # Init dir filter (registered with inclusive=False)
        self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)