""" Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # standard imports import logging import typing # bsie imports from bsie.extractor import ExtractorBuilder from bsie.reader import ReaderBuilder from bsie.utils import bsfs, errors # inner-module imports from . import pipeline # exports __all__: typing.Sequence[str] = ( 'PipelineBuilder', ) ## code ## logger = logging.getLogger(__name__) class PipelineBuilder(): """Build `bsie.tools.pipeline.Pipeline` instances.""" # Prefix to be used in the Pipeline. prefix: bsfs.Namespace # builder for Readers. rbuild: ReaderBuilder # builder for Extractors. ebuild: ExtractorBuilder def __init__( self, prefix: bsfs.Namespace, reader_builder: ReaderBuilder, extractor_builder: ExtractorBuilder, ): self.prefix = prefix self.rbuild = reader_builder self.ebuild = extractor_builder def build(self) -> pipeline.Pipeline: """Return a Pipeline instance.""" ext2rdr = {} for eidx in self.ebuild: # build extractor try: ext = self.ebuild.build(eidx) except errors.LoaderError as err: # failed to load extractor; skip logger.error('failed to load extractor: %s', err) continue except errors.BuilderError as err: # failed to build instance; skip logger.error(str(err)) continue try: # get reader required by extractor if ext.CONTENT_READER is not None: rdr = self.rbuild.build(ext.CONTENT_READER) else: rdr = None # store extractor ext2rdr[ext] = rdr except errors.LoaderError as err: # failed to load reader logger.error('failed to load reader: %s', err) except errors.BuilderError as err: # failed to build reader logger.error(str(err)) return pipeline.Pipeline(self.prefix, ext2rdr) ## EOF ##