diff options
Diffstat (limited to 'bsie/lib/builder.py')
-rw-r--r-- | bsie/lib/builder.py | 75 |
1 files changed, 75 insertions, 0 deletions
diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py new file mode 100644 index 0000000..3a15311 --- /dev/null +++ b/bsie/lib/builder.py @@ -0,0 +1,75 @@ + +# standard imports +import logging +import typing + +# bsie imports +from bsie.extractor import ExtractorBuilder +from bsie.reader import ReaderBuilder +from bsie.utils import errors + +# inner-module imports +from . import pipeline + +# exports +__all__: typing.Sequence[str] = ( + 'PipelineBuilder', + ) + + +## code ## + +logger = logging.getLogger(__name__) + +class PipelineBuilder(): + """Build `bsie.tools.pipeline.Pipeline` instances.""" + + # builder for Readers. + rbuild: ReaderBuilder + + # builder for Extractors. + ebuild: ExtractorBuilder + + def __init__( + self, + reader_builder: ReaderBuilder, + extractor_builder: ExtractorBuilder, + ): + self.rbuild = reader_builder + self.ebuild = extractor_builder + + def build(self) -> pipeline.Pipeline: + """Return a Pipeline instance.""" + ext2rdr = {} + + for eidx in self.ebuild: + # build extractor + try: + ext = self.ebuild.build(eidx) + + except errors.LoaderError as err: # failed to load extractor; skip + logger.error('failed to load extractor: %s', err) + continue + + except errors.BuilderError as err: # failed to build instance; skip + logger.error(str(err)) + continue + + try: + # get reader required by extractor + if ext.CONTENT_READER is not None: + rdr = self.rbuild.build(ext.CONTENT_READER) + else: + rdr = None + # store extractor + ext2rdr[ext] = rdr + + except errors.LoaderError as err: # failed to load reader + logger.error('failed to load reader: %s', err) + + except errors.BuilderError as err: # failed to build reader + logger.error(str(err)) + + return pipeline.Pipeline(ext2rdr) + +## EOF ## |