diff options
author | Matthias Baumgartner <dev@igsor.net> | 2022-12-24 10:27:09 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2022-12-24 10:27:09 +0100 |
commit | 266c2c9a072bf3289fd7f2d75278b7d59528378c (patch) | |
tree | 60760e0fec84d5cd7b3f3efef11e3892df5cc85a /bsie/extractor/builder.py | |
parent | ed2074ae88f2db6cb6b38716b43b35e29eb2e16c (diff) | |
download | bsie-266c2c9a072bf3289fd7f2d75278b7d59528378c.tar.gz bsie-266c2c9a072bf3289fd7f2d75278b7d59528378c.tar.bz2 bsie-266c2c9a072bf3289fd7f2d75278b7d59528378c.zip |
package restructuring: base
* Reader and Extractor to respective reader/extractor modules
* ReaderBuilder to reader module
* ExtractorBuilder to extractor module
* Loading module in utils (safe_load, unpack_name)
* Pipeline and PipelineBuilder to lib module
* errors to utils
* documentation: "standard import" and "external import"
Diffstat (limited to 'bsie/extractor/builder.py')
-rw-r--r-- | bsie/extractor/builder.py | 77 |
1 files changed, 77 insertions, 0 deletions
# bsie/extractor/builder.py (added as a new file by this commit, blob 0fd3685)
"""

Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# standard imports
import typing

# bsie imports
from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name

# inner-module imports
from . import base

# exports
__all__: typing.Sequence[str] = (
    'ExtractorBuilder',
    )


## code ##

class ExtractorBuilder():
    """Create extractor instances from a list of build specifications.

    The builder is handed a list because the same extractor may be built
    several times (typically with different arguments). Every list entry
    is a dict holding exactly one item: the key is the extractor's
    qualified name and the value is a dict that is passed to the
    extractor's constructor as keyword arguments.
    Example: [{'bsie.extractor.generic.path.Path': {}}, ]

    Iterating over the builder yields the valid specification indices,
    each of which can be passed to `build`.

    """

    # build specifications, one single-item dict per extractor
    _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]

    def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]):
        self._specs = specs

    def __iter__(self) -> typing.Iterator[int]:
        """Iterate over the indices of the extractor specifications."""
        indices = range(len(self._specs))
        return iter(indices)

    def build(self, index: int) -> base.Extractor:
        """Build and return the extractor described by the specification at *index*.

        Raises a TypeError if the specification is not a single-item dict
        or if its value (the keyword arguments) is not a dict. Any exception
        raised by the extractor's constructor is re-raised as an
        `errors.BuilderError` with the original exception as its cause.
        """
        # fetch the requested specification
        entry = self._specs[index]

        # the entry must look like {name: {kwargs}}
        if not isinstance(entry, dict):
            raise TypeError(f'expected a dict, found {bsfs.typename(entry)}')
        if len(entry) != 1:
            raise TypeError(f'expected a dict of length one, found {len(entry)}')

        # the sole key is the qualified name, its value the constructor arguments
        qualified_name = next(iter(entry))
        arguments = entry[qualified_name]

        # constructor arguments must be given as a dict
        if not isinstance(arguments, dict):
            raise TypeError(f'expected a dict, found {bsfs.typename(arguments)}')

        # split the qualified name and load the extractor class
        module_name, class_name = unpack_qualified_name(qualified_name)
        extractor_cls = safe_load(module_name, class_name)

        # instantiate; report any constructor failure as a BuilderError
        try:
            extractor = extractor_cls(**arguments)
        except Exception as cause:
            raise errors.BuilderError(
                f'failed to build extractor {qualified_name} due to {bsfs.typename(cause)}: {cause}'
                ) from cause
        return extractor

## EOF ##