aboutsummaryrefslogtreecommitdiffstats
path: root/bsie/extractor/builder.py
diff options
context:
space:
mode:
Diffstat (limited to 'bsie/extractor/builder.py')
-rw-r--r--bsie/extractor/builder.py77
1 files changed, 77 insertions, 0 deletions
diff --git a/bsie/extractor/builder.py b/bsie/extractor/builder.py
new file mode 100644
index 0000000..0fd3685
--- /dev/null
+++ b/bsie/extractor/builder.py
@@ -0,0 +1,77 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ExtractorBuilder',
+ )
+
+
+## code ##
+
class ExtractorBuilder:
    """Build `bsie.base.Extractor` instances from build specifications.

    It is permissible to build multiple instances of the same extractor
    (typically with different arguments), hence the ExtractorBuilder
    receives a list of build specifications. Each specification is
    a dict with a single key (the extractor's qualified name) whose
    value is a dict to be used as keyword arguments.
    Example: [{'bsie.extractor.generic.path.Path': {}}, ]

    Iterating over the builder yields the indices of the specifications;
    pass an index to `build` to create the respective extractor.

    """

    # build specifications: one single-item dict {qualified name: kwargs} per extractor.
    _specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]

    def __init__(self, specs: typing.List[typing.Dict[str, typing.Dict[str, typing.Any]]]):
        # The list is stored as passed (no copy, no validation).
        # Each specification is only checked once it is built.
        self._specs = specs

    def __iter__(self) -> typing.Iterator[int]:
        """Iterate over the indices of the extractor specifications."""
        return iter(range(len(self._specs)))

    def build(self, index: int) -> 'base.Extractor':
        """Return an instance of the n'th extractor (n=*index*).

        Raises a `TypeError` if the specification is not a dict, does not
        have exactly one item, or if its keyword arguments are not a dict.
        Raises an `errors.BuilderError` if calling the extractor class with
        the keyword arguments fails; the original exception is chained.
        Errors from looking up *index*, from unpacking the qualified name,
        or from loading the class are not caught here and propagate as-is.
        """
        # get build instructions
        specs = self._specs[index]

        # check specs structure. expecting {name: {kwargs}}
        if not isinstance(specs, dict):
            raise TypeError(f'expected a dict, found {bsfs.typename(specs)}')
        if len(specs) != 1:
            raise TypeError(f'expected a dict of length one, found {len(specs)}')

        # get name and args from specs; exactly one item is guaranteed by the check above
        ((name, kwargs),) = specs.items()

        # check kwargs structure
        if not isinstance(kwargs, dict):
            raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}')

        # check name and get module/class components
        module_name, class_name = unpack_qualified_name(name)

        # import extractor class
        cls = safe_load(module_name, class_name)

        try: # build and return instance
            return cls(**kwargs)

        except Exception as err:
            # Whatever the constructor raises is reported as a BuilderError
            # that names the extractor; the cause stays attached via `from err`.
            raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err
+
+## EOF ##