From 266c2c9a072bf3289fd7f2d75278b7d59528378c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 24 Dec 2022 10:27:09 +0100 Subject: package restructuring: base * Reader and Extractor to respective reader/extractor modules * ReaderBuilder to reader module * ExtractorBuilder to extractor module * Loading module in utils (safe_load, unpack_name) * Pipeline and PipelineBuilder to lib module * errors to utils * documentation: "standard import" and "external import" --- bsie/reader/__init__.py | 13 +++++++++ bsie/reader/base.py | 47 +++++++++++++++++++++++++++++++ bsie/reader/builder.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ bsie/reader/path.py | 8 +++--- bsie/reader/stat.py | 9 ++++-- 5 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 bsie/reader/base.py create mode 100644 bsie/reader/builder.py (limited to 'bsie/reader') diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py index a45f22b..4163d1c 100644 --- a/bsie/reader/__init__.py +++ b/bsie/reader/__init__.py @@ -15,5 +15,18 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ +# standard imports +import typing +# inner-module imports +from .base import Reader +from .builder import ReaderBuilder + +# exports +__all__: typing.Sequence[str] = ( + 'Reader', + 'ReaderBuilder', + ) + +## EOF ## ## EOF ## diff --git a/bsie/reader/base.py b/bsie/reader/base.py new file mode 100644 index 0000000..cbabd36 --- /dev/null +++ b/bsie/reader/base.py @@ -0,0 +1,47 @@ +"""The Reader classes return high-level content structures from files. + +The Reader fulfills two purposes: + First, it brokers between multiple libraries and file formats. + Second, it separates multiple aspects of a file into distinct content types. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import typing + +# bsie imports +from bsie.utils import bsfs + +# exports +__all__: typing.Sequence[str] = ( + 'Reader', + ) + + +## code ## + +class Reader(abc.ABC): + """Read and return some content from a file.""" + + def __str__(self) -> str: + return bsfs.typename(self) + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}()' + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) + + def __hash__(self) -> int: + return hash(type(self)) + + @abc.abstractmethod + def __call__(self, path: bsfs.URI) -> typing.Any: + """Return some content of the file at *path*. + Raises a `ReaderError` if the reader cannot make sense of the file format. + """ + +## EOF ## diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py new file mode 100644 index 0000000..bce5397 --- /dev/null +++ b/bsie/reader/builder.py @@ -0,0 +1,74 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# bsie imports +from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name + +# inner-module imports +from . import base + +# exports +__all__: typing.Sequence[str] = ( + 'ReaderBuilder', + ) + + +## code ## + +class ReaderBuilder(): + """Build `bsie.base.Reader` instances. + + Readers are defined via their qualified class name + (e.g., bsie.reader.path.Path) and optional keyword + arguments that are passed to the constructor via + the *kwargs* argument (name as key, kwargs as value). + The ReaderBuilder keeps a cache of previously built + reader instances, as they are anyway built with + identical keyword arguments. + + """ + + # keyword arguments + _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]] + + # cached readers + _cache: typing.Dict[str, base.Reader] + + def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]): + self._kwargs = kwargs + self._cache = {} + + def build(self, name: str) -> base.Reader: + """Return an instance for the qualified class name.""" + # return cached instance + if name in self._cache: + return self._cache[name] + + # check name and get module/class components + module_name, class_name = unpack_qualified_name(name) + + # import reader class + cls = safe_load(module_name, class_name) + + # get kwargs + kwargs = self._kwargs.get(name, {}) + if not isinstance(kwargs, dict): + raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}') + + try: # build, cache, and return instance + obj = cls(**kwargs) + # cache instance + self._cache[name] = obj + # return instance + return obj + + except Exception as err: + raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err + +## EOF ## diff --git a/bsie/reader/path.py b/bsie/reader/path.py index d60f187..1ca05a0 100644 --- a/bsie/reader/path.py +++ b/bsie/reader/path.py @@ -4,11 +4,11 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing -# bsie imports -from bsie.base import reader +# inner-module imports +from . import base # exports __all__: typing.Sequence[str] = ( @@ -18,7 +18,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Path(reader.Reader): +class Path(base.Reader): """Return the path.""" def __call__(self, path: str) -> str: diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py index fc5fb24..706dc47 100644 --- a/bsie/reader/stat.py +++ b/bsie/reader/stat.py @@ -4,12 +4,15 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import typing # bsie imports -from bsie.base import errors, reader +from bsie.utils import errors + +# inner-module imports +from . import base # exports __all__: typing.Sequence[str] = ( @@ -19,7 +22,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Stat(reader.Reader): +class Stat(base.Reader): """Read and return the filesystem's stat infos.""" def __call__(self, path: str) -> os.stat_result: -- cgit v1.2.3