From 266c2c9a072bf3289fd7f2d75278b7d59528378c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 24 Dec 2022 10:27:09 +0100 Subject: package restructuring: base * Reader and Extractor to respective reader/extractor modules * ReaderBuilder to reader module * ExtractorBuilder to extractor module * Loading module in utils (safe_load, unpack_name) * Pipeline and PipelineBuilder to lib module * errors to utils * documentation: "standard import" and "external import" --- bsie/reader/__init__.py | 13 +++++++++ bsie/reader/base.py | 47 +++++++++++++++++++++++++++++++ bsie/reader/builder.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ bsie/reader/path.py | 8 +++--- bsie/reader/stat.py | 9 ++++-- 5 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 bsie/reader/base.py create mode 100644 bsie/reader/builder.py (limited to 'bsie/reader') diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py index a45f22b..4163d1c 100644 --- a/bsie/reader/__init__.py +++ b/bsie/reader/__init__.py @@ -15,5 +15,18 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ +# standard imports +import typing +# inner-module imports +from .base import Reader +from .builder import ReaderBuilder + +# exports +__all__: typing.Sequence[str] = ( + 'Reader', + 'ReaderBuilder', + ) + +## EOF ## ## EOF ## diff --git a/bsie/reader/base.py b/bsie/reader/base.py new file mode 100644 index 0000000..cbabd36 --- /dev/null +++ b/bsie/reader/base.py @@ -0,0 +1,47 @@ +"""The Reader classes return high-level content structures from files. + +The Reader fulfills two purposes: + First, it brokers between multiple libraries and file formats. + Second, it separates multiple aspects of a file into distinct content types. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import typing + +# bsie imports +from bsie.utils import bsfs + +# exports +__all__: typing.Sequence[str] = ( + 'Reader', + ) + + +## code ## + +class Reader(abc.ABC): + """Read and return some content from a file.""" + + def __str__(self) -> str: + return bsfs.typename(self) + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}()' + + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) + + def __hash__(self) -> int: + return hash(type(self)) + + @abc.abstractmethod + def __call__(self, path: bsfs.URI) -> typing.Any: + """Return some content of the file at *path*. + Raises a `ReaderError` if the reader cannot make sense of the file format. + """ + +## EOF ## diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py new file mode 100644 index 0000000..bce5397 --- /dev/null +++ b/bsie/reader/builder.py @@ -0,0 +1,74 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# bsie imports +from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name + +# inner-module imports +from . import base + +# exports +__all__: typing.Sequence[str] = ( + 'ReaderBuilder', + ) + + +## code ## + +class ReaderBuilder(): + """Build `bsie.base.Reader` instances. + + Readers are defined via their qualified class name + (e.g., bsie.reader.path.Path) and optional keyword + arguments that are passed to the constructor via + the *kwargs* argument (name as key, kwargs as value). + The ReaderBuilder keeps a cache of previously built + reader instances, as they are anyway built with + identical keyword arguments. + + """ + + # keyword arguments + _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]] + + # cached readers + _cache: typing.Dict[str, base.Reader] + + def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]): + self._kwargs = kwargs + self._cache = {} + + def build(self, name: str) -> base.Reader: + """Return an instance for the qualified class name.""" + # return cached instance + if name in self._cache: + return self._cache[name] + + # check name and get module/class components + module_name, class_name = unpack_qualified_name(name) + + # import reader class + cls = safe_load(module_name, class_name) + + # get kwargs + kwargs = self._kwargs.get(name, {}) + if not isinstance(kwargs, dict): + raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}') + + try: # build, cache, and return instance + obj = cls(**kwargs) + # cache instance + self._cache[name] = obj + # return instance + return obj + + except Exception as err: + raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err + +## EOF ## diff --git a/bsie/reader/path.py b/bsie/reader/path.py index d60f187..1ca05a0 100644 --- a/bsie/reader/path.py +++ b/bsie/reader/path.py @@ -4,11 +4,11 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import typing -# bsie imports -from bsie.base import reader +# inner-module imports +from . import base # exports __all__: typing.Sequence[str] = ( @@ -18,7 +18,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Path(reader.Reader): +class Path(base.Reader): """Return the path.""" def __call__(self, path: str) -> str: diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py index fc5fb24..706dc47 100644 --- a/bsie/reader/stat.py +++ b/bsie/reader/stat.py @@ -4,12 +4,15 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import os import typing # bsie imports -from bsie.base import errors, reader +from bsie.utils import errors + +# inner-module imports +from . import base # exports __all__: typing.Sequence[str] = ( @@ -19,7 +22,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Stat(reader.Reader): +class Stat(base.Reader): """Read and return the filesystem's stat infos.""" def __call__(self, path: str) -> os.stat_result: -- cgit v1.2.3 From 07219685d01f803dc46c8d5465fa542c1d822cb4 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 24 Dec 2022 10:39:51 +0100 Subject: documentation: standard vs external import --- bsie/reader/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'bsie/reader') diff --git a/bsie/reader/base.py b/bsie/reader/base.py index cbabd36..08d6cc6 100644 --- a/bsie/reader/base.py +++ b/bsie/reader/base.py @@ -8,7 +8,7 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import abc import typing -- cgit v1.2.3 From 17f03ae3d3dc53fe973f37fe4dea4a831b4f97d7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 24 Dec 2022 16:06:16 +0100 Subject: ReaderChain and image reader --- bsie/reader/base.py | 2 +- bsie/reader/chain.py | 85 +++++++++++++++++++++++++++++++++++++++++++ bsie/reader/image/__init__.py | 36 ++++++++++++++++++ bsie/reader/image/_pillow.py | 37 +++++++++++++++++++ bsie/reader/image/_raw.py | 61 +++++++++++++++++++++++++++++++ 5 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 bsie/reader/chain.py create mode 100644 bsie/reader/image/__init__.py create mode 100644 bsie/reader/image/_pillow.py create mode 100644 bsie/reader/image/_raw.py (limited to 'bsie/reader') diff --git a/bsie/reader/base.py b/bsie/reader/base.py index 08d6cc6..099a327 100644 --- a/bsie/reader/base.py +++ b/bsie/reader/base.py @@ -39,7 +39,7 @@ class Reader(abc.ABC): return hash(type(self)) @abc.abstractmethod - def __call__(self, path: bsfs.URI) -> typing.Any: + def __call__(self, path: str) -> typing.Any: """Return some content of the file at *path*. Raises a `ReaderError` if the reader cannot make sense of the file format. """ diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py new file mode 100644 index 0000000..8e900e1 --- /dev/null +++ b/bsie/reader/chain.py @@ -0,0 +1,85 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import typing + +# bsie imports +from bsie.utils import bsfs, errors + +# inner-module imports +from . import base +from . import builder + +# exports +__all__: typing.Sequence[str] = ( + 'ReaderChain', + ) + + +## code ## + +logger = logging.getLogger(__name__) + +# Content type. +T_CONTENT = typing.TypeVar('T_CONTENT') # pylint: disable=invalid-name + +class ReaderChain(base.Reader, typing.Generic[T_CONTENT]): + """Read an image.""" + + # sub-readers for specific file formats. + _children: typing.Tuple[base.Reader, ...] + + def __init__( + self, + subreader_names: typing.Iterable[str], + cfg: typing.Any, + ): + rbuild = builder.ReaderBuilder(cfg) + children = [] + for name in subreader_names: + try: + # build sub-reader + children.append(rbuild.build(name)) + except (ValueError, + TypeError, + errors.LoaderError, + errors.BuilderError) as err: + # failed to build a child; skip and notify + logger.warning('failed to load reader: %s', err) + + if len(children) == 0: + logger.warning('%s failed to load any sub-readers.', bsfs.typename(self)) + + # copy children to member + self._children = tuple(children) + + def __str__(self) -> str: + substr = ', '.join(str(child) for child in self._children) + return f'{bsfs.typename(self)}({substr})' + + def __repr__(self) -> str: + return f'{bsfs.typename(self)}({self._children})' + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self._children == other._children + + def __hash__(self) -> int: + return hash((super().__hash__(), self._children)) + + def __call__(self, path: str) -> T_CONTENT: + for child in self._children: + try: + return child(path) + except errors.ReaderError: + # child cannot read the file, skip. + pass + + raise errors.ReaderError(path) + +## EOF ## diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py new file mode 100644 index 0000000..85dad85 --- /dev/null +++ b/bsie/reader/image/__init__.py @@ -0,0 +1,36 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image + +# inner-module imports +from .. import chain + +# constants +_FILE_FORMAT_READERS: typing.Sequence[str] = ( + __package__ + '._raw.RawImage', + __package__ + '._pillow.PillowImage', + ) + +# exports +__all__: typing.Sequence[str] = ( + 'Image', + ) + + +## code ## + +class Image(chain.ReaderChain[PIL.Image]): # pylint: disable=too-few-public-methods + """Read an image file.""" + + def __init__(self, cfg): + super().__init__(_FILE_FORMAT_READERS, cfg) + +## EOF ## diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py new file mode 100644 index 0000000..ee0662d --- /dev/null +++ b/bsie/reader/image/_pillow.py @@ -0,0 +1,37 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# inner-module imports +from .. import base + +# exports +__all__: typing.Sequence[str] = ( + 'PillowImage', + ) + + +## code ## + +class PillowImage(base.Reader): + """Use PIL to read content of a variety of image file types.""" + + def __call__(self, path: str) -> PIL.Image: + try: + # open file with PIL + return PIL.Image.open(path) + except IOError as err: + raise errors.ReaderError(path) from err + +# EOF ## diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py new file mode 100644 index 0000000..77be357 --- /dev/null +++ b/bsie/reader/image/_raw.py @@ -0,0 +1,61 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image +import rawpy + +# bsie imports +from bsie.utils import errors, filematcher + +# inner-module imports +from .. import base + +# constants +MATCH_RULE = 'mime={image/x-nikon-nef} | extension={nef}' + +# exports +__all__: typing.Sequence[str] = ( + 'RawImage', + ) + + +## code ## + +class RawImage(base.Reader): + """Use rawpy to read content of raw image file types.""" + + # file matcher + match: filematcher.Matcher + + # additional kwargs to rawpy's postprocess + rawpy_kwargs: typing.Dict[str, typing.Any] + + def __init__(self, **rawpy_kwargs): + match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE) + self._match = filematcher.parse(match_rule) + self._rawpy_kwargs = rawpy_kwargs + + def __call__(self, path: str) -> PIL.Image: + # perform quick checks first + if not self._match(path): + raise errors.ReaderError(path) + + try: + # open file with rawpy + ary = rawpy.imread(path).postprocess(**self._rawpy_kwargs) + # convert to PIL.Image + return PIL.Image.fromarray(ary) + except (rawpy.LibRawFatalError, # pylint: disable=no-member # pylint doesn't find the errors + rawpy.NotSupportedError, # pylint: disable=no-member + rawpy.LibRawNonFatalError, # pylint: disable=no-member + ) as err: + raise errors.ReaderError(path) from err + +## EOF ## -- cgit v1.2.3 From 3f93be488638fdf6668e0e03e2b1634bb969ca80 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 15:39:16 +0100 Subject: random fixes --- bsie/reader/image/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'bsie/reader') diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py index 85dad85..b7587e7 100644 --- a/bsie/reader/image/__init__.py +++ b/bsie/reader/image/__init__.py @@ -27,7 +27,8 @@ __all__: typing.Sequence[str] = ( ## code ## -class Image(chain.ReaderChain[PIL.Image]): # pylint: disable=too-few-public-methods +# FIXME: Check if PIL.Image or PIL.Image.Image, or if version-dependent +class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods """Read an image file.""" def __init__(self, cfg): -- cgit v1.2.3 From afd165000c1661a9cca117a4844ad3f89d926fdb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 20:53:39 +0100 Subject: unsupported file format exception --- bsie/reader/chain.py | 7 +++++-- bsie/reader/image/_pillow.py | 2 ++ bsie/reader/image/_raw.py | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'bsie/reader') diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py index 8e900e1..db7c2d5 100644 --- a/bsie/reader/chain.py +++ b/bsie/reader/chain.py @@ -73,13 +73,16 @@ class ReaderChain(base.Reader, typing.Generic[T_CONTENT]): return hash((super().__hash__(), self._children)) def __call__(self, path: str) -> T_CONTENT: + raise_error = errors.UnsupportedFileFormatError for child in self._children: try: return child(path) + except errors.UnsupportedFileFormatError: + pass except errors.ReaderError: # child cannot read the file, skip. - pass + raise_error = errors.ReaderError # type: ignore [assignment] # mypy is confused - raise errors.ReaderError(path) + raise raise_error(path) ## EOF ## diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py index ee0662d..3144509 100644 --- a/bsie/reader/image/_pillow.py +++ b/bsie/reader/image/_pillow.py @@ -31,6 +31,8 @@ class PillowImage(base.Reader): try: # open file with PIL return PIL.Image.open(path) + except PIL.UnidentifiedImageError as err: + raise errors.UnsupportedFileFormatError(path) from err except IOError as err: raise errors.ReaderError(path) from err diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py index 77be357..cd60453 100644 --- a/bsie/reader/image/_raw.py +++ b/bsie/reader/image/_raw.py @@ -45,7 +45,7 @@ class RawImage(base.Reader): def __call__(self, path: str) -> PIL.Image: # perform quick checks first if not self._match(path): - raise errors.ReaderError(path) + raise errors.UnsupportedFileFormatError(path) try: # open file with rawpy -- cgit v1.2.3 From 8439089807bbad92e95ad9062dc74c3d71f5d7eb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:35:19 +0100 Subject: ReaderBuilder optional config --- bsie/reader/builder.py | 6 +++++- bsie/reader/chain.py | 2 +- bsie/reader/image/__init__.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'bsie/reader') diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py index bce5397..8699e75 100644 --- a/bsie/reader/builder.py +++ b/bsie/reader/builder.py @@ -40,7 +40,11 @@ class ReaderBuilder(): # cached readers _cache: typing.Dict[str, base.Reader] - def __init__(self, kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]): + def __init__( + self, + kwargs: typing.Optional[typing.Dict[str, typing.Dict[str, typing.Any]]] = None): + if kwargs is None: + kwargs = {} self._kwargs = kwargs self._cache = {} diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py index db7c2d5..5e9e0d5 100644 --- a/bsie/reader/chain.py +++ b/bsie/reader/chain.py @@ -37,7 +37,7 @@ class ReaderChain(base.Reader, typing.Generic[T_CONTENT]): def __init__( self, subreader_names: typing.Iterable[str], - cfg: typing.Any, + cfg: typing.Optional[typing.Any] = None, ): rbuild = builder.ReaderBuilder(cfg) children = [] diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py index b7587e7..1f290b5 100644 --- a/bsie/reader/image/__init__.py +++ b/bsie/reader/image/__init__.py @@ -31,7 +31,7 @@ __all__: typing.Sequence[str] = ( class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods """Read an image file.""" - def __init__(self, cfg): + def __init__(self, cfg: typing.Optional[typing.Any] = None): super().__init__(_FILE_FORMAT_READERS, cfg) ## EOF ## -- cgit v1.2.3