aboutsummaryrefslogtreecommitdiffstats
path: root/bsie/reader
diff options
context:
space:
mode:
Diffstat (limited to 'bsie/reader')
-rw-r--r--bsie/reader/__init__.py13
-rw-r--r--bsie/reader/base.py47
-rw-r--r--bsie/reader/builder.py78
-rw-r--r--bsie/reader/chain.py88
-rw-r--r--bsie/reader/image/__init__.py37
-rw-r--r--bsie/reader/image/_pillow.py39
-rw-r--r--bsie/reader/image/_raw.py61
-rw-r--r--bsie/reader/path.py8
-rw-r--r--bsie/reader/stat.py9
9 files changed, 373 insertions, 7 deletions
diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py
index a45f22b..4163d1c 100644
--- a/bsie/reader/__init__.py
+++ b/bsie/reader/__init__.py
@@ -15,5 +15,18 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
+# standard imports
+import typing
+# inner-module imports
+from .base import Reader
+from .builder import ReaderBuilder
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Reader',
+ 'ReaderBuilder',
+ )
+
+## EOF ##
## EOF ##
diff --git a/bsie/reader/base.py b/bsie/reader/base.py
new file mode 100644
index 0000000..099a327
--- /dev/null
+++ b/bsie/reader/base.py
@@ -0,0 +1,47 @@
+"""The Reader classes return high-level content structures from files.
+
+The Reader fulfills two purposes:
+ First, it brokers between multiple libraries and file formats.
+ Second, it separates multiple aspects of a file into distinct content types.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import abc
+import typing
+
+# bsie imports
+from bsie.utils import bsfs
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Reader',
+ )
+
+
+## code ##
+
+class Reader(abc.ABC):
+    """Read and return some content from a file.
+
+    The base class holds no state. Two readers are therefore equal if,
+    and only if, they are instances of the very same class. Subclasses
+    that add state should extend `__eq__` and `__hash__` accordingly.
+    """
+
+    def __str__(self) -> str:
+        return bsfs.typename(self)
+
+    def __repr__(self) -> str:
+        return f'{bsfs.typename(self)}()'
+
+    def __eq__(self, other: typing.Any) -> bool:
+        # Compare the exact type. An isinstance check is asymmetric
+        # (parent == child, but child != parent) and declares objects
+        # equal whose hashes differ, which breaks the eq/hash contract.
+        return type(other) is type(self) # pylint: disable=unidiomatic-typecheck
+
+    def __hash__(self) -> int:
+        # must stay consistent with __eq__: equal readers share their type.
+        return hash(type(self))
+
+    @abc.abstractmethod
+    def __call__(self, path: str) -> typing.Any:
+        """Return some content of the file at *path*.
+        Raises a `ReaderError` if the reader cannot make sense of the file format.
+        """
+
+## EOF ##
diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py
new file mode 100644
index 0000000..8699e75
--- /dev/null
+++ b/bsie/reader/builder.py
@@ -0,0 +1,78 @@
+"""
+Build reader instances from their qualified class names.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ReaderBuilder',
+ )
+
+
+## code ##
+
+class ReaderBuilder():
+    """Build and cache `bsie.reader.Reader` instances.
+
+    A reader is identified by its qualified class name
+    (e.g., bsie.reader.path.Path). Optional constructor
+    arguments are supplied through the *kwargs* mapping,
+    which has the qualified class name as key and the
+    keyword arguments of that class as value.
+    Each reader is built at most once; subsequent requests
+    for the same name are served from a cache.
+
+    """
+
+    # constructor keyword arguments, by qualified class name
+    _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
+
+    # previously built readers, by qualified class name
+    _cache: typing.Dict[str, base.Reader]
+
+    def __init__(
+            self,
+            kwargs: typing.Optional[typing.Dict[str, typing.Dict[str, typing.Any]]] = None):
+        self._kwargs = {} if kwargs is None else kwargs
+        self._cache = {}
+
+    def build(self, name: str) -> base.Reader:
+        """Return the reader instance for the qualified class *name*.
+
+        Raises a `TypeError` if the keyword arguments registered for *name*
+        are not a dict, and a `BuilderError` if the constructor fails.
+        """
+        # serve repeated requests from the cache
+        try:
+            return self._cache[name]
+        except KeyError:
+            pass
+
+        # validate the name, split it into its components, and import the class
+        module_name, class_name = unpack_qualified_name(name)
+        reader_cls = safe_load(module_name, class_name)
+
+        # look up the constructor arguments
+        reader_kwargs = self._kwargs.get(name, {})
+        if not isinstance(reader_kwargs, dict):
+            raise TypeError(f'expected a kwargs dict, found {bsfs.typename(reader_kwargs)}')
+
+        # any constructor failure is reported as a BuilderError
+        try:
+            reader = reader_cls(**reader_kwargs)
+        except Exception as err:
+            raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err
+
+        # remember the instance for later requests
+        self._cache[name] = reader
+        return reader
+
+## EOF ##
diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py
new file mode 100644
index 0000000..5e9e0d5
--- /dev/null
+++ b/bsie/reader/chain.py
@@ -0,0 +1,88 @@
+"""
+Chain several readers and return the content of the first one that succeeds.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import logging
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors
+
+# inner-module imports
+from . import base
+from . import builder
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ReaderChain',
+ )
+
+
+## code ##
+
+# module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Content type, i.e. the return type of ReaderChain.__call__.
+T_CONTENT = typing.TypeVar('T_CONTENT') # pylint: disable=invalid-name
+
+class ReaderChain(base.Reader, typing.Generic[T_CONTENT]):
+    """Read a file with the first sub-reader that supports it.
+
+    The sub-readers are given by their qualified class names and are
+    built with a `ReaderBuilder`. Sub-readers that cannot be built are
+    skipped with a warning. When called, the sub-readers are tried in
+    the order in which they were passed to the constructor.
+    """
+
+    # sub-readers for specific file formats.
+    _children: typing.Tuple[base.Reader, ...]
+
+    def __init__(
+            self,
+            subreader_names: typing.Iterable[str],
+            cfg: typing.Optional[typing.Dict[str, typing.Dict[str, typing.Any]]] = None,
+            ):
+        """Build the sub-readers named in *subreader_names*.
+
+        *cfg* is handed to the `ReaderBuilder` as its keyword argument
+        mapping (qualified class name as key, kwargs as value).
+        """
+        rbuild = builder.ReaderBuilder(cfg)
+        children = []
+        for name in subreader_names:
+            try:
+                # build sub-reader
+                children.append(rbuild.build(name))
+            except (ValueError,
+                    TypeError,
+                    errors.LoaderError,
+                    errors.BuilderError) as err:
+                # failed to build a child; skip and notify
+                logger.warning('failed to load reader: %s', err)
+
+        if not children:
+            logger.warning('%s failed to load any sub-readers.', bsfs.typename(self))
+
+        # copy children to member
+        self._children = tuple(children)
+
+    def __str__(self) -> str:
+        substr = ', '.join(str(child) for child in self._children)
+        return f'{bsfs.typename(self)}({substr})'
+
+    def __repr__(self) -> str:
+        return f'{bsfs.typename(self)}({self._children})'
+
+    def __eq__(self, other: typing.Any) -> bool:
+        # chains are equal if they have the same type and the same sub-readers
+        return super().__eq__(other) \
+           and self._children == other._children
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self._children))
+
+    def __call__(self, path: str) -> T_CONTENT:
+        """Return the content of the file at *path* from the first sub-reader that succeeds.
+
+        Raises a `ReaderError` if at least one sub-reader failed with a
+        `ReaderError` that is not an `UnsupportedFileFormatError`.
+        Otherwise (including a chain without sub-readers) raises an
+        `UnsupportedFileFormatError`.
+        """
+        # error to raise if no child succeeds
+        raise_error = errors.UnsupportedFileFormatError
+        for child in self._children:
+            try:
+                return child(path)
+            except errors.UnsupportedFileFormatError:
+                # child does not handle this file format, try the next one.
+                pass
+            except errors.ReaderError:
+                # child cannot read the file, skip, but report the more severe error.
+                raise_error = errors.ReaderError # type: ignore [assignment] # mypy is confused
+
+        raise raise_error(path)
+
+## EOF ##
diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py
new file mode 100644
index 0000000..1f290b5
--- /dev/null
+++ b/bsie/reader/image/__init__.py
@@ -0,0 +1,37 @@
+"""
+Read image files via a chain of format-specific readers.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+
+# inner-module imports
+from .. import chain
+
+# constants
+# Qualified class names of the format-specific readers within this package.
+# The sequence is passed to the ReaderChain constructor by the Image class.
+_FILE_FORMAT_READERS: typing.Sequence[str] = (
+ __package__ + '._raw.RawImage',
+ __package__ + '._pillow.PillowImage',
+ )
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Image',
+ )
+
+
+## code ##
+
+# NOTE: PIL.Image is the module; PIL.Image.Image is the image class it defines.
+class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods
+    """Read an image file with the readers listed in _FILE_FORMAT_READERS."""
+
+    def __init__(self, cfg: typing.Optional[typing.Any] = None):
+        # the sub-reader names are fixed; only their configuration is passed through.
+        super().__init__(subreader_names=_FILE_FORMAT_READERS, cfg=cfg)
+
+## EOF ##
diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py
new file mode 100644
index 0000000..3144509
--- /dev/null
+++ b/bsie/reader/image/_pillow.py
@@ -0,0 +1,39 @@
+"""
+Read image files with Pillow (PIL).
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+
+# bsie imports
+from bsie.utils import errors
+
+# inner-module imports
+from .. import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'PillowImage',
+ )
+
+
+## code ##
+
+class PillowImage(base.Reader):
+    """Use PIL to read content of a variety of image file types."""
+
+    def __call__(self, path: str) -> PIL.Image.Image:
+        """Open the file at *path* with PIL and return the image.
+
+        Raises an `UnsupportedFileFormatError` if PIL cannot identify the
+        file as an image, and a `ReaderError` on any other I/O failure.
+        """
+        try:
+            # open file with PIL
+            return PIL.Image.open(path)
+        except PIL.UnidentifiedImageError as err:
+            # must precede the OSError clause since it is a subclass of OSError.
+            raise errors.UnsupportedFileFormatError(path) from err
+        except OSError as err:
+            raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py
new file mode 100644
index 0000000..cd60453
--- /dev/null
+++ b/bsie/reader/image/_raw.py
@@ -0,0 +1,61 @@
+"""
+Read raw image files with rawpy.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+import rawpy
+
+# bsie imports
+from bsie.utils import errors, filematcher
+
+# inner-module imports
+from .. import base
+
+# constants
+# Default file match rule of RawImage; used unless the 'file_match_rule'
+# keyword argument is passed to its constructor. The rule string is parsed
+# by filematcher.parse. Presumably it selects Nikon NEF files by mime type
+# or by file extension -- TODO confirm against the filematcher syntax.
+MATCH_RULE = 'mime={image/x-nikon-nef} | extension={nef}'
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'RawImage',
+ )
+
+
+## code ##
+
+class RawImage(base.Reader):
+    """Use rawpy to read content of raw image file types."""
+
+    # file matcher
+    _match: filematcher.Matcher
+
+    # additional kwargs to rawpy's postprocess
+    _rawpy_kwargs: typing.Dict[str, typing.Any]
+
+    def __init__(self, **rawpy_kwargs: typing.Any):
+        """Create a reader for raw image files.
+
+        The optional keyword argument *file_match_rule* overrides the
+        default MATCH_RULE. All remaining keyword arguments are passed
+        on to rawpy's postprocess.
+        """
+        match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE)
+        self._match = filematcher.parse(match_rule)
+        self._rawpy_kwargs = rawpy_kwargs
+
+    def __call__(self, path: str) -> PIL.Image.Image:
+        """Return the postprocessed content of the raw image at *path*.
+
+        Raises an `UnsupportedFileFormatError` if *path* does not satisfy
+        the file match rule, and a `ReaderError` if rawpy fails on the file.
+        """
+        # perform quick checks first
+        if not self._match(path):
+            raise errors.UnsupportedFileFormatError(path)
+
+        try:
+            # open file with rawpy; the context manager closes the
+            # raw file again once it was postprocessed.
+            with rawpy.imread(path) as raw:
+                ary = raw.postprocess(**self._rawpy_kwargs)
+            # convert to PIL.Image
+            return PIL.Image.fromarray(ary)
+        except (rawpy.LibRawFatalError, # pylint: disable=no-member # pylint doesn't find the errors
+                rawpy.NotSupportedError, # pylint: disable=no-member
+                rawpy.LibRawNonFatalError, # pylint: disable=no-member
+                ) as err:
+            raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/path.py b/bsie/reader/path.py
index d60f187..1ca05a0 100644
--- a/bsie/reader/path.py
+++ b/bsie/reader/path.py
@@ -4,11 +4,11 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
-# bsie imports
-from bsie.base import reader
+# inner-module imports
+from . import base
# exports
__all__: typing.Sequence[str] = (
@@ -18,7 +18,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Path(reader.Reader):
+class Path(base.Reader):
"""Return the path."""
def __call__(self, path: str) -> str:
diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py
index fc5fb24..706dc47 100644
--- a/bsie/reader/stat.py
+++ b/bsie/reader/stat.py
@@ -4,12 +4,15 @@ Part of the bsie module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import typing
# bsie imports
-from bsie.base import errors, reader
+from bsie.utils import errors
+
+# inner-module imports
+from . import base
# exports
__all__: typing.Sequence[str] = (
@@ -19,7 +22,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Stat(reader.Reader):
+class Stat(base.Reader):
"""Read and return the filesystem's stat infos."""
def __call__(self, path: str) -> os.stat_result: