aboutsummaryrefslogtreecommitdiffstats
path: root/bsie/reader
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2023-03-05 19:22:58 +0100
committerMatthias Baumgartner <dev@igsor.net>2023-03-05 19:22:58 +0100
commita35b33f4f1ddcf6f1bb8ab0f41b87bf2b847f11d (patch)
treefb220da28bb7248ebf37ce09af5de88f2c1aaad4 /bsie/reader
parent7582c280ad5324a2f0427999911c7e7abc14a6ab (diff)
parentaf81318ae9311fd0b0e16949cef3cfaf7996970b (diff)
downloadbsie-main.tar.gz
bsie-main.tar.bz2
bsie-main.zip
Merge branch 'develop'HEADv0.23.03releasemain
Diffstat (limited to 'bsie/reader')
-rw-r--r--bsie/reader/__init__.py19
-rw-r--r--bsie/reader/base.py38
-rw-r--r--bsie/reader/builder.py73
-rw-r--r--bsie/reader/chain.py86
-rw-r--r--bsie/reader/exif.py44
-rw-r--r--bsie/reader/image/__init__.py31
-rw-r--r--bsie/reader/image/_pillow.py34
-rw-r--r--bsie/reader/image/_raw.py56
-rw-r--r--bsie/reader/path.py12
-rw-r--r--bsie/reader/preview/__init__.py34
-rw-r--r--bsie/reader/preview/_pg.py81
-rw-r--r--bsie/reader/preview/_pillow.py39
-rw-r--r--bsie/reader/preview/_rawpy.py61
-rw-r--r--bsie/reader/preview/utils.py34
-rw-r--r--bsie/reader/stat.py13
15 files changed, 635 insertions, 20 deletions
diff --git a/bsie/reader/__init__.py b/bsie/reader/__init__.py
index a45f22b..a1c38a9 100644
--- a/bsie/reader/__init__.py
+++ b/bsie/reader/__init__.py
@@ -1,8 +1,8 @@
"""The Reader classes return high-level content structures from files.
The Reader fulfills two purposes:
- First, it brokers between multiple libraries and file formats.
- Second, it separates multiple aspects of a file into distinct content types.
+First, it brokers between multiple libraries and file formats.
+Second, it separates multiple aspects of a file into distinct content types.
Often, different libraries focus on reading different types of content from a
file. E.g. one would use different modules to read file system infos than to
@@ -11,9 +11,18 @@ type. Each distinct type can be implemented in a file or submodule that
provides a Reader implementation. Through utilization of submodules, different
file formats can be supported.
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
"""
+# standard imports
+import typing
+
+# inner-module imports
+from .base import Reader
+from .builder import ReaderBuilder
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Reader',
+ 'ReaderBuilder',
+ )
## EOF ##
diff --git a/bsie/reader/base.py b/bsie/reader/base.py
new file mode 100644
index 0000000..a775701
--- /dev/null
+++ b/bsie/reader/base.py
@@ -0,0 +1,38 @@
+
+# standard imports
+import abc
+import typing
+
+# bsie imports
+from bsie.utils import bsfs
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Reader',
+ )
+
+
+## code ##
+
+class Reader(abc.ABC):
+    """Read and return some content from a file.
+
+    At this level a reader has no configuration of its own: two readers
+    compare equal (and hash alike) if and only if they are instances of
+    the very same class. Subclasses that carry state should extend
+    `__eq__` and `__hash__` accordingly.
+    """
+
+    def __str__(self) -> str:
+        return bsfs.typename(self)
+
+    def __repr__(self) -> str:
+        return f'{bsfs.typename(self)}()'
+
+    def __eq__(self, other: typing.Any) -> bool:
+        # Compare the exact types instead of using isinstance: isinstance
+        # makes the relation asymmetric for subclasses (base == derived but
+        # derived != base) and contradicts __hash__, which is type-based.
+        return type(other) is type(self)
+
+    def __hash__(self) -> int:
+        return hash(type(self))
+
+    @abc.abstractmethod
+    def __call__(self, path: str) -> typing.Any:
+        """Return some content of the file at *path*.
+
+        Raises a `ReaderError` if the reader cannot make sense of the file format.
+        """
+
+## EOF ##
diff --git a/bsie/reader/builder.py b/bsie/reader/builder.py
new file mode 100644
index 0000000..d32700b
--- /dev/null
+++ b/bsie/reader/builder.py
@@ -0,0 +1,73 @@
+
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, safe_load, unpack_qualified_name
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ReaderBuilder',
+ )
+
+
+## code ##
+
+class ReaderBuilder():
+    """Build and cache `bsie.reader.base.Reader` instances.
+
+    A reader is identified by its qualified class name
+    (e.g., bsie.reader.path.Path). Optional constructor
+    arguments are supplied through the *kwargs* mapping,
+    which maps a qualified class name to the keyword
+    arguments of that reader. Each reader is built at most
+    once per builder; later requests for the same name
+    return the cached instance, since it would be built
+    with identical keyword arguments anyway.
+
+    """
+
+    # keyword arguments, indexed by qualified class name
+    _kwargs: typing.Dict[str, typing.Dict[str, typing.Any]]
+
+    # previously built readers, indexed by qualified class name
+    _cache: typing.Dict[str, base.Reader]
+
+    def __init__(
+            self,
+            kwargs: typing.Optional[typing.Dict[str, typing.Dict[str, typing.Any]]] = None):
+        self._kwargs = {} if kwargs is None else kwargs
+        self._cache = {}
+
+    def build(self, name: str) -> base.Reader:
+        """Return the reader instance for the qualified class *name*.
+
+        Raises a `TypeError` if the configured keyword arguments are not
+        a dict, and a `BuilderError` if the reader's constructor fails.
+        """
+        # serve from the cache if the reader was built before
+        try:
+            return self._cache[name]
+        except KeyError:
+            pass
+
+        # split the name into module and class, then import the class
+        module_name, class_name = unpack_qualified_name(name)
+        reader_cls = safe_load(module_name, class_name)
+
+        # look up and validate the constructor arguments
+        reader_kwargs = self._kwargs.get(name, {})
+        if not isinstance(reader_kwargs, dict):
+            raise TypeError(f'expected a kwargs dict, found {bsfs.typename(reader_kwargs)}')
+
+        # instantiate; any constructor failure is reported as BuilderError
+        try:
+            instance = reader_cls(**reader_kwargs)
+        except Exception as err:
+            raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err
+
+        # remember the instance for subsequent requests
+        self._cache[name] = instance
+        return instance
+
+## EOF ##
diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py
new file mode 100644
index 0000000..79b44b4
--- /dev/null
+++ b/bsie/reader/chain.py
@@ -0,0 +1,86 @@
+
+# standard imports
+import logging
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors
+
+# inner-module imports
+from . import base
+from . import builder
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'ReaderChain',
+ )
+
+
+## code ##
+
+logger = logging.getLogger(__name__)
+
+# Content type.
+T_CONTENT = typing.TypeVar('T_CONTENT') # pylint: disable=invalid-name
+
+class ReaderChain(base.Reader, typing.Generic[T_CONTENT]):
+    """Delegate reading to the first sub-reader that can handle a file.
+
+    The chain is assembled from qualified class names of sub-readers.
+    When called, the sub-readers are tried in the given order and the
+    result of the first one that succeeds is returned.
+    """
+
+    # sub-readers for specific file formats.
+    _children: typing.Tuple[base.Reader, ...]
+
+    def __init__(
+            self,
+            subreader_names: typing.Iterable[str],
+            cfg: typing.Optional[typing.Any] = None,
+            ):
+        """Build the sub-readers named in *subreader_names*.
+
+        *cfg* is handed to the `ReaderBuilder` as its keyword-argument
+        mapping. Sub-readers that cannot be built are skipped with a
+        warning; the chain is still created if none could be built.
+        """
+        rbuild = builder.ReaderBuilder(cfg)
+        children = []
+        for name in subreader_names:
+            try:
+                # build sub-reader
+                children.append(rbuild.build(name))
+            except (ValueError,
+                    TypeError,
+                    errors.LoaderError,
+                    errors.BuilderError) as err:
+                # failed to build a child; skip and notify
+                logger.warning('failed to load reader: %s', err)
+
+        if not children:
+            logger.warning('%s failed to load any sub-readers.', bsfs.typename(self))
+
+        # copy children to member
+        self._children = tuple(children)
+
+    def __str__(self) -> str:
+        substr = ', '.join(str(child) for child in self._children)
+        return f'{bsfs.typename(self)}({substr})'
+
+    def __repr__(self) -> str:
+        return f'{bsfs.typename(self)}({self._children})'
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return super().__eq__(other) \
+            and self._children == other._children
+
+    def __hash__(self) -> int:
+        return hash((super().__hash__(), self._children))
+
+    def __call__(self, path: str) -> T_CONTENT:
+        """Return the content produced by the first sub-reader that succeeds.
+
+        Raises a `ReaderError` if at least one sub-reader recognized but
+        failed to read the file, and an `UnsupportedFileFormatError` if
+        no sub-reader supports the file at all.
+        """
+        # remember the most recent read failure so it can be reported as cause
+        last_error: typing.Optional[Exception] = None
+        for child in self._children:
+            try:
+                return child(path)
+            except errors.UnsupportedFileFormatError:
+                # child cannot read the file, skip.
+                continue
+            except errors.ReaderError as err:
+                # child failed to read the file, skip but keep the reason.
+                last_error = err
+
+        if last_error is not None:
+            raise errors.ReaderError(path) from last_error
+        raise errors.UnsupportedFileFormatError(path)
+
+## EOF ##
diff --git a/bsie/reader/exif.py b/bsie/reader/exif.py
new file mode 100644
index 0000000..2d0428b
--- /dev/null
+++ b/bsie/reader/exif.py
@@ -0,0 +1,44 @@
+
+# standard imports
+import typing
+
+# external imports
+import pyexiv2
+
+# bsie imports
+from bsie.utils import errors, filematcher
+
+# inner-module imports
+from . import base
+
+# constants
+MATCH_RULE = 'mime=image/jpeg'
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Exif',
+ )
+
+
+## code ##
+
+class Exif(base.Reader):
+    """Use pyexiv2 to read exif metadata from image files."""
+
+    def __init__(self):
+        # only files matching MATCH_RULE are handed to pyexiv2
+        self._match = filematcher.parse(MATCH_RULE)
+
+    def __call__(self, path: str) -> dict:
+        """Return the exif metadata of the file at *path*.
+
+        Raises an `UnsupportedFileFormatError` if the file does not match
+        the reader's match rule, and a `ReaderError` if pyexiv2 fails to
+        open or read the file.
+        """
+        # perform quick checks first
+        if not self._match(path):
+            raise errors.UnsupportedFileFormatError(path)
+
+        try:
+            # open the file
+            img = pyexiv2.Image(path)
+            try:
+                # read metadata
+                return img.read_exif()
+            finally:
+                # pyexiv2 does not release the image on its own
+                img.close()
+        except (TypeError, OSError, RuntimeError) as err:
+            raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py
new file mode 100644
index 0000000..89642f2
--- /dev/null
+++ b/bsie/reader/image/__init__.py
@@ -0,0 +1,31 @@
+
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+
+# inner-module imports
+from .. import chain
+
+# constants
+_FILE_FORMAT_READERS: typing.Sequence[str] = (
+ __package__ + '._raw.RawImage',
+ __package__ + '._pillow.PillowImage',
+ )
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Image',
+ )
+
+
+## code ##
+
+class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods
+    """Read an image file through the first suitable format-specific sub-reader.
+
+    The sub-readers are those listed in `_FILE_FORMAT_READERS`;
+    *cfg* is passed on to the reader chain unchanged.
+    """
+
+    def __init__(self, cfg: typing.Optional[typing.Any] = None):
+        super().__init__(subreader_names=_FILE_FORMAT_READERS, cfg=cfg)
+
+## EOF ##
diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py
new file mode 100644
index 0000000..0611d3c
--- /dev/null
+++ b/bsie/reader/image/_pillow.py
@@ -0,0 +1,34 @@
+
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+
+# bsie imports
+from bsie.utils import errors
+
+# inner-module imports
+from .. import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'PillowImage',
+ )
+
+
+## code ##
+
+class PillowImage(base.Reader):
+    """Read the content of a variety of image file types via PIL."""
+
+    def __call__(self, path: str) -> PIL.Image.Image:
+        """Open the file at *path* with PIL and return the image.
+
+        Raises an `UnsupportedFileFormatError` if PIL cannot identify
+        the file, and a `ReaderError` on any other I/O failure.
+        """
+        try:
+            image = PIL.Image.open(path)
+        except PIL.UnidentifiedImageError as exc:
+            # PIL does not recognize the format
+            raise errors.UnsupportedFileFormatError(path) from exc
+        except IOError as exc:
+            # the file could not be read
+            raise errors.ReaderError(path) from exc
+        return image
+
+# EOF ##
diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py
new file mode 100644
index 0000000..e5745aa
--- /dev/null
+++ b/bsie/reader/image/_raw.py
@@ -0,0 +1,56 @@
+
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+import rawpy
+
+# bsie imports
+from bsie.utils import errors, filematcher
+
+# inner-module imports
+from .. import base
+
+# constants
+MATCH_RULE = 'mime={image/x-nikon-nef} | extension={nef}'
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'RawImage',
+ )
+
+
+## code ##
+
+class RawImage(base.Reader):
+    """Use rawpy to read content of raw image file types.
+
+    The keyword argument *file_match_rule* overrides the default
+    `MATCH_RULE` that decides which files are handed to rawpy. All other
+    keyword arguments are forwarded to rawpy's `postprocess`.
+    """
+
+    # file matcher
+    _match: filematcher.Matcher
+
+    # additional kwargs to rawpy's postprocess
+    _rawpy_kwargs: typing.Dict[str, typing.Any]
+
+    def __init__(self, **rawpy_kwargs):
+        # the match rule is not a rawpy argument, hence take it out first
+        match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE)
+        self._match = filematcher.parse(match_rule)
+        self._rawpy_kwargs = rawpy_kwargs
+
+    def __call__(self, path: str) -> PIL.Image.Image:
+        """Return the postprocessed raw image at *path* as PIL image.
+
+        Raises an `UnsupportedFileFormatError` if the file does not match
+        the reader's match rule, and a `ReaderError` if rawpy fails.
+        """
+        # perform quick checks first
+        if not self._match(path):
+            raise errors.UnsupportedFileFormatError(path)
+
+        try:
+            # open file with rawpy; the context manager closes the raw
+            # file again once the pixel data has been extracted
+            with rawpy.imread(path) as raw:
+                ary = raw.postprocess(**self._rawpy_kwargs)
+            # convert to PIL.Image
+            return PIL.Image.fromarray(ary)
+        except (rawpy.LibRawFatalError, # pylint: disable=no-member # pylint doesn't find the errors
+                rawpy.NotSupportedError, # pylint: disable=no-member
+                rawpy.LibRawNonFatalError, # pylint: disable=no-member
+                ) as err:
+            raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/path.py b/bsie/reader/path.py
index d60f187..45eb127 100644
--- a/bsie/reader/path.py
+++ b/bsie/reader/path.py
@@ -1,14 +1,10 @@
"""The Path reader produces a file path.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import typing
-# bsie imports
-from bsie.base import reader
+# inner-module imports
+from . import base
# exports
__all__: typing.Sequence[str] = (
@@ -18,7 +14,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Path(reader.Reader):
+class Path(base.Reader):
"""Return the path."""
def __call__(self, path: str) -> str:
diff --git a/bsie/reader/preview/__init__.py b/bsie/reader/preview/__init__.py
new file mode 100644
index 0000000..791a133
--- /dev/null
+++ b/bsie/reader/preview/__init__.py
@@ -0,0 +1,34 @@
+
+# imports
+import typing
+
+# external imports
+import PIL.Image
+
+# inner-module imports
+from .. import chain
+
+# constants
+_FILE_FORMAT_READERS: typing.Sequence[str] = (
+ # native image formats
+ __package__ + '._pillow.PillowPreviewReader',
+ __package__ + '._rawpy.RawpyPreviewReader',
+ # multiformat readers
+ __package__ + '._pg.PreviewGeneratorReader',
+ )
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Preview',
+ )
+
+
+## code ##
+
+class Preview(chain.ReaderChain[typing.Callable[[int], PIL.Image.Image]]): # pylint: disable=too-few-public-methods
+    """Create a preview from a file.
+
+    The sub-readers are those listed in `_FILE_FORMAT_READERS`;
+    *cfg* is passed on to the reader chain unchanged.
+    """
+
+    def __init__(self, cfg: typing.Optional[typing.Any] = None):
+        super().__init__(subreader_names=_FILE_FORMAT_READERS, cfg=cfg)
+
+## EOF ##
diff --git a/bsie/reader/preview/_pg.py b/bsie/reader/preview/_pg.py
new file mode 100644
index 0000000..401b33d
--- /dev/null
+++ b/bsie/reader/preview/_pg.py
@@ -0,0 +1,81 @@
+
+# standard imports
+from functools import partial
+import contextlib
+import io
+import os
+import shutil
+import tempfile
+import typing
+
+# external imports
+from preview_generator.manager import PreviewManager
+import PIL.Image
+
+# bsie imports
+from bsie.utils import errors
+
+# inner-module imports
+from .. import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'PreviewGeneratorReader',
+ )
+
+
+## code ##
+
+class PreviewGeneratorReader(base.Reader):
+    """Use preview_generator to create previews for various data formats.
+
+    See https://github.com/algoo/preview-generator for details.
+
+    Previews are written to a cache directory. If none is given, a
+    temporary directory is created and removed again when the reader
+    is finalized.
+    """
+
+    # PreviewManager instance.
+    _mngr: PreviewManager
+
+    # Set of mime types supported by PreviewManager.
+    _supported_mimetypes: typing.Set[str]
+
+    # PreviewManager cache.
+    _cache: str
+
+    # Determines whether the cache directory should be deleted after use.
+    _cleanup: bool
+
+    def __init__(self, cache: typing.Optional[str] = None):
+        # initialize cache directory
+        # TODO: initialize in memory, e.g., via PyFilesystem
+        if cache is None:
+            self._cache = tempfile.mkdtemp(prefix='bsie-preview-cache-')
+            self._cleanup = True
+        else:
+            self._cache = cache
+            self._cleanup = False
+        # create preview generator; discard whatever is written to stderr
+        # while the manager is being set up
+        with contextlib.redirect_stderr(io.StringIO()):
+            self._mngr = PreviewManager(self._cache, create_folder=True)
+        self._supported_mimetypes = set(self._mngr.get_supported_mimetypes())
+
+    def __del__(self):
+        # __init__ may have failed before the attributes were assigned,
+        # hence do not rely on their presence here.
+        if getattr(self, '_cleanup', False):
+            shutil.rmtree(self._cache, ignore_errors=True)
+
+    def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]:
+        """Return a callback that produces a preview of the file at *path*.
+
+        The callback takes the maximum side length; the preview is only
+        generated once the callback is invoked.
+
+        Raises a `ReaderError` if *path* does not exist, and an
+        `UnsupportedFileFormatError` if its mime type is not supported.
+        """
+        if not os.path.exists(path):
+            raise errors.ReaderError(path)
+        if self._mngr.get_mimetype(path) not in self._supported_mimetypes:
+            raise errors.UnsupportedFileFormatError(path)
+        return partial(self._preview_callback, path)
+
+    def _preview_callback(self, path: str, max_side: int) -> PIL.Image.Image:
+        """Produce a jpeg preview of *path* with at most *max_side* side length.
+
+        Raises a `ReaderError` if the preview cannot be generated or opened.
+        """
+        try:
+            # generate the preview
+            preview_path = self._mngr.get_jpeg_preview(path, width=max_side, height=max_side)
+            # open the preview and return
+            return PIL.Image.open(preview_path)
+        except Exception as err: # FIXME: less generic exception!
+            raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/preview/_pillow.py b/bsie/reader/preview/_pillow.py
new file mode 100644
index 0000000..2b797c6
--- /dev/null
+++ b/bsie/reader/preview/_pillow.py
@@ -0,0 +1,39 @@
+
+# standard imports
+from functools import partial
+import typing
+
+# external imports
+import PIL.Image
+
+# bsie imports
+from bsie.utils import errors
+
+# inner-module imports
+from . import utils
+from .. import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'PillowPreviewReader',
+ )
+
+
+## code ##
+
+class PillowPreviewReader(base.Reader):
+    """Create previews of image files with the Pillow library."""
+
+    def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]:
+        """Return a callback that resizes the image at *path*.
+
+        Raises an `UnsupportedFileFormatError` if PIL cannot identify
+        the file, and a `ReaderError` on any other I/O failure.
+        """
+        try:
+            image = PIL.Image.open(path)
+        except PIL.UnidentifiedImageError as exc:
+            # PIL does not recognize the format, skip file
+            raise errors.UnsupportedFileFormatError(path) from exc
+        except OSError as exc:
+            # the file could not be read
+            raise errors.ReaderError(path) from exc
+        # the callback only needs the maximum side length
+        return partial(utils.resize, image)
+
+# EOF ##
diff --git a/bsie/reader/preview/_rawpy.py b/bsie/reader/preview/_rawpy.py
new file mode 100644
index 0000000..16e8675
--- /dev/null
+++ b/bsie/reader/preview/_rawpy.py
@@ -0,0 +1,61 @@
+
+# standard imports
+from functools import partial
+import typing
+
+# external imports
+import PIL.Image
+import rawpy
+
+# bsie imports
+from bsie.utils import errors, filematcher
+
+# inner-module imports
+from . import utils
+from .. import base
+
+# constants
+MATCH_RULE = 'mime={image/x-nikon-nef} | extension={nef}'
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'RawpyPreviewReader',
+ )
+
+
+## code ##
+
+class RawpyPreviewReader(base.Reader):
+    """Produce previews for raw image files using the rawpy library.
+
+    The keyword argument *file_match_rule* overrides the default
+    `MATCH_RULE` that decides which files are handed to rawpy. All other
+    keyword arguments are forwarded to rawpy's `postprocess`.
+    """
+
+    # file matcher
+    _match: filematcher.Matcher
+
+    # additional kwargs to rawpy's postprocess
+    _rawpy_kwargs: typing.Dict[str, typing.Any]
+
+    def __init__(self, **rawpy_kwargs):
+        # the match rule is not a rawpy argument, hence take it out first
+        match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE)
+        self._match = filematcher.parse(match_rule)
+        self._rawpy_kwargs = rawpy_kwargs
+
+    def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]:
+        """Return a callback that resizes the raw image at *path*.
+
+        The raw file is postprocessed right away; the callback takes the
+        maximum side length and only performs the resizing.
+
+        Raises an `UnsupportedFileFormatError` if the file does not match
+        the reader's match rule, and a `ReaderError` if rawpy fails.
+        """
+        # perform quick checks first
+        if not self._match(path):
+            raise errors.UnsupportedFileFormatError(path)
+
+        try:
+            # open file with rawpy; the context manager closes the raw
+            # file again once the pixel data has been extracted
+            with rawpy.imread(path) as raw:
+                ary = raw.postprocess(**self._rawpy_kwargs)
+            # convert to PIL.Image
+            img = PIL.Image.fromarray(ary)
+            # return callback
+            return partial(utils.resize, img)
+
+        except (rawpy.LibRawFatalError, # pylint: disable=no-member # pylint doesn't find the errors
+                rawpy.NotSupportedError, # pylint: disable=no-member
+                rawpy.LibRawNonFatalError, # pylint: disable=no-member
+                ) as err:
+            raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/preview/utils.py b/bsie/reader/preview/utils.py
new file mode 100644
index 0000000..82ecc31
--- /dev/null
+++ b/bsie/reader/preview/utils.py
@@ -0,0 +1,34 @@
+
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'resize',
+ )
+
+
+## code ##
+
+def resize(
+        img: PIL.Image.Image,
+        max_size: int,
+        ) -> PIL.Image.Image:
+    """Resize an image such that its longer side measures *max_size* pixels.
+
+    The aspect ratio is preserved up to rounding. The longer side is always
+    set to *max_size*, i.e., images smaller than that are enlarged. The
+    shorter side is at least one pixel, even for extreme aspect ratios.
+    """
+    # determine target dimensions
+    ratio = img.width / img.height
+    if img.width > img.height:
+        # landscape: the height follows from the width. Very wide images
+        # would round to a height of zero, which cannot be resized to.
+        width, height = max_size, max(1, round(max_size / ratio))
+    else:
+        # portrait or square: the width follows from the height.
+        width, height = max(1, round(ratio * max_size)), max_size
+    # rescale and return
+    return img.resize(
+        (width, height),
+        resample=PIL.Image.Resampling.LANCZOS, # create high-quality image
+        reducing_gap=3.0, # optimize computation via fast size reduction
+        )
+
+## EOF ##
diff --git a/bsie/reader/stat.py b/bsie/reader/stat.py
index fc5fb24..f42e7fb 100644
--- a/bsie/reader/stat.py
+++ b/bsie/reader/stat.py
@@ -1,15 +1,14 @@
"""The Stat reader produces filesystem stat information.
-
-Part of the bsie module.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import typing
# bsie imports
-from bsie.base import errors, reader
+from bsie.utils import errors
+
+# inner-module imports
+from . import base
# exports
__all__: typing.Sequence[str] = (
@@ -19,7 +18,7 @@ __all__: typing.Sequence[str] = (
## code ##
-class Stat(reader.Reader):
+class Stat(base.Reader):
"""Read and return the filesystem's stat infos."""
def __call__(self, path: str) -> os.stat_result: