diff options
author | Matthias Baumgartner <dev@igsor.net> | 2023-02-08 19:25:19 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2023-02-08 19:25:19 +0100 |
commit | 7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3 (patch) | |
tree | d280d9d1e19e4f7a9d0d4b5405603c729e1fdcce /bsie | |
parent | 05a841215c82ef40d4679dfc4d2c26572bd4d349 (diff) | |
parent | 0d0144466919cfb168e75c2af26d5cb74e10bfa0 (diff) | |
download | bsie-7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3.tar.gz bsie-7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3.tar.bz2 bsie-7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3.zip |
Merge branch 'previews' into develop
Diffstat (limited to 'bsie')
-rw-r--r-- | bsie/apps/index.py | 23 | ||||
-rw-r--r-- | bsie/apps/info.py | 4 | ||||
-rw-r--r-- | bsie/extractor/base.py | 1 | ||||
-rw-r--r-- | bsie/extractor/image/colors_spatial.py | 2 | ||||
-rw-r--r-- | bsie/extractor/preview.py | 99 | ||||
-rw-r--r-- | bsie/lib/__init__.py | 1 | ||||
-rw-r--r-- | bsie/lib/bsie.py | 10 | ||||
-rw-r--r-- | bsie/lib/builder.py | 9 | ||||
-rw-r--r-- | bsie/lib/naming_policy.py | 120 | ||||
-rw-r--r-- | bsie/lib/pipeline.py | 18 | ||||
-rw-r--r-- | bsie/reader/chain.py | 11 | ||||
-rw-r--r-- | bsie/reader/image/__init__.py | 1 | ||||
-rw-r--r-- | bsie/reader/image/_pillow.py | 2 | ||||
-rw-r--r-- | bsie/reader/image/_raw.py | 6 | ||||
-rw-r--r-- | bsie/reader/preview/__init__.py | 39 | ||||
-rw-r--r-- | bsie/reader/preview/_pg.py | 86 | ||||
-rw-r--r-- | bsie/reader/preview/_pillow.py | 44 | ||||
-rw-r--r-- | bsie/reader/preview/_rawpy.py | 66 | ||||
-rw-r--r-- | bsie/reader/preview/utils.py | 39 | ||||
-rw-r--r-- | bsie/utils/namespaces.py | 4 | ||||
-rw-r--r-- | bsie/utils/node.py | 29 |
21 files changed, 566 insertions, 48 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 21c2318..8798c49 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -11,9 +11,9 @@ import typing # bsie imports from bsie.extractor import ExtractorBuilder -from bsie.lib import BSIE, PipelineBuilder +from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy from bsie.reader import ReaderBuilder -from bsie.utils import bsfs, errors +from bsie.utils import bsfs, errors, node as node_ # exports __all__: typing.Sequence[str] = ( @@ -26,7 +26,9 @@ __all__: typing.Sequence[str] = ( def main(argv): """Index files or directories into BSFS.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='index') - parser.add_argument('--user', type=bsfs.URI, default=bsfs.URI('http://example.com/me'), + parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'), + help='') + parser.add_argument('--user', type=str, default='me', help='') parser.add_argument('--collect', action='append', default=[], help='') @@ -47,6 +49,9 @@ def main(argv): rbuild = ReaderBuilder() # extractor builder ebuild = ExtractorBuilder([ + {'bsie.extractor.preview.Preview': { + 'max_sides': [50], + }}, {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -66,16 +71,19 @@ def main(argv): ]) # pipeline builder pbuild = PipelineBuilder( - bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')), rbuild, ebuild, ) # build pipeline pipeline = pbuild.build() + # build the naming policy + naming_policy = DefaultNamingPolicy( + host=args.host, + user=args.user, + ) # build BSIE frontend - bsie = BSIE(pipeline, args.collect, args.discard) - + bsie = BSIE(pipeline, naming_policy, args.collect, args.discard) def walk(handle): """Walk through given input files.""" @@ -83,7 +91,6 @@ def main(argv): # FIXME: simplify code (below but maybe also above) # FIXME: How to handle dependencies between data? # E.g. do I still want to link to a tag despite not being permitted to set its label? - # FIXME: node renaming? # index input paths for path in args.input_file: @@ -112,6 +119,8 @@ def main(argv): store.migrate(bsie.schema) # process files def handle(node, pred, value): + if isinstance(value, node_.Node): + value = store.node(value.node_type, value.uri) store.node(node.node_type, node.uri).set(pred.uri, value) walk(handle) # return store diff --git a/bsie/apps/info.py b/bsie/apps/info.py index 64a4eba..750aedc 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -35,6 +35,9 @@ def main(argv): rbuild = ReaderBuilder() # extractor builder ebuild = ExtractorBuilder([ + {'bsie.extractor.preview.Preview': { + 'max_sides': [50, 200], + }}, {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -54,7 +57,6 @@ def main(argv): ]) # pipeline builder pbuild = PipelineBuilder( - bsfs.Namespace('http://example.com/me/'), # not actually used rbuild, ebuild, ) diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py index 7401244..89183f9 100644 --- a/bsie/extractor/base.py +++ b/bsie/extractor/base.py @@ -30,6 +30,7 @@ SCHEMA_PREAMBLE = ''' # common bsfs prefixes prefix bsfs: <http://bsfs.ai/schema/> prefix bse: <http://bsfs.ai/schema/Entity#> + prefix bsp: <http://bsfs.ai/schema/Preview#> # default definitions bsfs:Array rdfs:subClassOf bsfs:Literal . diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py index ce5b9f2..15fd281 100644 --- a/bsie/extractor/image/colors_spatial.py +++ b/bsie/extractor/image/colors_spatial.py @@ -120,7 +120,7 @@ class ColorsSpatial(base.Extractor): def extract( self, subject: node.Node, - content: PIL.Image, + content: PIL.Image.Image, principals: typing.Iterable[bsfs.schema.Predicate], ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: # check principals diff --git a/bsie/extractor/preview.py b/bsie/extractor/preview.py new file mode 100644 index 0000000..1531d62 --- /dev/null +++ b/bsie/extractor/preview.py @@ -0,0 +1,99 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import io +import typing + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import bsfs, node, ns + +# inner-module imports +from . import base + +# exports +__all__: typing.Sequence[str] = ( + 'Preview', + ) + + +## code ## + +class Preview(base.Extractor): + """Extract previews.""" + + CONTENT_READER = 'bsie.reader.preview.Preview' + + def __init__(self, max_sides: typing.Iterable[int]): + super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' + + bsfs:Preview rdfs:subClassOf bsfs:Node . + bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal . + bsfs:JPEG rdfs:subClassOf bsfs:BinaryBlob . + + bse:preview rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range bsfs:Preview ; + bsfs:unique "false"^^xsd:boolean . + + bsp:width rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Preview ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bsp:height rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Preview ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + + bsp:asset rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Preview ; + rdfs:range bsfs:JPEG ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + # initialize extra args + self.max_sides = set(max_sides) + + def __eq__(self, other: typing.Any) -> bool: + return super().__eq__(other) \ + and self.max_sides == other.max_sides + + def __hash__(self) -> int: + return hash((super().__hash__(), tuple(sorted(self.max_sides)))) + + def extract( + self, + subject: node.Node, + content: typing.Callable[[int], PIL.Image.Image], + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + # check principals + if self.schema.predicate(ns.bse.preview) not in principals: + return + + for max_side in self.max_sides: + # get the preview in the right resolution + img = content(max_side) + # convert the preview to jpeg + buffer = io.BytesIO() + img.save(buffer, format='jpeg') + # create a preview node + preview = node.Node(ns.bsfs.Preview, + ucid=bsfs.uuid.UCID.from_bytes(buffer.getvalue()), + size=max_side, + source=subject, + ) + # yield triples + yield subject, self.schema.predicate(ns.bse.preview), preview + yield preview, self.schema.predicate(ns.bsp.width), img.width + yield preview, self.schema.predicate(ns.bsp.height), img.height + yield preview, self.schema.predicate(ns.bsp.asset), buffer.getvalue() + +## EOF ## diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py index 4239d3b..48379de 100644 --- a/bsie/lib/__init__.py +++ b/bsie/lib/__init__.py @@ -10,6 +10,7 @@ import typing # inner-module imports from .bsie import BSIE from .builder import PipelineBuilder +from .naming_policy import DefaultNamingPolicy # exports __all__: typing.Sequence[str] = ( diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index 668783d..a572525 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -11,6 +11,7 @@ import typing from bsie.utils import bsfs, node, ns # inner-module imports +from .naming_policy import NamingPolicy from .pipeline import Pipeline # exports @@ -41,15 +42,18 @@ class BSIE(): def __init__( self, - # pipeline builder. + # pipeline. pipeline: Pipeline, + # naming policy + naming_policy: NamingPolicy, # principals to extract at most. None implies all available w.r.t. extractors. collect: typing.Optional[typing.Iterable[bsfs.URI]] = None, # principals to discard. discard: typing.Optional[typing.Iterable[bsfs.URI]] = None, ): - # store pipeline + # store pipeline and naming policy self._pipeline = pipeline + self._naming_policy = naming_policy # start off with available principals self._principals = {pred.uri for pred in self._pipeline.principals} # limit principals to specified ones by argument. @@ -89,6 +93,6 @@ class BSIE(): # predicate lookup principals = {self.schema.predicate(pred) for pred in principals} # invoke pipeline - yield from self._pipeline(path, principals) + yield from self._naming_policy(self._pipeline(path, principals)) ## EOF ## diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py index c2abffe..39da441 100644 --- a/bsie/lib/builder.py +++ b/bsie/lib/builder.py @@ -11,7 +11,7 @@ import typing # bsie imports from bsie.extractor import ExtractorBuilder from bsie.reader import ReaderBuilder -from bsie.utils import bsfs, errors +from bsie.utils import errors # inner-module imports from . import pipeline @@ -29,9 +29,6 @@ logger = logging.getLogger(__name__) class PipelineBuilder(): """Build `bsie.tools.pipeline.Pipeline` instances.""" - # Prefix to be used in the Pipeline. - prefix: bsfs.Namespace - # builder for Readers. rbuild: ReaderBuilder @@ -40,11 +37,9 @@ class PipelineBuilder(): def __init__( self, - prefix: bsfs.Namespace, reader_builder: ReaderBuilder, extractor_builder: ExtractorBuilder, ): - self.prefix = prefix self.rbuild = reader_builder self.ebuild = extractor_builder @@ -80,6 +75,6 @@ class PipelineBuilder(): except errors.BuilderError as err: # failed to build reader logger.error(str(err)) - return pipeline.Pipeline(self.prefix, ext2rdr) + return pipeline.Pipeline(ext2rdr) ## EOF ## diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py new file mode 100644 index 0000000..131a70b --- /dev/null +++ b/bsie/lib/naming_policy.py @@ -0,0 +1,120 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import abc +import os +import typing + +# bsie imports +from bsie.utils import bsfs, errors, ns +from bsie.utils.node import Node + +# exports +__all__: typing.Sequence[str] = ( + 'DefaultNamingPolicy', + ) + + +## code ## + +class NamingPolicy(): + """Determine node uri's from node hints.""" + def __call__( + self, + iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], + ): + """Apply the policy on a triple iterator.""" + return NamingPolicyIterator(self, iterable) + + @abc.abstractmethod + def handle_node(self, node: Node) -> Node: + """Apply the policy on a node.""" + + +class NamingPolicyIterator(): + """Iterates over triples, determines uris according to a *policy* as it goes.""" + + # source triple iterator. + _iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]] + + # naming policy + _policy: NamingPolicy + + def __init__( + self, + policy: NamingPolicy, + iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], + ): + self._iterable = iterable + self._policy = policy + + def __iter__(self): + for node, pred, value in self._iterable: + # handle subject + self._policy.handle_node(node) + # handle value + if isinstance(value, Node): + self._policy.handle_node(value) + # yield triple + yield node, pred, value + + +class DefaultNamingPolicy(NamingPolicy): + """Compose URIs as <host/user/node_type#fragment> + + What information is used as fragment depends on the node type. + Typically, the default is to use the "ucid" hint. + The fallback in all cases is to generate a random uuid. + + Never changes previously assigned uris. Sets uris in-place. + + """ + + def __init__( + self, + host: bsfs.URI, + user: str, + ): + self._prefix = bsfs.Namespace(os.path.join(host, user)) + self._uuid = bsfs.uuid.UUID() + + def handle_node(self, node: Node) -> Node: + if node.uri is not None: + return node + if node.node_type == ns.bsfs.File: + return self.name_file(node) + if node.node_type == ns.bsfs.Preview: + return self.name_preview(node) + raise errors.ProgrammingError('no naming policy available for {node.node_type}') + + def name_file(self, node: Node) -> Node: + """Set a bsfs:File node's uri fragment to its ucid.""" + if 'ucid' in node.hints: # content id + fragment = node.hints['ucid'] + else: # random name + fragment = self._uuid() + node.uri = (self._prefix + 'file')[fragment] + return node + + def name_preview(self, node: Node) -> Node: + """Set a bsfs:Preview node's uri fragment to its ucid. + Uses its source fragment as fallback. Appends the size if provided. + """ + fragment = None + if 'ucid' in node.hints: # content id + fragment = node.hints['ucid'] + if fragment is None and 'source' in node.hints: # source id + self.handle_node(node.hints['source']) + fragment = node.hints['source'].uri.get('fragment', None) + if fragment is None: # random name + fragment = self._uuid() + if 'size' in node.hints: # append size + fragment += '_s' + str(node.hints['size']) + node.uri = (self._prefix + 'preview')[fragment] + return node + +## EOF ## diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py index 44685ba..0bc5109 100644 --- a/bsie/lib/pipeline.py +++ b/bsie/lib/pipeline.py @@ -19,8 +19,6 @@ __all__: typing.Sequence[str] = ( 'Pipeline', ) -# constants -FILE_PREFIX = 'file#' ## code ## @@ -40,19 +38,14 @@ class Pipeline(): # combined extractor schemas. _schema: bsfs.schema.Schema - # node prefix. - _prefix: bsfs.Namespace - # extractor -> reader mapping _ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]] def __init__( self, - prefix: bsfs.Namespace, ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]] ): # store core members - self._prefix = prefix + FILE_PREFIX self._ext2rdr = ext2rdr # compile schema from all extractors self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr) @@ -64,12 +57,11 @@ class Pipeline(): return f'{bsfs.typename(self)}(...)' def __hash__(self) -> int: - return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) + return hash((type(self), self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) def __eq__(self, other: typing.Any) -> bool: return isinstance(other, type(self)) \ and self._schema == other._schema \ - and self._prefix == other._prefix \ and self._ext2rdr == other._ext2rdr @property @@ -117,8 +109,9 @@ class Pipeline(): rdr2ext[rdr].add(ext) # create subject for file - uuid = bsfs.uuid.UCID.from_path(path) - subject = node.Node(ns.bsfs.File, self._prefix[uuid]) + subject = node.Node(ns.bsfs.File, + ucid=bsfs.uuid.UCID.from_path(path), + ) # extract information for rdr, extrs in rdr2ext.items(): @@ -131,8 +124,7 @@ class Pipeline(): for ext in extrs: try: # get predicate/value tuples - for subject, pred, value in ext.extract(subject, content, principals): - yield subject, pred, value + yield from ext.extract(subject, content, principals) except errors.ExtractorError as err: # critical extractor failure. diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py index 5e9e0d5..1dbc52b 100644 --- a/bsie/reader/chain.py +++ b/bsie/reader/chain.py @@ -73,16 +73,19 @@ class ReaderChain(base.Reader, typing.Generic[T_CONTENT]): return hash((super().__hash__(), self._children)) def __call__(self, path: str) -> T_CONTENT: - raise_error = errors.UnsupportedFileFormatError + raise_error = False for child in self._children: try: return child(path) except errors.UnsupportedFileFormatError: + # child cannot read the file, skip. pass except errors.ReaderError: - # child cannot read the file, skip. - raise_error = errors.ReaderError # type: ignore [assignment] # mypy is confused + # child failed to read the file, skip. + raise_error = True - raise raise_error(path) + if raise_error: + raise errors.ReaderError(path) + raise errors.UnsupportedFileFormatError(path) ## EOF ## diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py index 1f290b5..c5d2a2a 100644 --- a/bsie/reader/image/__init__.py +++ b/bsie/reader/image/__init__.py @@ -27,7 +27,6 @@ __all__: typing.Sequence[str] = ( ## code ## -# FIXME: Check if PIL.Image or PIL.Image.Image, or if version-dependent class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods """Read an image file.""" diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py index 3144509..5b2bdf2 100644 --- a/bsie/reader/image/_pillow.py +++ b/bsie/reader/image/_pillow.py @@ -27,7 +27,7 @@ __all__: typing.Sequence[str] = ( class PillowImage(base.Reader): """Use PIL to read content of a variety of image file types.""" - def __call__(self, path: str) -> PIL.Image: + def __call__(self, path: str) -> PIL.Image.Image: try: # open file with PIL return PIL.Image.open(path) diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py index cd60453..257fdb3 100644 --- a/bsie/reader/image/_raw.py +++ b/bsie/reader/image/_raw.py @@ -32,17 +32,17 @@ class RawImage(base.Reader): """Use rawpy to read content of raw image file types.""" # file matcher - match: filematcher.Matcher + _match: filematcher.Matcher # additional kwargs to rawpy's postprocess - rawpy_kwargs: typing.Dict[str, typing.Any] + _rawpy_kwargs: typing.Dict[str, typing.Any] def __init__(self, **rawpy_kwargs): match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE) self._match = filematcher.parse(match_rule) self._rawpy_kwargs = rawpy_kwargs - def __call__(self, path: str) -> PIL.Image: + def __call__(self, path: str) -> PIL.Image.Image: # perform quick checks first if not self._match(path): raise errors.UnsupportedFileFormatError(path) diff --git a/bsie/reader/preview/__init__.py b/bsie/reader/preview/__init__.py new file mode 100644 index 0000000..3e69a4a --- /dev/null +++ b/bsie/reader/preview/__init__.py @@ -0,0 +1,39 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# external imports +import PIL.Image + +# inner-module imports +from .. import chain + +# constants +_FILE_FORMAT_READERS: typing.Sequence[str] = ( + # native image formats + __package__ + '._pillow.PillowPreviewReader', + __package__ + '._rawpy.RawpyPreviewReader', + # multiformat readers + __package__ + '._pg.PreviewGeneratorReader', + ) + +# exports +__all__: typing.Sequence[str] = ( + 'Preview', + ) + + +## code ## + +class Preview(chain.ReaderChain[typing.Callable[[int], PIL.Image.Image]]): # pylint: disable=too-few-public-methods + """Create a preview from a file.""" + + def __init__(self, cfg: typing.Optional[typing.Any] = None): + super().__init__(_FILE_FORMAT_READERS, cfg) + +## EOF ## diff --git a/bsie/reader/preview/_pg.py b/bsie/reader/preview/_pg.py new file mode 100644 index 0000000..097c513 --- /dev/null +++ b/bsie/reader/preview/_pg.py @@ -0,0 +1,86 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from functools import partial +import contextlib +import io +import os +import shutil +import tempfile +import typing + +# external imports +from preview_generator.manager import PreviewManager +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# inner-module imports +from .. import base + +# exports +__all__: typing.Sequence[str] = ( + 'PreviewGeneratorReader', + ) + + +## code ## + +class PreviewGeneratorReader(base.Reader): + """Uses preview_generator to create previews for various data formats. + See `https://github.com/algoo/preview-generator`_ for details. + """ + + # PreviewManager instance. + _mngr: PreviewManager + + # Set of mime types supported by PreviewManager. + _supported_mimetypes: typing.Set[str] + + # PreviewManager cache. + _cache: str + + # Determines whether the cache directory should be deleted after use. + _cleanup: bool + + def __init__(self, cache: typing.Optional[str] = None): + # initialize cache directory + # TODO: initialize in memory, e.g., via PyFilesystem + if cache is None: + self._cache = tempfile.mkdtemp(prefix='bsie-preview-cache-') + self._cleanup = True + else: + self._cache = cache + self._cleanup = False + # create preview generator + with contextlib.redirect_stderr(io.StringIO()): + self._mngr = PreviewManager(self._cache, create_folder=True) + self._supported_mimetypes = set(self._mngr.get_supported_mimetypes()) + + def __del__(self): + if self._cleanup: + shutil.rmtree(self._cache, ignore_errors=True) + + def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]: + if not os.path.exists(path): + raise errors.ReaderError(path) + if self._mngr.get_mimetype(path) not in self._supported_mimetypes: + raise errors.UnsupportedFileFormatError(path) + return partial(self._preview_callback, path) + + def _preview_callback(self, path: str, max_side: int) -> PIL.Image.Image: + """Produce a jpeg preview of *path* with at most *max_side* side length.""" + try: + # generate the preview + preview_path = self._mngr.get_jpeg_preview(path, width=max_side, height=max_side) + # open the preview and return + return PIL.Image.open(preview_path) + except Exception as err: # FIXME: less generic exception! + raise errors.ReaderError(path) from err + +## EOF ## diff --git a/bsie/reader/preview/_pillow.py b/bsie/reader/preview/_pillow.py new file mode 100644 index 0000000..174d509 --- /dev/null +++ b/bsie/reader/preview/_pillow.py @@ -0,0 +1,44 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from functools import partial +import typing + +# external imports +import PIL.Image + +# bsie imports +from bsie.utils import errors + +# inner-module imports +from . import utils +from .. import base + +# exports +__all__: typing.Sequence[str] = ( + 'PillowPreviewReader', + ) + + +## code ## + +class PillowPreviewReader(base.Reader): + """Produce previews for image files using the Pillow library.""" + + def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]: + try: + # open file with PIL + img = PIL.Image.open(path) + # return callback + return partial(utils.resize, img) + except PIL.UnidentifiedImageError as err: + # failed to open, skip file + raise errors.UnsupportedFileFormatError(path) from err + except IOError as err: + raise errors.ReaderError(path) from err + +# EOF ## diff --git a/bsie/reader/preview/_rawpy.py b/bsie/reader/preview/_rawpy.py new file mode 100644 index 0000000..2c20a48 --- /dev/null +++ b/bsie/reader/preview/_rawpy.py @@ -0,0 +1,66 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +from functools import partial +import typing + +# external imports +import PIL.Image +import rawpy + +# bsie imports +from bsie.utils import errors, filematcher + +# inner-module imports +from . import utils +from .. import base + +# constants +MATCH_RULE = 'mime={image/x-nikon-nef} | extension={nef}' + +# exports +__all__: typing.Sequence[str] = ( + 'RawpyPreviewReader', + ) + + +## code ## + +class RawpyPreviewReader(base.Reader): + """Produce previews for raw image files using the rawpy library.""" + + # file matcher + _match: filematcher.Matcher + + # additional kwargs to rawpy's postprocess + _rawpy_kwargs: typing.Dict[str, typing.Any] + + def __init__(self, **rawpy_kwargs): + match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE) + self._match = filematcher.parse(match_rule) + self._rawpy_kwargs = rawpy_kwargs + + def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]: + # perform quick checks first + if not self._match(path): + raise errors.UnsupportedFileFormatError(path) + + try: + # open file with rawpy + ary = rawpy.imread(path).postprocess(**self._rawpy_kwargs) + # convert to PIL.Image + img = PIL.Image.fromarray(ary) + # return callback + return partial(utils.resize, img) + + except (rawpy.LibRawFatalError, # pylint: disable=no-member # pylint doesn't find the errors + rawpy.NotSupportedError, # pylint: disable=no-member + rawpy.LibRawNonFatalError, # pylint: disable=no-member + ) as err: + raise errors.ReaderError(path) from err + +## EOF ## diff --git a/bsie/reader/preview/utils.py b/bsie/reader/preview/utils.py new file mode 100644 index 0000000..2ef1562 --- /dev/null +++ b/bsie/reader/preview/utils.py @@ -0,0 +1,39 @@ +""" + +Part of the tagit module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import typing + +# external imports +import PIL.Image + +# exports +__all__: typing.Sequence[str] = ( + 'resize', + ) + + +## code ## + +def resize( + img: PIL.Image.Image, + max_size: int, + ) -> PIL.Image.Image: + """Resize an image to a given maximum side length.""" + # determine target dimensions + ratio = img.width / img.height + if img.width > img.height: + width, height = max_size, round(max_size / ratio) + else: + width, height = round(ratio * max_size), max_size + # rescale and return + return img.resize( + (width, height), + resample=PIL.Image.Resampling.LANCZOS, # create high-quality image + reducing_gap=3.0, # optimize computation via fast size reduction + ) + +## EOF ## diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index 393b436..0af8ece 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -12,16 +12,18 @@ from . import bsfs as _bsfs # constants bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity') +bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature') bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/') bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta') +bsp = _bsfs.Namespace('http://bsfs.ai/schema/Preview') xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema') -bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature') # export __all__: typing.Sequence[str] = ( 'bse', 'bsfs', 'bsm', + 'bsp', 'xsd', ) diff --git a/bsie/utils/node.py b/bsie/utils/node.py index 91e4f37..aa62c06 100644 --- a/bsie/utils/node.py +++ b/bsie/utils/node.py @@ -19,30 +19,47 @@ __all__: typing.Sequence[str] = ( ## code ## class Node(): - """Lightweight Node, disconnected from any bsfs structures.""" + """Lightweight Node, disconnected from any bsfs structures. + + In most cases, provide *hints* and leave setting the uri to a node + naming policy. Only provide an *uri* if it is absolutely determined. + + """ # node type. node_type: bsfs.URI # node URI. - uri: bsfs.URI + uri: typing.Optional[bsfs.URI] + + # node naming hints. + hits: dict def __init__( self, node_type: bsfs.URI, - uri: bsfs.URI, + uri: typing.Optional[bsfs.URI] = None, + **uri_hints, ): # assign members self.node_type = bsfs.URI(node_type) - self.uri = bsfs.URI(uri) + self.hints = uri_hints + self.uri = uri def __eq__(self, other: typing.Any) -> bool: + """Compare two Node instances based on type and uri. + Compares hits only if the uri is not yet specified. + """ return isinstance(other, Node) \ and other.node_type == self.node_type \ - and other.uri == self.uri + and other.uri == self.uri \ + and (self.uri is not None or self.hints == other.hints) def __hash__(self) -> int: - return hash((type(self), self.node_type, self.uri)) + identifier = self.uri + if identifier is None: + identifier = tuple((key, self.hints[key]) for key in sorted(self.hints)) + return hash((type(self), self.node_type, identifier)) def __str__(self) -> str: return f'{bsfs.typename(self)}({self.node_type}, {self.uri})' |