aboutsummaryrefslogtreecommitdiffstats
path: root/bsie
diff options
context:
space:
mode:
Diffstat (limited to 'bsie')
-rw-r--r--bsie/apps/index.py23
-rw-r--r--bsie/apps/info.py4
-rw-r--r--bsie/extractor/base.py1
-rw-r--r--bsie/extractor/image/colors_spatial.py2
-rw-r--r--bsie/extractor/preview.py99
-rw-r--r--bsie/lib/__init__.py1
-rw-r--r--bsie/lib/bsie.py10
-rw-r--r--bsie/lib/builder.py9
-rw-r--r--bsie/lib/naming_policy.py120
-rw-r--r--bsie/lib/pipeline.py18
-rw-r--r--bsie/reader/chain.py11
-rw-r--r--bsie/reader/image/__init__.py1
-rw-r--r--bsie/reader/image/_pillow.py2
-rw-r--r--bsie/reader/image/_raw.py6
-rw-r--r--bsie/reader/preview/__init__.py39
-rw-r--r--bsie/reader/preview/_pg.py86
-rw-r--r--bsie/reader/preview/_pillow.py44
-rw-r--r--bsie/reader/preview/_rawpy.py66
-rw-r--r--bsie/reader/preview/utils.py39
-rw-r--r--bsie/utils/namespaces.py4
-rw-r--r--bsie/utils/node.py29
21 files changed, 566 insertions, 48 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index 21c2318..8798c49 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -11,9 +11,9 @@ import typing
# bsie imports
from bsie.extractor import ExtractorBuilder
-from bsie.lib import BSIE, PipelineBuilder
+from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy
from bsie.reader import ReaderBuilder
-from bsie.utils import bsfs, errors
+from bsie.utils import bsfs, errors, node as node_
# exports
__all__: typing.Sequence[str] = (
@@ -26,7 +26,9 @@ __all__: typing.Sequence[str] = (
def main(argv):
"""Index files or directories into BSFS."""
parser = argparse.ArgumentParser(description=main.__doc__, prog='index')
- parser.add_argument('--user', type=bsfs.URI, default=bsfs.URI('http://example.com/me'),
+ parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'),
+ help='')
+ parser.add_argument('--user', type=str, default='me',
help='')
parser.add_argument('--collect', action='append', default=[],
help='')
@@ -47,6 +49,9 @@ def main(argv):
rbuild = ReaderBuilder()
# extractor builder
ebuild = ExtractorBuilder([
+ {'bsie.extractor.preview.Preview': {
+ 'max_sides': [50],
+ }},
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
@@ -66,16 +71,19 @@ def main(argv):
])
# pipeline builder
pbuild = PipelineBuilder(
- bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')),
rbuild,
ebuild,
)
# build pipeline
pipeline = pbuild.build()
+ # build the naming policy
+ naming_policy = DefaultNamingPolicy(
+ host=args.host,
+ user=args.user,
+ )
# build BSIE frontend
- bsie = BSIE(pipeline, args.collect, args.discard)
-
+ bsie = BSIE(pipeline, naming_policy, args.collect, args.discard)
def walk(handle):
"""Walk through given input files."""
@@ -83,7 +91,6 @@ def main(argv):
# FIXME: simplify code (below but maybe also above)
# FIXME: How to handle dependencies between data?
# E.g. do I still want to link to a tag despite not being permitted to set its label?
- # FIXME: node renaming?
# index input paths
for path in args.input_file:
@@ -112,6 +119,8 @@ def main(argv):
store.migrate(bsie.schema)
# process files
def handle(node, pred, value):
+ if isinstance(value, node_.Node):
+ value = store.node(value.node_type, value.uri)
store.node(node.node_type, node.uri).set(pred.uri, value)
walk(handle)
# return store
diff --git a/bsie/apps/info.py b/bsie/apps/info.py
index 64a4eba..750aedc 100644
--- a/bsie/apps/info.py
+++ b/bsie/apps/info.py
@@ -35,6 +35,9 @@ def main(argv):
rbuild = ReaderBuilder()
# extractor builder
ebuild = ExtractorBuilder([
+ {'bsie.extractor.preview.Preview': {
+ 'max_sides': [50, 200],
+ }},
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
@@ -54,7 +57,6 @@ def main(argv):
])
# pipeline builder
pbuild = PipelineBuilder(
- bsfs.Namespace('http://example.com/me/'), # not actually used
rbuild,
ebuild,
)
diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py
index 7401244..89183f9 100644
--- a/bsie/extractor/base.py
+++ b/bsie/extractor/base.py
@@ -30,6 +30,7 @@ SCHEMA_PREAMBLE = '''
# common bsfs prefixes
prefix bsfs: <http://bsfs.ai/schema/>
prefix bse: <http://bsfs.ai/schema/Entity#>
+ prefix bsp: <http://bsfs.ai/schema/Preview#>
# default definitions
bsfs:Array rdfs:subClassOf bsfs:Literal .
diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py
index ce5b9f2..15fd281 100644
--- a/bsie/extractor/image/colors_spatial.py
+++ b/bsie/extractor/image/colors_spatial.py
@@ -120,7 +120,7 @@ class ColorsSpatial(base.Extractor):
def extract(
self,
subject: node.Node,
- content: PIL.Image,
+ content: PIL.Image.Image,
principals: typing.Iterable[bsfs.schema.Predicate],
) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
# check principals
diff --git a/bsie/extractor/preview.py b/bsie/extractor/preview.py
new file mode 100644
index 0000000..1531d62
--- /dev/null
+++ b/bsie/extractor/preview.py
@@ -0,0 +1,99 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import io
+import typing
+
+# external imports
+import PIL.Image
+
+# bsie imports
+from bsie.utils import bsfs, node, ns
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Preview',
+ )
+
+
+## code ##
+
+class Preview(base.Extractor):
+ """Extract jpeg previews of a file, one bsfs:Preview node per requested maximum side length."""
+
+ CONTENT_READER = 'bsie.reader.preview.Preview'
+
+ def __init__(self, max_sides: typing.Iterable[int]):
+ super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''
+
+ bsfs:Preview rdfs:subClassOf bsfs:Node .
+ bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal .
+ bsfs:JPEG rdfs:subClassOf bsfs:BinaryBlob .
+
+ bse:preview rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:File ;
+ rdfs:range bsfs:Preview ;
+ bsfs:unique "false"^^xsd:boolean .
+
+ bsp:width rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Preview ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ bsp:height rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Preview ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ bsp:asset rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Preview ;
+ rdfs:range bsfs:JPEG ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ '''))
+ # initialize extra args (duplicates in max_sides collapse into one preview each)
+ self.max_sides = set(max_sides)
+
+ def __eq__(self, other: typing.Any) -> bool:
+ return super().__eq__(other) \
+ and self.max_sides == other.max_sides
+
+ def __hash__(self) -> int:
+ return hash((super().__hash__(), tuple(sorted(self.max_sides))))
+
+ def extract(
+ self,
+ subject: node.Node,
+ content: typing.Callable[[int], PIL.Image.Image],
+ principals: typing.Iterable[bsfs.schema.Predicate],
+ ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ # check principals
+ if self.schema.predicate(ns.bse.preview) not in principals:
+ return
+
+ for max_side in self.max_sides:
+ # get the preview in the right resolution
+ img = content(max_side)
+ # convert the preview to jpeg; modes jpeg cannot store (e.g. RGBA, P) go through RGB
+ buffer = io.BytesIO()
+ (img if img.mode in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr') else img.convert('RGB')).save(buffer, format='jpeg')
+ # create a preview node
+ preview = node.Node(ns.bsfs.Preview,
+ ucid=bsfs.uuid.UCID.from_bytes(buffer.getvalue()),
+ size=max_side,
+ source=subject,
+ )
+ # yield triples
+ yield subject, self.schema.predicate(ns.bse.preview), preview
+ yield preview, self.schema.predicate(ns.bsp.width), img.width
+ yield preview, self.schema.predicate(ns.bsp.height), img.height
+ yield preview, self.schema.predicate(ns.bsp.asset), buffer.getvalue()
+
+## EOF ##
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
index 4239d3b..48379de 100644
--- a/bsie/lib/__init__.py
+++ b/bsie/lib/__init__.py
@@ -10,6 +10,7 @@ import typing
# inner-module imports
from .bsie import BSIE
from .builder import PipelineBuilder
+from .naming_policy import DefaultNamingPolicy
# exports
__all__: typing.Sequence[str] = (
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
index 668783d..a572525 100644
--- a/bsie/lib/bsie.py
+++ b/bsie/lib/bsie.py
@@ -11,6 +11,7 @@ import typing
from bsie.utils import bsfs, node, ns
# inner-module imports
+from .naming_policy import NamingPolicy
from .pipeline import Pipeline
# exports
@@ -41,15 +42,18 @@ class BSIE():
def __init__(
self,
- # pipeline builder.
+ # pipeline.
pipeline: Pipeline,
+ # naming policy
+ naming_policy: NamingPolicy,
# principals to extract at most. None implies all available w.r.t. extractors.
collect: typing.Optional[typing.Iterable[bsfs.URI]] = None,
# principals to discard.
discard: typing.Optional[typing.Iterable[bsfs.URI]] = None,
):
- # store pipeline
+ # store pipeline and naming policy
self._pipeline = pipeline
+ self._naming_policy = naming_policy
# start off with available principals
self._principals = {pred.uri for pred in self._pipeline.principals}
# limit principals to specified ones by argument.
@@ -89,6 +93,6 @@ class BSIE():
# predicate lookup
principals = {self.schema.predicate(pred) for pred in principals}
# invoke pipeline
- yield from self._pipeline(path, principals)
+ yield from self._naming_policy(self._pipeline(path, principals))
## EOF ##
diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py
index c2abffe..39da441 100644
--- a/bsie/lib/builder.py
+++ b/bsie/lib/builder.py
@@ -11,7 +11,7 @@ import typing
# bsie imports
from bsie.extractor import ExtractorBuilder
from bsie.reader import ReaderBuilder
-from bsie.utils import bsfs, errors
+from bsie.utils import errors
# inner-module imports
from . import pipeline
@@ -29,9 +29,6 @@ logger = logging.getLogger(__name__)
class PipelineBuilder():
"""Build `bsie.tools.pipeline.Pipeline` instances."""
- # Prefix to be used in the Pipeline.
- prefix: bsfs.Namespace
-
# builder for Readers.
rbuild: ReaderBuilder
@@ -40,11 +37,9 @@ class PipelineBuilder():
def __init__(
self,
- prefix: bsfs.Namespace,
reader_builder: ReaderBuilder,
extractor_builder: ExtractorBuilder,
):
- self.prefix = prefix
self.rbuild = reader_builder
self.ebuild = extractor_builder
@@ -80,6 +75,6 @@ class PipelineBuilder():
except errors.BuilderError as err: # failed to build reader
logger.error(str(err))
- return pipeline.Pipeline(self.prefix, ext2rdr)
+ return pipeline.Pipeline(ext2rdr)
## EOF ##
diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py
new file mode 100644
index 0000000..131a70b
--- /dev/null
+++ b/bsie/lib/naming_policy.py
@@ -0,0 +1,120 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import abc
+import os
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, ns
+from bsie.utils.node import Node
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'DefaultNamingPolicy',
+ )
+
+
+## code ##
+
+class NamingPolicy(abc.ABC):
+ """Determine node URIs from node hints. Subclasses must implement `handle_node`."""
+ def __call__(
+ self,
+ iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]],
+ ):
+ """Wrap a triple iterator so that the policy is applied to every node it yields."""
+ return NamingPolicyIterator(self, iterable)
+
+ @abc.abstractmethod
+ def handle_node(self, node: Node) -> Node:
+ """Apply the policy on a node; implementations return the named node."""
+
+
+class NamingPolicyIterator():
+ """Iterates over triples, determines uris according to a *policy* as it goes."""
+
+ # source triple iterator.
+ _iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]]
+
+ # naming policy
+ _policy: NamingPolicy
+
+ def __init__(
+ self,
+ policy: NamingPolicy,
+ iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]],
+ ):
+ self._iterable = iterable
+ self._policy = policy
+
+ def __iter__(self):
+ for node, pred, value in self._iterable:
+ # handle subject; keep the returned node so policies need not work in-place
+ node = self._policy.handle_node(node)
+ # handle value; only Node values need a uri, literals pass through
+ if isinstance(value, Node):
+ value = self._policy.handle_node(value)
+ # yield triple
+ yield node, pred, value
+
+
+class DefaultNamingPolicy(NamingPolicy):
+ """Compose URIs as <host/user/node_type#fragment>
+
+ What information is used as fragment depends on the node type.
+ Typically, the default is to use the "ucid" hint.
+ The fallback in all cases is to generate a random uuid.
+
+ Never changes previously assigned uris. Sets uris in-place.
+
+ """
+
+ def __init__(
+ self,
+ host: bsfs.URI,
+ user: str,
+ ):
+ self._prefix = bsfs.Namespace(os.path.join(host, user))
+ self._uuid = bsfs.uuid.UUID()
+
+ def handle_node(self, node: Node) -> Node:
+ if node.uri is not None:
+ return node
+ if node.node_type == ns.bsfs.File:
+ return self.name_file(node)
+ if node.node_type == ns.bsfs.Preview:
+ return self.name_preview(node)
+ raise errors.ProgrammingError(f'no naming policy available for {node.node_type}')
+
+ def name_file(self, node: Node) -> Node:
+ """Set a bsfs:File node's uri fragment to its ucid."""
+ if 'ucid' in node.hints: # content id
+ fragment = node.hints['ucid']
+ else: # random name
+ fragment = self._uuid()
+ node.uri = (self._prefix + 'file')[fragment]
+ return node
+
+ def name_preview(self, node: Node) -> Node:
+ """Set a bsfs:Preview node's uri fragment to its ucid.
+ Uses its source fragment as fallback. Appends the size if provided.
+ """
+ fragment = None
+ if 'ucid' in node.hints: # content id
+ fragment = node.hints['ucid']
+ if fragment is None and 'source' in node.hints: # source id
+ self.handle_node(node.hints['source'])
+ fragment = node.hints['source'].uri.get('fragment', None)
+ if fragment is None: # random name
+ fragment = self._uuid()
+ if 'size' in node.hints: # append size
+ fragment += '_s' + str(node.hints['size'])
+ node.uri = (self._prefix + 'preview')[fragment]
+ return node
+
+## EOF ##
diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py
index 44685ba..0bc5109 100644
--- a/bsie/lib/pipeline.py
+++ b/bsie/lib/pipeline.py
@@ -19,8 +19,6 @@ __all__: typing.Sequence[str] = (
'Pipeline',
)
-# constants
-FILE_PREFIX = 'file#'
## code ##
@@ -40,19 +38,14 @@ class Pipeline():
# combined extractor schemas.
_schema: bsfs.schema.Schema
- # node prefix.
- _prefix: bsfs.Namespace
-
# extractor -> reader mapping
_ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
def __init__(
self,
- prefix: bsfs.Namespace,
ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
):
# store core members
- self._prefix = prefix + FILE_PREFIX
self._ext2rdr = ext2rdr
# compile schema from all extractors
self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr)
@@ -64,12 +57,11 @@ class Pipeline():
return f'{bsfs.typename(self)}(...)'
def __hash__(self) -> int:
- return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
+ return hash((type(self), self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
def __eq__(self, other: typing.Any) -> bool:
return isinstance(other, type(self)) \
and self._schema == other._schema \
- and self._prefix == other._prefix \
and self._ext2rdr == other._ext2rdr
@property
@@ -117,8 +109,9 @@ class Pipeline():
rdr2ext[rdr].add(ext)
# create subject for file
- uuid = bsfs.uuid.UCID.from_path(path)
- subject = node.Node(ns.bsfs.File, self._prefix[uuid])
+ subject = node.Node(ns.bsfs.File,
+ ucid=bsfs.uuid.UCID.from_path(path),
+ )
# extract information
for rdr, extrs in rdr2ext.items():
@@ -131,8 +124,7 @@ class Pipeline():
for ext in extrs:
try:
# get predicate/value tuples
- for subject, pred, value in ext.extract(subject, content, principals):
- yield subject, pred, value
+ yield from ext.extract(subject, content, principals)
except errors.ExtractorError as err:
# critical extractor failure.
diff --git a/bsie/reader/chain.py b/bsie/reader/chain.py
index 5e9e0d5..1dbc52b 100644
--- a/bsie/reader/chain.py
+++ b/bsie/reader/chain.py
@@ -73,16 +73,19 @@ class ReaderChain(base.Reader, typing.Generic[T_CONTENT]):
return hash((super().__hash__(), self._children))
def __call__(self, path: str) -> T_CONTENT:
- raise_error = errors.UnsupportedFileFormatError
+ raise_error = False
for child in self._children:
try:
return child(path)
except errors.UnsupportedFileFormatError:
+ # child cannot read the file, skip.
pass
except errors.ReaderError:
- # child cannot read the file, skip.
- raise_error = errors.ReaderError # type: ignore [assignment] # mypy is confused
+ # child failed to read the file, skip.
+ raise_error = True
- raise raise_error(path)
+ if raise_error:
+ raise errors.ReaderError(path)
+ raise errors.UnsupportedFileFormatError(path)
## EOF ##
diff --git a/bsie/reader/image/__init__.py b/bsie/reader/image/__init__.py
index 1f290b5..c5d2a2a 100644
--- a/bsie/reader/image/__init__.py
+++ b/bsie/reader/image/__init__.py
@@ -27,7 +27,6 @@ __all__: typing.Sequence[str] = (
## code ##
-# FIXME: Check if PIL.Image or PIL.Image.Image, or if version-dependent
class Image(chain.ReaderChain[PIL.Image.Image]): # pylint: disable=too-few-public-methods
"""Read an image file."""
diff --git a/bsie/reader/image/_pillow.py b/bsie/reader/image/_pillow.py
index 3144509..5b2bdf2 100644
--- a/bsie/reader/image/_pillow.py
+++ b/bsie/reader/image/_pillow.py
@@ -27,7 +27,7 @@ __all__: typing.Sequence[str] = (
class PillowImage(base.Reader):
"""Use PIL to read content of a variety of image file types."""
- def __call__(self, path: str) -> PIL.Image:
+ def __call__(self, path: str) -> PIL.Image.Image:
try:
# open file with PIL
return PIL.Image.open(path)
diff --git a/bsie/reader/image/_raw.py b/bsie/reader/image/_raw.py
index cd60453..257fdb3 100644
--- a/bsie/reader/image/_raw.py
+++ b/bsie/reader/image/_raw.py
@@ -32,17 +32,17 @@ class RawImage(base.Reader):
"""Use rawpy to read content of raw image file types."""
# file matcher
- match: filematcher.Matcher
+ _match: filematcher.Matcher
# additional kwargs to rawpy's postprocess
- rawpy_kwargs: typing.Dict[str, typing.Any]
+ _rawpy_kwargs: typing.Dict[str, typing.Any]
def __init__(self, **rawpy_kwargs):
match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE)
self._match = filematcher.parse(match_rule)
self._rawpy_kwargs = rawpy_kwargs
- def __call__(self, path: str) -> PIL.Image:
+ def __call__(self, path: str) -> PIL.Image.Image:
# perform quick checks first
if not self._match(path):
raise errors.UnsupportedFileFormatError(path)
diff --git a/bsie/reader/preview/__init__.py b/bsie/reader/preview/__init__.py
new file mode 100644
index 0000000..3e69a4a
--- /dev/null
+++ b/bsie/reader/preview/__init__.py
@@ -0,0 +1,39 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# external imports
+import PIL.Image
+
+# inner-module imports
+from .. import chain
+
+# constants
+_FILE_FORMAT_READERS: typing.Sequence[str] = (
+ # native image formats
+ __package__ + '._pillow.PillowPreviewReader',
+ __package__ + '._rawpy.RawpyPreviewReader',
+ # multiformat readers
+ __package__ + '._pg.PreviewGeneratorReader',
+ )
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Preview',
+ )
+
+
+## code ##
+
+class Preview(chain.ReaderChain[typing.Callable[[int], PIL.Image.Image]]): # pylint: disable=too-few-public-methods
+ """Create a preview from a file via a chain of the readers named in `_FILE_FORMAT_READERS`."""
+
+ def __init__(self, cfg: typing.Optional[typing.Any] = None):
+ super().__init__(_FILE_FORMAT_READERS, cfg)
+
+## EOF ##
diff --git a/bsie/reader/preview/_pg.py b/bsie/reader/preview/_pg.py
new file mode 100644
index 0000000..097c513
--- /dev/null
+++ b/bsie/reader/preview/_pg.py
@@ -0,0 +1,86 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+from functools import partial
+import contextlib
+import io
+import os
+import shutil
+import tempfile
+import typing
+
+# external imports
+from preview_generator.manager import PreviewManager
+import PIL.Image
+
+# bsie imports
+from bsie.utils import errors
+
+# inner-module imports
+from .. import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'PreviewGeneratorReader',
+ )
+
+
+## code ##
+
+class PreviewGeneratorReader(base.Reader):
+ """Uses preview_generator to create previews for various data formats.
+ See `https://github.com/algoo/preview-generator`_ for details.
+ """
+
+ # PreviewManager instance.
+ _mngr: PreviewManager
+
+ # Set of mime types supported by PreviewManager.
+ _supported_mimetypes: typing.Set[str]
+
+ # PreviewManager cache.
+ _cache: str
+
+ # Determines whether the cache directory should be deleted after use.
+ _cleanup: bool
+
+ def __init__(self, cache: typing.Optional[str] = None):
+ # initialize cache directory
+ # TODO: initialize in memory, e.g., via PyFilesystem
+ if cache is None:
+ self._cache = tempfile.mkdtemp(prefix='bsie-preview-cache-')
+ self._cleanup = True
+ else:
+ self._cache = cache
+ self._cleanup = False
+ # create preview generator
+ with contextlib.redirect_stderr(io.StringIO()):
+ self._mngr = PreviewManager(self._cache, create_folder=True)
+ self._supported_mimetypes = set(self._mngr.get_supported_mimetypes())
+
+ def __del__(self):
+ if getattr(self, '_cleanup', False): # attribute is missing if __init__ failed early
+ shutil.rmtree(self._cache, ignore_errors=True)
+
+ def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]:
+ if not os.path.exists(path):
+ raise errors.ReaderError(path)
+ if self._mngr.get_mimetype(path) not in self._supported_mimetypes:
+ raise errors.UnsupportedFileFormatError(path)
+ return partial(self._preview_callback, path)
+
+ def _preview_callback(self, path: str, max_side: int) -> PIL.Image.Image:
+ """Produce a jpeg preview of *path* with at most *max_side* side length."""
+ try:
+ # generate the preview
+ preview_path = self._mngr.get_jpeg_preview(path, width=max_side, height=max_side)
+ # open the preview and return
+ return PIL.Image.open(preview_path)
+ except Exception as err: # FIXME: less generic exception!
+ raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/preview/_pillow.py b/bsie/reader/preview/_pillow.py
new file mode 100644
index 0000000..174d509
--- /dev/null
+++ b/bsie/reader/preview/_pillow.py
@@ -0,0 +1,44 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+from functools import partial
+import typing
+
+# external imports
+import PIL.Image
+
+# bsie imports
+from bsie.utils import errors
+
+# inner-module imports
+from . import utils
+from .. import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'PillowPreviewReader',
+ )
+
+
+## code ##
+
+class PillowPreviewReader(base.Reader):
+ """Produce previews for image files using the Pillow library; returns a resize callback."""
+
+ def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]:
+ try:
+ # open file with PIL
+ img = PIL.Image.open(path)
+ # return callback that resizes the opened image to a requested side length
+ return partial(utils.resize, img)
+ except PIL.UnidentifiedImageError as err:
+ # failed to open, skip file
+ raise errors.UnsupportedFileFormatError(path) from err
+ except IOError as err:
+ raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/preview/_rawpy.py b/bsie/reader/preview/_rawpy.py
new file mode 100644
index 0000000..2c20a48
--- /dev/null
+++ b/bsie/reader/preview/_rawpy.py
@@ -0,0 +1,66 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+from functools import partial
+import typing
+
+# external imports
+import PIL.Image
+import rawpy
+
+# bsie imports
+from bsie.utils import errors, filematcher
+
+# inner-module imports
+from . import utils
+from .. import base
+
+# constants
+MATCH_RULE = 'mime={image/x-nikon-nef} | extension={nef}'
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'RawpyPreviewReader',
+ )
+
+
+## code ##
+
+class RawpyPreviewReader(base.Reader):
+ """Produce previews for raw image files using the rawpy library; files are pre-filtered by a match rule."""
+
+ # file matcher
+ _match: filematcher.Matcher
+
+ # additional kwargs to rawpy's postprocess
+ _rawpy_kwargs: typing.Dict[str, typing.Any]
+
+ def __init__(self, **rawpy_kwargs):
+ match_rule = rawpy_kwargs.pop('file_match_rule', MATCH_RULE) # removed here so it is not passed to postprocess
+ self._match = filematcher.parse(match_rule)
+ self._rawpy_kwargs = rawpy_kwargs
+
+ def __call__(self, path: str) -> typing.Callable[[int], PIL.Image.Image]:
+ # perform quick checks first
+ if not self._match(path):
+ raise errors.UnsupportedFileFormatError(path)
+
+ try:
+ # open file with rawpy
+ ary = rawpy.imread(path).postprocess(**self._rawpy_kwargs)
+ # convert to PIL.Image
+ img = PIL.Image.fromarray(ary)
+ # return callback that resizes the decoded image to a requested side length
+ return partial(utils.resize, img)
+
+ except (rawpy.LibRawFatalError, # pylint: disable=no-member # pylint doesn't find the errors
+ rawpy.NotSupportedError, # pylint: disable=no-member
+ rawpy.LibRawNonFatalError, # pylint: disable=no-member
+ ) as err:
+ raise errors.ReaderError(path) from err
+
+## EOF ##
diff --git a/bsie/reader/preview/utils.py b/bsie/reader/preview/utils.py
new file mode 100644
index 0000000..2ef1562
--- /dev/null
+++ b/bsie/reader/preview/utils.py
@@ -0,0 +1,39 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import typing
+
+# external imports
+import PIL.Image
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'resize',
+ )
+
+
+## code ##
+
+def resize(
+ img: PIL.Image.Image,
+ max_size: int,
+ ) -> PIL.Image.Image:
+ """Resize an image so that its longer side equals *max_size*, keeping the aspect ratio."""
+ # determine target dimensions; the shorter side is at least one pixel
+ ratio = img.width / img.height
+ if img.width > img.height:
+ width, height = max_size, max(1, round(max_size / ratio))
+ else:
+ width, height = max(1, round(ratio * max_size)), max_size
+ # rescale and return
+ return img.resize(
+ (width, height),
+ resample=PIL.Image.Resampling.LANCZOS, # create high-quality image
+ reducing_gap=3.0, # optimize computation via fast size reduction
+ )
+
+## EOF ##
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index 393b436..0af8ece 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -12,16 +12,18 @@ from . import bsfs as _bsfs
# constants
bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity')
+bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature')
bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/')
bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta')
+bsp = _bsfs.Namespace('http://bsfs.ai/schema/Preview')
xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema')
-bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature')
# export
__all__: typing.Sequence[str] = (
'bse',
'bsfs',
'bsm',
+ 'bsp',
'xsd',
)
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
index 91e4f37..aa62c06 100644
--- a/bsie/utils/node.py
+++ b/bsie/utils/node.py
@@ -19,30 +19,47 @@ __all__: typing.Sequence[str] = (
## code ##
class Node():
- """Lightweight Node, disconnected from any bsfs structures."""
+ """Lightweight Node, disconnected from any bsfs structures.
+
+ In most cases, provide *hints* and leave setting the uri to a node
+ naming policy. Only provide a *uri* if it is absolutely determined.
+
+ """
# node type.
node_type: bsfs.URI
# node URI.
- uri: bsfs.URI
+ uri: typing.Optional[bsfs.URI]
+
+ # node naming hints (keyword arguments passed to the constructor).
+ hints: dict
def __init__(
self,
node_type: bsfs.URI,
- uri: bsfs.URI,
+ uri: typing.Optional[bsfs.URI] = None,
+ **uri_hints,
):
# assign members
self.node_type = bsfs.URI(node_type)
- self.uri = bsfs.URI(uri)
+ self.hints = uri_hints
+ self.uri = uri
def __eq__(self, other: typing.Any) -> bool:
+ """Compare two Node instances based on type and uri.
+ Compares hints only if the uri is not yet specified.
+ """
return isinstance(other, Node) \
and other.node_type == self.node_type \
- and other.uri == self.uri
+ and other.uri == self.uri \
+ and (self.uri is not None or self.hints == other.hints)
def __hash__(self) -> int:
- return hash((type(self), self.node_type, self.uri))
+ identifier = self.uri
+ if identifier is None:
+ identifier = tuple((key, self.hints[key]) for key in sorted(self.hints))
+ return hash((type(self), self.node_type, identifier))
def __str__(self) -> str:
return f'{bsfs.typename(self)}({self.node_type}, {self.uri})'