about | summary | refs | log | tree | commit | diff | stats
path: root/bsie
diff options
context:
space:
mode:
Diffstat (limited to 'bsie')
-rw-r--r-- bsie/apps/index.py 7
-rw-r--r-- bsie/apps/info.py 3
-rw-r--r-- bsie/extractor/base.py 1
-rw-r--r-- bsie/extractor/preview.py 99
-rw-r--r-- bsie/lib/naming_policy.py 19
-rw-r--r-- bsie/utils/namespaces.py 4
6 files changed, 131 insertions, 2 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index a870364..8798c49 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -13,7 +13,7 @@ import typing
from bsie.extractor import ExtractorBuilder
from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy
from bsie.reader import ReaderBuilder
-from bsie.utils import bsfs, errors
+from bsie.utils import bsfs, errors, node as node_
# exports
__all__: typing.Sequence[str] = (
@@ -49,6 +49,9 @@ def main(argv):
rbuild = ReaderBuilder()
# extractor builder
ebuild = ExtractorBuilder([
+ {'bsie.extractor.preview.Preview': {
+ 'max_sides': [50],
+ }},
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
@@ -116,6 +119,8 @@ def main(argv):
store.migrate(bsie.schema)
# process files
def handle(node, pred, value):
+ if isinstance(value, node_.Node):
+ value = store.node(value.node_type, value.uri)
store.node(node.node_type, node.uri).set(pred.uri, value)
walk(handle)
# return store
diff --git a/bsie/apps/info.py b/bsie/apps/info.py
index 4e948fc..750aedc 100644
--- a/bsie/apps/info.py
+++ b/bsie/apps/info.py
@@ -35,6 +35,9 @@ def main(argv):
rbuild = ReaderBuilder()
# extractor builder
ebuild = ExtractorBuilder([
+ {'bsie.extractor.preview.Preview': {
+ 'max_sides': [50, 200],
+ }},
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py
index 7401244..89183f9 100644
--- a/bsie/extractor/base.py
+++ b/bsie/extractor/base.py
@@ -30,6 +30,7 @@ SCHEMA_PREAMBLE = '''
# common bsfs prefixes
prefix bsfs: <http://bsfs.ai/schema/>
prefix bse: <http://bsfs.ai/schema/Entity#>
+ prefix bsp: <http://bsfs.ai/schema/Preview#>
# default definitions
bsfs:Array rdfs:subClassOf bsfs:Literal .
diff --git a/bsie/extractor/preview.py b/bsie/extractor/preview.py
new file mode 100644
index 0000000..1531d62
--- /dev/null
+++ b/bsie/extractor/preview.py
@@ -0,0 +1,99 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import io
+import typing
+
+# external imports
+import PIL.Image
+
+# bsie imports
+from bsie.utils import bsfs, node, ns
+
+# inner-module imports
+from . import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Preview',
+ )
+
+
+## code ##
+
+class Preview(base.Extractor):
+    """Extract JPEG-encoded previews of a file.
+
+    For every distinct value in `max_sides`, one bsfs:Preview node is
+    created and linked to the subject via bse:preview. Each preview node
+    carries its pixel dimensions (bsp:width, bsp:height) and the encoded
+    image bytes (bsp:asset).
+    """
+
+    # dotted name of the reader that supplies `content` to extract()
+    # NOTE(review): this name is resolved outside this file -- confirm where
+    CONTENT_READER = 'bsie.reader.preview.Preview'
+
+    def __init__(self, max_sides: typing.Iterable[int]):
+        """Declare the preview schema and remember the requested sizes.
+
+        max_sides: size values handed to the content callable, one preview
+            per distinct value (duplicates are dropped).
+        """
+        super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''
+
+ bsfs:Preview rdfs:subClassOf bsfs:Node .
+ bsfs:BinaryBlob rdfs:subClassOf bsfs:Literal .
+ bsfs:JPEG rdfs:subClassOf bsfs:BinaryBlob .
+
+ bse:preview rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:File ;
+ rdfs:range bsfs:Preview ;
+ bsfs:unique "false"^^xsd:boolean .
+
+ bsp:width rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Preview ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ bsp:height rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Preview ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ bsp:asset rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Preview ;
+ rdfs:range bsfs:JPEG ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ '''))
+        # initialize extra args
+        self.max_sides = set(max_sides)
+
+    def __eq__(self, other: typing.Any) -> bool:
+        """Return True if the base class reports equality and the sizes match."""
+        return super().__eq__(other) \
+            and self.max_sides == other.max_sides
+
+    def __hash__(self) -> int:
+        """Hash the base hash together with the sizes.
+
+        The sizes are sorted so that equal sets always produce the same tuple.
+        """
+        return hash((super().__hash__(), tuple(sorted(self.max_sides))))
+
+    def extract(
+            self,
+            subject: node.Node,
+            content: typing.Callable[[int], PIL.Image.Image],
+            principals: typing.Iterable[bsfs.schema.Predicate],
+            ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+        """Yield the preview triples of `subject`.
+
+        subject: node the previews are attached to.
+        content: callable returning an image for a given size value.
+        principals: requested predicates; nothing is yielded unless
+            bse:preview is among them.
+
+        Yields, per size, (subject, bse:preview, preview) followed by the
+        preview's bsp:width, bsp:height and bsp:asset triples.
+        """
+        # check principals
+        pred_preview = self.schema.predicate(ns.bse.preview)
+        if pred_preview not in principals:
+            return
+
+        # the remaining predicates do not depend on the size; look them up once
+        pred_width = self.schema.predicate(ns.bsp.width)
+        pred_height = self.schema.predicate(ns.bsp.height)
+        pred_asset = self.schema.predicate(ns.bsp.asset)
+
+        for max_side in self.max_sides:
+            # get the preview in the right resolution
+            img = content(max_side)
+            # convert the preview to jpeg
+            buffer = io.BytesIO()
+            try:
+                img.save(buffer, format='jpeg')
+            except OSError:
+                # Pillow refuses to write some modes (e.g. RGBA, P) as JPEG.
+                # Retry on a clean buffer after converting to RGB; this drops
+                # any alpha channel, which JPEG cannot store anyway.
+                buffer = io.BytesIO()
+                img.convert('RGB').save(buffer, format='jpeg')
+            # fetch the encoded bytes once; getvalue() copies on every call
+            data = buffer.getvalue()
+            # create a preview node; the keyword arguments are the values the
+            # naming policy reads back as hints ('ucid', 'size', 'source')
+            preview = node.Node(ns.bsfs.Preview,
+                ucid=bsfs.uuid.UCID.from_bytes(data),
+                size=max_side,
+                source=subject,
+                )
+            # yield triples
+            yield subject, pred_preview, preview
+            yield preview, pred_width, img.width
+            yield preview, pred_height, img.height
+            yield preview, pred_asset, data
+
+## EOF ##
diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py
index 360abde..131a70b 100644
--- a/bsie/lib/naming_policy.py
+++ b/bsie/lib/naming_policy.py
@@ -87,6 +87,8 @@ class DefaultNamingPolicy(NamingPolicy):
return node
if node.node_type == ns.bsfs.File:
return self.name_file(node)
+ if node.node_type == ns.bsfs.Preview:
+ return self.name_preview(node)
raise errors.ProgrammingError('no naming policy available for {node.node_type}')
def name_file(self, node: Node) -> Node:
@@ -98,4 +100,21 @@ class DefaultNamingPolicy(NamingPolicy):
node.uri = (self._prefix + 'file')[fragment]
return node
+ def name_preview(self, node: Node) -> Node:
+     """Assign the uri of a bsfs:Preview node and return the node.
+
+     The fragment is the first available of: the node's 'ucid' hint, the
+     uri fragment of its 'source' hint (the source is named first), or a
+     fresh uuid. If a 'size' hint is present, '_s<size>' is appended.
+     """
+     hints = node.hints
+     # content id takes precedence
+     name = hints['ucid'] if 'ucid' in hints else None
+     if name is None and 'source' in hints:
+         # fall back to the source's id; make sure the source has a uri
+         origin = hints['source']
+         self.handle_node(origin)
+         name = origin.uri.get('fragment', None)
+     if name is None:
+         # last resort: random name
+         name = self._uuid()
+     if 'size' in hints:
+         # distinguish previews of the same origin by their size
+         name += '_s' + str(hints['size'])
+     node.uri = (self._prefix + 'preview')[name]
+     return node
+
## EOF ##
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index 393b436..0af8ece 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -12,16 +12,18 @@ from . import bsfs as _bsfs
# constants
bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity')
+bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature')
bsfs = _bsfs.Namespace('http://bsfs.ai/schema', fsep='/')
bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta')
+bsp = _bsfs.Namespace('http://bsfs.ai/schema/Preview')
xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema')
-bsf = _bsfs.Namespace('http://ie.bsfs.ai/schema/Feature')
# export
__all__: typing.Sequence[str] = (
'bse',
'bsfs',
'bsm',
+ 'bsp',
'xsd',
)