about | summary | refs | log | tree | commit | diff | stats
path: root/bsie
diff options
context:
space:
mode:
Diffstat (limited to 'bsie')
-rw-r--r--  bsie/apps/_loader.py           6
-rw-r--r--  bsie/apps/index.py            23
-rw-r--r--  bsie/apps/info.py              2
-rw-r--r--  bsie/extractor/builder.py      3
-rw-r--r--  bsie/extractor/image/iptc.py  70
-rw-r--r--  bsie/lib/naming_policy.py     27
-rw-r--r--  bsie/reader/exif.py           21
-rw-r--r--  bsie/utils/__init__.py         1
-rw-r--r--  bsie/utils/filewalker.py      31
-rw-r--r--  bsie/utils/namespaces.py       2
10 files changed, 157 insertions, 29 deletions
diff --git a/bsie/apps/_loader.py b/bsie/apps/_loader.py
index 6411f10..d9ea9bb 100644
--- a/bsie/apps/_loader.py
+++ b/bsie/apps/_loader.py
@@ -1,5 +1,6 @@
# standard imports
+import os
import typing
# external imports
@@ -12,8 +13,7 @@ from bsie.lib.pipeline import Pipeline
from bsie.reader import ReaderBuilder
# constants
-DEFAULT_CONFIG_FILE = 'default_config.yaml'
-
+DEFAULT_CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'default_config.yaml')
# exports
__all__: typing.Sequence[str] = (
'DEFAULT_CONFIG_FILE',
@@ -23,7 +23,7 @@ __all__: typing.Sequence[str] = (
## code ##
-def load_pipeline(path: str) -> Pipeline:
+def load_pipeline(path: str = DEFAULT_CONFIG_FILE) -> Pipeline:
"""Load a pipeline according to a config at *path*."""
# load config file
with open(path, 'rt', encoding='utf-8') as ifile:
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index d64e8c2..7dda6f4 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -6,7 +6,7 @@ import typing
# bsie imports
from bsie.lib import BSIE, DefaultNamingPolicy
-from bsie.utils import bsfs, errors, node as node_
+from bsie.utils import bsfs, errors, node as node_, list_files
# inner-module imports
from . import _loader
@@ -23,7 +23,7 @@ def main(argv):
"""Index files or directories into BSFS."""
parser = argparse.ArgumentParser(description=main.__doc__, prog='index')
parser.add_argument('--config', type=str,
- default=os.path.join(os.path.dirname(__file__), _loader.DEFAULT_CONFIG_FILE),
+ default=_loader.DEFAULT_CONFIG_FILE,
help='Path to the config file.')
parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'),
help='')
@@ -59,22 +59,9 @@ def main(argv):
# FIXME: simplify code (below but maybe also above)
# FIXME: How to handle dependencies between data?
# E.g. do I still want to link to a tag despite not being permitted to set its label?
-
- # index input paths
- for path in args.input_file:
- if not os.path.exists(path):
- pass # FIXME: notify the user
- elif os.path.isdir(path) and args.recursive:
- for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow):
- for filename in filenames:
- for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)):
- handle(node, pred, value)
- elif os.path.isfile(path):
- for node, pred, value in bsie.from_file(path):
- handle(node, pred, value)
- else:
- raise errors.UnreachableError()
-
+ for path in list_files(args.input_file, args.recursive, args.follow):
+ for node, pred, value in bsie.from_file(path):
+ handle(node, pred, value)
if args.print:
walk(print)
diff --git a/bsie/apps/info.py b/bsie/apps/info.py
index e27b70b..b6494da 100644
--- a/bsie/apps/info.py
+++ b/bsie/apps/info.py
@@ -23,7 +23,7 @@ def main(argv):
"""Show information from BSIE."""
parser = argparse.ArgumentParser(description=main.__doc__, prog='info')
parser.add_argument('--config', type=str,
- default=os.path.join(os.path.dirname(__file__), _loader.DEFAULT_CONFIG_FILE),
+ default=_loader.DEFAULT_CONFIG_FILE,
help='Path to the config file.')
parser.add_argument('what', choices=('predicates', 'schema'),
help='Select what information to show.')
diff --git a/bsie/extractor/builder.py b/bsie/extractor/builder.py
index d691b0e..8353a93 100644
--- a/bsie/extractor/builder.py
+++ b/bsie/extractor/builder.py
@@ -67,6 +67,7 @@ class ExtractorBuilder():
return cls(**kwargs)
except Exception as err:
- raise errors.BuilderError(f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err
+ raise errors.BuilderError(
+ f'failed to build extractor {name} due to {bsfs.typename(err)}: {err}') from err
## EOF ##
diff --git a/bsie/extractor/image/iptc.py b/bsie/extractor/image/iptc.py
new file mode 100644
index 0000000..195eff7
--- /dev/null
+++ b/bsie/extractor/image/iptc.py
@@ -0,0 +1,70 @@
+
+# standard imports
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, node, ns
+
+# inner-module imports
+from .. import base
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Iptc',
+ )
+
+
+## code ##
+
+class Iptc(base.Extractor):
+ """Turn IPTC keywords into tags."""
+
+ CONTENT_READER = 'bsie.reader.exif.Iptc'
+
+ def __init__(self):
+ super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''
+ bsn:Tag rdfs:subClassOf bsfs:Node .
+
+ bse:tag rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsn:Entity ;
+ rdfs:range bsn:Tag .
+
+ <https://schema.bsfs.io/ie/Node/Tag#label> rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsn:Tag ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ '''))
+ self._callmap = {
+ self.schema.predicate(ns.bse.tag): self._keywords,
+ }
+
+ def extract(
+ self,
+ subject: node.Node,
+ content: dict,
+ principals: typing.Iterable[bsfs.schema.Predicate],
+ ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ for pred in principals:
+ # find callback
+ clbk = self._callmap.get(pred)
+ if clbk is None:
+ continue
+ # produce triples
+ yield from clbk(subject, content)
+
+ def _keywords(
+ self,
+ subject: node.Node,
+ content: dict,
+ ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ if 'Iptc.Application2.Keywords' not in content:
+ return
+ for keyword in content['Iptc.Application2.Keywords']:
+ tag = node.Node(ns.bsn.Tag, label=keyword)
+ yield subject, self.schema.predicate(ns.bse.tag), tag
+ yield tag, self.schema.predicate(ns.bst.label), keyword
+
+
+
+## EOF ##
diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py
index 9b9a45d..ffef7d9 100644
--- a/bsie/lib/naming_policy.py
+++ b/bsie/lib/naming_policy.py
@@ -4,6 +4,9 @@ import abc
import os
import typing
+# standard imports (urllib is part of the standard library)
+import urllib.parse
+
# bsie imports
from bsie.utils import bsfs, errors, ns
from bsie.utils.node import Node
@@ -80,14 +83,16 @@ class DefaultNamingPolicy(NamingPolicy):
def handle_node(self, node: Node) -> Node:
if node.uri is not None:
return node
- if node.node_type == ns.bsn.Entity :
- return self.name_file(node)
+ if node.node_type == ns.bsn.Entity:
+ return self.name_entity(node)
if node.node_type == ns.bsn.Preview:
return self.name_preview(node)
- raise errors.ProgrammingError('no naming policy available for {node.node_type}')
+ if node.node_type == ns.bsn.Tag:
+ return self.name_tag(node)
+ raise errors.ProgrammingError(f'no naming policy available for {node.node_type}')
- def name_file(self, node: Node) -> Node:
- """Set a bsfs:File node's uri fragment to its ucid."""
+ def name_entity(self, node: Node) -> Node:
+ """Set a bsn:Entity node's uri fragment to its ucid."""
if 'ucid' in node.hints: # content id
fragment = node.hints['ucid']
else: # random name
@@ -96,7 +101,7 @@ class DefaultNamingPolicy(NamingPolicy):
return node
def name_preview(self, node: Node) -> Node:
- """Set a bsfs:Preview node's uri fragment to its ucid.
+ """Set a bsn:Preview node's uri fragment to its ucid.
Uses its source fragment as fallback. Appends the size if provided.
"""
fragment = None
@@ -112,4 +117,14 @@ class DefaultNamingPolicy(NamingPolicy):
node.uri = getattr(self._prefix.preview(), fragment)
return node
+ def name_tag(self, node: Node) -> Node:
+ # NOTE: Must produce the same name for all tags that share the same label.
+ if 'label' in node.hints: # tag label
+ fragment = urllib.parse.quote(node.hints['label'])
+ else: # random name
+ fragment = self._uuid()
+ # FIXME: match to existing tags in bsfs storage!
+ node.uri = getattr(self._prefix.tag(), fragment)
+ return node
+
## EOF ##
diff --git a/bsie/reader/exif.py b/bsie/reader/exif.py
index 2d0428b..7ec7574 100644
--- a/bsie/reader/exif.py
+++ b/bsie/reader/exif.py
@@ -17,6 +17,7 @@ MATCH_RULE = 'mime=image/jpeg'
# exports
__all__: typing.Sequence[str] = (
'Exif',
+ 'Iptc',
)
@@ -41,4 +42,24 @@ class Exif(base.Reader):
except (TypeError, OSError, RuntimeError) as err:
raise errors.ReaderError(path) from err
+
+class Iptc(base.Reader):
+ """Use pyexiv2 to read iptc metadata from image files."""
+
+ def __init__(self):
+ self._match = filematcher.parse(MATCH_RULE)
+
+ def __call__(self, path: str) -> dict:
+ # perform quick checks first
+ if not self._match(path):
+ raise errors.UnsupportedFileFormatError(path)
+
+ try:
+ # open the file
+ img = pyexiv2.Image(path)
+ # read metadata
+ return img.read_iptc()
+ except (TypeError, OSError, RuntimeError) as err:
+ raise errors.ReaderError(path) from err
+
## EOF ##
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
index 18c8db7..4f08604 100644
--- a/bsie/utils/__init__.py
+++ b/bsie/utils/__init__.py
@@ -8,6 +8,7 @@ from . import bsfs
from . import filematcher
from . import namespaces as ns
from . import node
+from .filewalker import list_files
from .loading import safe_load, unpack_qualified_name
# exports
diff --git a/bsie/utils/filewalker.py b/bsie/utils/filewalker.py
new file mode 100644
index 0000000..3c36926
--- /dev/null
+++ b/bsie/utils/filewalker.py
@@ -0,0 +1,31 @@
+
+# standard imports
+import os
+import typing
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'list_files',
+ )
+
+
+## code ##
+
+def list_files(
+ roots: typing.Iterable[str],
+ recursive: bool = True,
+ follow_symlinks: bool = True,
+ ) -> typing.Iterator[str]:
+ """Iterate over all files in *roots*, recursively by default."""
+ # index input paths
+ for path in roots:
+ if not os.path.exists(path):
+ continue
+ elif os.path.isdir(path) and recursive:
+ for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=follow_symlinks):
+ for filename in filenames:
+ yield os.path.join(dirpath, filename)
+ elif os.path.isfile(path):
+ yield path
+
+## EOF ##
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index 4a66048..9357253 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -20,6 +20,7 @@ bsf = bsie.Literal.Array.Feature
bsl = bsfs.Literal
bsn = bsie.Node
bsp = bsie.Node.Preview()
+bst = bsie.Node.Tag()
# export
__all__: typing.Sequence[str] = (
@@ -32,6 +33,7 @@ __all__: typing.Sequence[str] = (
'bsl',
'bsn',
'bsp',
+ 'bst',
'xsd',
)