Diffstat (limited to 'bsie')
-rw-r--r--  bsie/apps/index.py                      |  11
-rw-r--r--  bsie/extractor/base.py                  |   7
-rw-r--r--  bsie/extractor/generic/constant.py      |   7
-rw-r--r--  bsie/extractor/generic/path.py          |   8
-rw-r--r--  bsie/extractor/generic/stat.py          |   7
-rw-r--r--  bsie/extractor/image/colors_spatial.py  |   7
-rw-r--r--  bsie/extractor/image/face/detect.py     |   9
-rw-r--r--  bsie/extractor/image/face/identify.py   |  11
-rw-r--r--  bsie/extractor/image/iptc.py            |  13
-rw-r--r--  bsie/extractor/image/photometrics.py    |   7
-rw-r--r--  bsie/extractor/preview.py               |  10
-rw-r--r--  bsie/lib/__init__.py                    |   1
-rw-r--r--  bsie/lib/bsie.py                        |  12
-rw-r--r--  bsie/lib/naming_policy.py               | 141
-rw-r--r--  bsie/lib/pipeline.py                    |   9
-rw-r--r--  bsie/matcher/__init__.py                |  17
-rw-r--r--  bsie/matcher/default_matcher.py         |  76
-rw-r--r--  bsie/matcher/matcher.py                 |  61
-rw-r--r--  bsie/matcher/nodes.py                   |  49
-rw-r--r--  bsie/reader/face.py                     |   2
-rw-r--r--  bsie/utils/__init__.py                  |   2
-rw-r--r--  bsie/utils/node.py                      |  66
22 files changed, 268 insertions, 265 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index 7dda6f4..260d3c8 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -5,8 +5,9 @@ import os
import typing
# bsie imports
-from bsie.lib import BSIE, DefaultNamingPolicy
-from bsie.utils import bsfs, errors, node as node_, list_files
+from bsie.lib import BSIE
+from bsie.matcher import nodes, DefaultMatcher
+from bsie.utils import bsfs, errors, list_files
# inner-module imports
from . import _loader
@@ -45,13 +46,13 @@ def main(argv):
# build pipeline
pipeline = _loader.load_pipeline(args.config)
- # build the naming policy
- naming_policy = DefaultNamingPolicy(
+ # build the node matcher
+ matcher = DefaultMatcher(
host=args.host,
user=args.user,
)
# build BSIE frontend
- bsie = BSIE(pipeline, naming_policy, args.collect, args.discard)
+ bsie = BSIE(pipeline, matcher, args.collect, args.discard)
def walk(handle):
"""Walk through given input files."""
diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py
index f92d7cc..bb2ee81 100644
--- a/bsie/extractor/base.py
+++ b/bsie/extractor/base.py
@@ -5,7 +5,8 @@ import abc
import typing
# bsie imports
-from bsie.utils import bsfs, node, ns
+from bsie.matcher import nodes
+from bsie.utils import bsfs, ns
# exports
__all__: typing.Sequence[str] = (
@@ -106,10 +107,10 @@ class Extractor(abc.ABC):
@abc.abstractmethod
def extract(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: typing.Any,
principals: typing.Iterable[bsfs.schema.Predicate],
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
"""Return (node, predicate, value) triples."""
# FIXME: type annotation could be more strict: value is Hashable
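
The signature change is mechanical: subjects now arrive as typed nodes.Entity instances and all emitted nodes come from bsie.matcher.nodes. A hypothetical stand-alone generator with the new shape (not an actual extractor from the tree) could look like this:

```python
# Hypothetical generator mirroring the new extract() signature; it is not an
# Extractor subclass, only an illustration of the (node, predicate, value) shape.
import typing

from bsie.matcher import nodes
from bsie.utils import bsfs


def toy_extract(
        subject: nodes.Entity,
        content: str,
        principals: typing.Iterable[bsfs.schema.Predicate],
) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
    # emit the raw content once per requested principal
    for pred in principals:
        yield subject, pred, content
```
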
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index 7acbe95..e038c0b 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -4,7 +4,8 @@
import typing
# bsie imports
-from bsie.utils import bsfs, node
+from bsie.matcher import nodes
+from bsie.utils import bsfs
# inner-module imports
from .. import base
@@ -44,10 +45,10 @@ class Constant(base.Extractor):
def extract(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: None,
principals: typing.Iterable[bsfs.schema.Predicate],
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
for pred, value in self._tuples:
if pred in principals:
yield subject, pred, value
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index 30d75cf..7fe157b 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -5,7 +5,8 @@ import typing
# bsie imports
from bsie.extractor import base
-from bsie.utils import bsfs, node, ns
+from bsie.matcher import nodes
+from bsie.utils import bsfs, ns
# exports
__all__: typing.Sequence[str] = (
@@ -41,14 +42,15 @@ class Path(base.Extractor):
'''))
self._callmap = {
self.schema.predicate(ns.bse.filename): self.__filename,
+ self.schema.predicate(ns.bse.dirname): self.__dirname,
}
def extract(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: str,
principals: typing.Iterable[bsfs.schema.Predicate],
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
for pred in principals:
# find callback
clbk = self._callmap.get(pred)
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index 92b51f3..ff51cff 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -5,7 +5,8 @@ import os
import typing
# bsie imports
-from bsie.utils import bsfs, node, ns
+from bsie.matcher import nodes
+from bsie.utils import bsfs, ns
# inner-module imports
from .. import base
@@ -41,10 +42,10 @@ class Stat(base.Extractor):
def extract(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: os.stat_result,
principals: typing.Iterable[bsfs.schema.Predicate],
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
for pred in principals:
# find callback
clbk = self._callmap.get(pred)
diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py
index e6661a9..bccefc1 100644
--- a/bsie/extractor/image/colors_spatial.py
+++ b/bsie/extractor/image/colors_spatial.py
@@ -8,7 +8,8 @@ import PIL.Image
import numpy as np
# bsie imports
-from bsie.utils import bsfs, node, ns
+from bsie.matcher import nodes
+from bsie.utils import bsfs, ns
# inner-module imports
from .. import base
@@ -115,10 +116,10 @@ class ColorsSpatial(base.Extractor):
def extract(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: PIL.Image.Image,
principals: typing.Iterable[bsfs.schema.Predicate],
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
# check principals
if self.schema.predicate(self._predicate_name) not in principals:
# nothing to do; abort
diff --git a/bsie/extractor/image/face/detect.py b/bsie/extractor/image/face/detect.py
index 94e3a61..51d5659 100644
--- a/bsie/extractor/image/face/detect.py
+++ b/bsie/extractor/image/face/detect.py
@@ -7,7 +7,8 @@ import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
# bsie imports
-from bsie.utils import bsfs, node, ns
+from bsie.matcher import nodes
+from bsie.utils import bsfs, ns
# inner-module imports
from ... import base
@@ -72,17 +73,17 @@ class FaceDetect(base.Extractor):
def extract(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: dict,
principals: typing.Iterable[bsfs.schema.Predicate],
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
# check principals
if self.schema.predicate(ns.bse.face) not in principals:
# nothing to do; abort
return
for face in content:
- fnode = node.Node(ns.bsn.Face, ucid=face['ucid'])
+ fnode = nodes.Face(ucid=face['ucid'])
yield subject, ns.bse.face, fnode
yield fnode, bsf.x, face['x']
yield fnode, bsf.y, face['y']
diff --git a/bsie/extractor/image/face/identify.py b/bsie/extractor/image/face/identify.py
index 152f113..44a75c4 100644
--- a/bsie/extractor/image/face/identify.py
+++ b/bsie/extractor/image/face/identify.py
@@ -9,7 +9,8 @@ import numpy as np
import torch
# bsie imports
-from bsie.utils import bsfs, node, ns
+from bsie.matcher import nodes
+from bsie.utils import bsfs, ns
# inner-module imports
from ... import base
@@ -142,10 +143,10 @@ class FaceIdentify(base.Extractor):
def extract(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: typing.Any,
principals: typing.Iterable[bsfs.schema.Predicate],
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
# check principals
#if self.schema.predicate(bsf.depicts) not in principals:
if self.schema.predicate(ns.bse.face) not in principals:
@@ -164,8 +165,8 @@ class FaceIdentify(base.Extractor):
lbl = bsfs.URI(self._id2name[idx]) # label (uri) of nearest neighbour
if lbl == self._restklasse: # suppress
continue
- pnode = node.Node(ns.bsn.Person, uri=lbl)
- fnode = node.Node(ns.bsn.Face, ucid=face['ucid'])
+ pnode = nodes.Person(uri=lbl)
+ fnode = nodes.Face(ucid=face['ucid'])
# emit triple
yield fnode, self.schema.predicate(bsf.depicts), pnode
# FIXME: emit subject -> face -> fnode?
diff --git a/bsie/extractor/image/iptc.py b/bsie/extractor/image/iptc.py
index 195eff7..0c03539 100644
--- a/bsie/extractor/image/iptc.py
+++ b/bsie/extractor/image/iptc.py
@@ -3,7 +3,8 @@
import typing
# bsie imports
-from bsie.utils import bsfs, node, ns
+from bsie.matcher import nodes
+from bsie.utils import bsfs, ns
# inner-module imports
from .. import base
@@ -41,10 +42,10 @@ class Iptc(base.Extractor):
def extract(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: dict,
principals: typing.Iterable[bsfs.schema.Predicate],
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
for pred in principals:
# find callback
clbk = self._callmap.get(pred)
@@ -55,13 +56,13 @@ class Iptc(base.Extractor):
def _keywords(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: dict,
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
if 'Iptc.Application2.Keywords' not in content:
return
for keyword in content['Iptc.Application2.Keywords']:
- tag = node.Node(ns.bsn.Tag, label=keyword)
+ tag = nodes.Tag(label=keyword)
yield subject, self.schema.predicate(ns.bse.tag), tag
yield tag, self.schema.predicate(ns.bst.label), keyword
diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py
index 42eb3c8..4579b54 100644
--- a/bsie/extractor/image/photometrics.py
+++ b/bsie/extractor/image/photometrics.py
@@ -4,7 +4,8 @@ from fractions import Fraction
import typing
# bsie imports
-from bsie.utils import bsfs, node, ns
+from bsie.matcher import nodes
+from bsie.utils import bsfs, ns
# inner-module imports
from .. import base
@@ -107,10 +108,10 @@ class Exif(base.Extractor):
def extract(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: dict,
principals: typing.Iterable[bsfs.schema.Predicate],
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
for pred in principals:
# find callback
clbk = self._callmap.get(pred)
diff --git a/bsie/extractor/preview.py b/bsie/extractor/preview.py
index 145a01a..fcda71c 100644
--- a/bsie/extractor/preview.py
+++ b/bsie/extractor/preview.py
@@ -7,7 +7,8 @@ import typing
import PIL.Image
# bsie imports
-from bsie.utils import bsfs, node, ns
+from bsie.matcher import nodes
+from bsie.utils import bsfs, ns
# inner-module imports
from . import base
@@ -67,10 +68,10 @@ class Preview(base.Extractor):
def extract(
self,
- subject: node.Node,
+ subject: nodes.Entity,
content: typing.Callable[[int], PIL.Image.Image],
principals: typing.Iterable[bsfs.schema.Predicate],
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
# check principals
if self.schema.predicate(ns.bse.preview) not in principals:
return
@@ -82,10 +83,9 @@ class Preview(base.Extractor):
buffer = io.BytesIO()
img.save(buffer, format='jpeg')
# create a preview node
- preview = node.Node(ns.bsn.Preview,
+ preview = nodes.Preview(
ucid=bsfs.uuid.UCID.from_bytes(buffer.getvalue()),
size=max_side,
- source=subject,
)
# yield triples
yield subject, self.schema.predicate(ns.bse.preview), preview
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
index f44fb74..daa806c 100644
--- a/bsie/lib/__init__.py
+++ b/bsie/lib/__init__.py
@@ -5,7 +5,6 @@ import typing
# inner-module imports
from .bsie import BSIE
from .builder import PipelineBuilder
-from .naming_policy import DefaultNamingPolicy
# exports
__all__: typing.Sequence[str] = (
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
index b02e707..9aa0bdb 100644
--- a/bsie/lib/bsie.py
+++ b/bsie/lib/bsie.py
@@ -3,10 +3,10 @@
import typing
# bsie imports
-from bsie.utils import bsfs, node, ns
+from bsie.utils import bsfs, ns
+from bsie.matcher import Matcher, nodes
# inner-module imports
-from .naming_policy import NamingPolicy
from .pipeline import Pipeline
# exports
@@ -40,7 +40,7 @@ class BSIE():
# pipeline.
pipeline: Pipeline,
# naming policy
- naming_policy: NamingPolicy,
+ matcher: Matcher,
# principals to extract at most. None implies all available w.r.t. extractors.
collect: typing.Optional[typing.Iterable[bsfs.URI]] = None,
# principals to discard.
@@ -48,7 +48,7 @@ class BSIE():
):
# store pipeline and naming policy
self._pipeline = pipeline
- self._naming_policy = naming_policy
+ self._matcher = matcher
# start off with available principals
self._principals = {pred.uri for pred in self._pipeline.principals}
# limit principals to specified ones by argument.
@@ -79,7 +79,7 @@ class BSIE():
self,
path: bsfs.URI,
principals: typing.Optional[typing.Iterable[bsfs.URI]] = None,
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.URI, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]]:
"""Produce triples for a given *path*. Limit to *principals* if given."""
# get requested principals.
principals = set(principals) if principals is not None else self._principals
@@ -88,6 +88,6 @@ class BSIE():
# predicate lookup
principals = {self.schema.predicate(pred) for pred in principals}
# invoke pipeline
- yield from self._naming_policy(self._pipeline(path, principals))
+ yield from self._matcher(self._pipeline(path, principals))
## EOF ##
diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py
deleted file mode 100644
index fbdbeb0..0000000
--- a/bsie/lib/naming_policy.py
+++ /dev/null
@@ -1,141 +0,0 @@
-
-# standard imports
-import abc
-import os
-import typing
-
-# external imports
-import urllib.parse
-
-# bsie imports
-from bsie.utils import bsfs, errors, ns
-from bsie.utils.node import Node
-
-# exports
-__all__: typing.Sequence[str] = (
- 'DefaultNamingPolicy',
- )
-
-
-## code ##
-
-class NamingPolicy():
- """Determine node uri's from node hints."""
- def __call__(
- self,
- iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]],
- ):
- """Apply the policy on a triple iterator."""
- return NamingPolicyIterator(self, iterable)
-
- @abc.abstractmethod
- def handle_node(self, node: Node) -> Node:
- """Apply the policy on a node."""
-
-
-class NamingPolicyIterator():
- """Iterates over triples, determines uris according to a *policy* as it goes."""
-
- # source triple iterator.
- _iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]]
-
- # naming policy
- _policy: NamingPolicy
-
- def __init__(
- self,
- policy: NamingPolicy,
- iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]],
- ):
- self._iterable = iterable
- self._policy = policy
-
- def __iter__(self):
- for node, pred, value in self._iterable:
- # handle subject
- self._policy.handle_node(node)
- # handle value
- if isinstance(value, Node):
- self._policy.handle_node(value)
- # yield triple
- yield node, pred, value
-
-
-class DefaultNamingPolicy(NamingPolicy):
- """Compose URIs as <host/user/node_type#fragment>
-
- What information is used as fragment depends on the node type.
- Typically, the default is to use the "ucid" hint.
- The fallback in all cases is to generate a random uuid.
-
- Never changes previously assigned uris. Sets uris in-place.
-
- """
-
- def __init__(
- self,
- host: bsfs.URI,
- user: str,
- ):
- self._prefix = bsfs.Namespace(os.path.join(host, user))
- self._uuid = bsfs.uuid.UUID()
-
- def handle_node(self, node: Node) -> Node:
- if node.uri is not None:
- return node
- if node.node_type == ns.bsn.Entity:
- return self.name_entity(node)
- if node.node_type == ns.bsn.Preview:
- return self.name_preview(node)
- if node.node_type == ns.bsn.Tag:
- return self.name_tag(node)
- if node.node_type == ns.bsn.Face:
- return self.name_face(node)
- raise errors.ProgrammingError(f'no naming policy available for {node.node_type}')
-
- def name_entity(self, node: Node) -> Node:
- """Set a bsn:Entity node's uri fragment to its ucid."""
- if 'ucid' in node.hints: # content id
- fragment = node.hints['ucid']
- else: # random name
- fragment = self._uuid()
- node.uri = getattr(self._prefix.file(), fragment)
- return node
-
- def name_preview(self, node: Node) -> Node:
- """Set a bsn:Preview node's uri fragment to its ucid.
- Uses its source fragment as fallback. Appends the size if provided.
- """
- fragment = None
- if 'ucid' in node.hints: # content id
- fragment = node.hints['ucid']
- if fragment is None and 'source' in node.hints: # source id
- self.handle_node(node.hints['source'])
- fragment = node.hints['source'].uri.get('fragment', None)
- if fragment is None: # random name
- fragment = self._uuid()
- if 'size' in node.hints: # append size
- fragment += '_s' + str(node.hints['size'])
- node.uri = getattr(self._prefix.preview(), fragment)
- return node
-
- def name_tag(self, node: Node) -> Node:
- # NOTE: Must ensure to produce the same name for that tags with the same label.
- if 'label' in node.hints: # tag label
- fragment = urllib.parse.quote(node.hints['label'])
- else: # random name
- fragment = self._uuid()
- # FIXME: match to existing tags in bsfs storage!
- node.uri = getattr(self._prefix.tag(), fragment)
- return node
-
- def name_face(self, node: Node) -> Node:
- if 'ucid' in node.hints: # content id
- fragment = node.hints['ucid']
- else: # random name
- fragment = self._uuid()
- node.uri = getattr(self._prefix.face(), fragment)
- return node
-
-
-## EOF ##
diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py
index 30fd6fd..98d9cc8 100644
--- a/bsie/lib/pipeline.py
+++ b/bsie/lib/pipeline.py
@@ -6,8 +6,9 @@ import typing
# bsie imports
from bsie.extractor import Extractor
+from bsie.matcher import nodes
from bsie.reader import Reader
-from bsie.utils import bsfs, errors, node, ns
+from bsie.utils import bsfs, errors, ns
# exports
__all__: typing.Sequence[str] = (
@@ -85,7 +86,7 @@ class Pipeline():
self,
path: bsfs.URI,
principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None,
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
"""Extract triples from the file at *path*. Optionally, limit triples to *principals*."""
# get principals
principals = set(principals) if principals is not None else set(self.schema.predicates())
@@ -104,9 +105,7 @@ class Pipeline():
rdr2ext[rdr].add(ext)
# create subject for file
- subject = node.Node(ns.bsn.Entity,
- ucid=bsfs.uuid.UCID.from_path(path),
- )
+ subject = nodes.Entity(ucid=bsfs.uuid.UCID.from_path(path))
# extract information
for rdr, extrs in rdr2ext.items():
diff --git a/bsie/matcher/__init__.py b/bsie/matcher/__init__.py
new file mode 100644
index 0000000..836bacf
--- /dev/null
+++ b/bsie/matcher/__init__.py
@@ -0,0 +1,17 @@
+
+# standard imports
+import typing
+
+# inner-module imports
+from . import nodes
+from .default_matcher import DefaultMatcher
+from .matcher import Matcher
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'DefaultMatcher',
+ 'Matcher',
+ 'nodes',
+ )
+
+## EOF ##
diff --git a/bsie/matcher/default_matcher.py b/bsie/matcher/default_matcher.py
new file mode 100644
index 0000000..94bbe2c
--- /dev/null
+++ b/bsie/matcher/default_matcher.py
@@ -0,0 +1,76 @@
+
+# standard imports
+import os
+import typing
+import urllib.parse
+
+# bsie imports
+from bsie.utils import bsfs
+
+# inner-module imports
+from . import nodes
+from .matcher import Matcher
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'DefaultMatcher',
+ )
+
+
+## code ##
+
+class DefaultMatcher(Matcher):
+ """Compose URIs as <host/user/node_type#fragment>
+
+ What information is used as fragment depends on the node type.
+ Typically, the default is to use the "ucid" hint.
+ The fallback in all cases is to generate a random uuid.
+
+ Never changes previously assigned uris. Sets uris in-place.
+
+ """
+
+ def __init__(
+ self,
+ host: bsfs.URI,
+ user: str,
+ ):
+ self._prefix = bsfs.Namespace(os.path.join(host, user))
+
+ def match_node(self, node: nodes.Node) -> nodes.Node:
+ if node.uri is not None:
+ return node
+ if isinstance(node, nodes.Entity):
+ return self.match_entity(node)
+ if isinstance(node, nodes.Preview):
+ return self.match_preview(node)
+ if isinstance(node, nodes.Tag):
+ return self.match_tag(node)
+ if isinstance(node, nodes.Face):
+ return self.match_face(node)
+ raise ValueError(f'no matching policy available for {bsfs.typename(node)}')
+
+ def match_entity(self, node: nodes.Entity) -> nodes.Entity:
+ """Set a bsn:Entity node's uri fragment to its ucid."""
+ node.uri = getattr(self._prefix.file(), node.ucid)
+ return node
+
+ def match_preview(self, node: nodes.Preview) -> nodes.Preview:
+ """Set a bsn:Preview node's uri fragment to its ucid and size suffix."""
+ fragment = node.ucid + '_s' + str(node.size)
+ node.uri = getattr(self._prefix.preview(), fragment)
+ return node
+
+ def match_tag(self, node: nodes.Tag) -> nodes.Tag:
+ """Set a bsn:Tag node's uri to its label."""
+ # FIXME: match to existing tags in bsfs storage?!
+ fragment = urllib.parse.quote(node.label)
+ node.uri = getattr(self._prefix.tag(), fragment)
+ return node
+
+ def match_face(self, node: nodes.Face) -> nodes.Face:
+ """Set a bsn:Face node's uri to its ucid."""
+ node.uri = getattr(self._prefix.face(), node.ucid)
+ return node
+
+## EOF ##
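
As a rough usage sketch (the exact URI text depends on bsfs.Namespace, which this change does not touch), the matcher fills in URIs from the typed hints:

```python
# Sketch only: DefaultMatcher assigning URIs in place from typed node hints.
from bsie.matcher import DefaultMatcher, nodes
from bsie.utils import bsfs

matcher = DefaultMatcher(host=bsfs.URI('http://example.com/bsfs'), user='alice')

entity = nodes.Entity(ucid='0123abcd')
matcher.match_node(entity)        # dispatches to match_entity()
# entity.uri now lives under <host>/<user>/file with fragment '0123abcd'

tag = nodes.Tag(label='holiday photos')
matcher.match_node(tag)           # the label is percent-encoded for the fragment
```
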
diff --git a/bsie/matcher/matcher.py b/bsie/matcher/matcher.py
new file mode 100644
index 0000000..a89626f
--- /dev/null
+++ b/bsie/matcher/matcher.py
@@ -0,0 +1,61 @@
+
+# standard imports
+import abc
+import typing
+
+# bsie imports
+from bsie.utils import bsfs
+
+# inner-module imports
+from . import nodes
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Matcher',
+ )
+
+
+## code ##
+
+class Matcher():
+ """Determine node uri's from node hints."""
+ def __call__(
+ self,
+ iterable: typing.Iterable[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]],
+ ):
+ """Apply the matcher on a triple iterator."""
+ return MatcherIterator(self, iterable)
+
+ @abc.abstractmethod
+ def match_node(self, node: nodes.Node) -> nodes.Node:
+ """Apply the matcher on a node."""
+
+
+class MatcherIterator():
+ """Iterates over triples, determines uris according to a *matcher* as it goes."""
+
+ # source triple iterator.
+ _iterable: typing.Iterable[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]]
+
+ # node matcher
+ _matcher: Matcher
+
+ def __init__(
+ self,
+ matcher: Matcher,
+ iterable: typing.Iterable[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]],
+ ):
+ self._iterable = iterable
+ self._matcher = matcher
+
+ def __iter__(self):
+ for node, pred, value in self._iterable:
+ # handle subject
+ self._matcher.match_node(node)
+ # handle value
+ if isinstance(value, nodes.Node):
+ self._matcher.match_node(value)
+ # yield triple
+ yield node, pred, value
+
+## EOF ##
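
Because Matcher is callable and returns a MatcherIterator, URIs are assigned lazily while triples stream out of the pipeline. A small sketch, with predicates shown as plain namespace URIs for brevity (the pipeline actually yields bsfs.schema.Predicate objects):

```python
# Sketch: streaming triples through a matcher so node URIs get set on the fly.
from bsie.matcher import DefaultMatcher, nodes
from bsie.utils import bsfs, ns

matcher = DefaultMatcher(host=bsfs.URI('http://example.com/bsfs'), user='alice')

subject = nodes.Entity(ucid='0123abcd')
triples = [
    (subject, ns.bse.filename, 'image.jpg'),
    (subject, ns.bse.tag, nodes.Tag(label='holiday')),
]

for node, pred, value in matcher(triples):   # wraps the iterable in a MatcherIterator
    # node.uri (and value.uri, if value is itself a Node) has been assigned here
    print(node.uri, pred, value)
```
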
diff --git a/bsie/matcher/nodes.py b/bsie/matcher/nodes.py
new file mode 100644
index 0000000..047e7d1
--- /dev/null
+++ b/bsie/matcher/nodes.py
@@ -0,0 +1,49 @@
+
+# standard imports
+from dataclasses import dataclass
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, ns
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'Entity',
+ 'Face',
+ 'Node',
+ 'Person',
+ 'Preview',
+ 'Tag',
+ )
+
+@dataclass(kw_only=True, unsafe_hash=True)
+class Node: # pylint: disable=missing-class-docstring
+ # FIXME: Only allow changes to uri after init
+ uri: typing.Optional[bsfs.URI] = None
+
+@dataclass(kw_only=True, unsafe_hash=True)
+class Entity(Node): # pylint: disable=missing-class-docstring
+ node_type: bsfs.URI = ns.bsn.Entity
+ ucid: str
+
+@dataclass(kw_only=True, unsafe_hash=True)
+class Face(Node): # pylint: disable=missing-class-docstring
+ node_type: bsfs.URI = ns.bsn.Face
+ ucid: str
+
+@dataclass(kw_only=True, unsafe_hash=True)
+class Person(Node): # pylint: disable=missing-class-docstring
+ node_type: bsfs.URI = ns.bsn.Person
+
+@dataclass(kw_only=True, unsafe_hash=True)
+class Preview(Node): # pylint: disable=missing-class-docstring
+ node_type: bsfs.URI = ns.bsn.Preview
+ ucid: str
+ size: int
+
+@dataclass(kw_only=True, unsafe_hash=True)
+class Tag(Node): # pylint: disable=missing-class-docstring
+ node_type: bsfs.URI = ns.bsn.Tag
+ label: str
+
+## EOF ##
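
The former free-form Node(node_type, **uri_hints) construction becomes a handful of keyword-only dataclasses, so missing hints now fail at construction time rather than at naming time:

```python
# Constructing the typed nodes; all fields are keyword-only.
from bsie.matcher import nodes

entity  = nodes.Entity(ucid='0123abcd')
preview = nodes.Preview(ucid='feedbeef', size=512)
tag     = nodes.Tag(label='holiday')
face    = nodes.Face(ucid='0123abcd')

# unsafe_hash=True keeps nodes usable as set/dict members, e.g. for deduplication
assert len({tag, nodes.Tag(label='holiday')}) == 1
```
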
diff --git a/bsie/reader/face.py b/bsie/reader/face.py
index c5374e0..e43b93f 100644
--- a/bsie/reader/face.py
+++ b/bsie/reader/face.py
@@ -9,7 +9,7 @@ import PIL.Image
import torch
# bsie imports
-from bsie.utils import bsfs, errors, node, ns
+from bsie.utils import bsfs, errors, ns
# inner-module imports
from . import base
diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py
index 4f08604..0c96139 100644
--- a/bsie/utils/__init__.py
+++ b/bsie/utils/__init__.py
@@ -7,7 +7,6 @@ import typing
from . import bsfs
from . import filematcher
from . import namespaces as ns
-from . import node
from .filewalker import list_files
from .loading import safe_load, unpack_qualified_name
@@ -15,7 +14,6 @@ from .loading import safe_load, unpack_qualified_name
__all__: typing.Sequence[str] = (
'bsfs',
'filematcher',
- 'node',
'ns',
'safe_load',
'unpack_qualified_name',
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
deleted file mode 100644
index fa34b2e..0000000
--- a/bsie/utils/node.py
+++ /dev/null
@@ -1,66 +0,0 @@
-"""Lighweight Node to bridge to BSFS.
-"""
-# standard imports
-import typing
-
-# bsie imports
-from bsie.utils import bsfs
-
-# exports
-__all__: typing.Sequence[str] = (
- 'Node',
- )
-
-
-## code ##
-
-class Node():
- """Lightweight Node, disconnected from any bsfs structures.
-
- In most cases, provide *hints* and leave setting the uri to a node
- naming policy. Only provide an *uri* if it is absolutely determined.
-
- """
-
- # node type.
- node_type: bsfs.URI
-
- # node URI.
- uri: typing.Optional[bsfs.URI]
-
- # node naming hints.
- hits: dict
-
- def __init__(
- self,
- node_type: bsfs.URI,
- uri: typing.Optional[bsfs.URI] = None,
- **uri_hints,
- ):
- # assign members
- self.node_type = bsfs.URI(node_type)
- self.hints = uri_hints
- self.uri = uri
-
- def __eq__(self, other: typing.Any) -> bool:
- """Compare two Node instances based on type and uri.
- Compares hits only if the uri is not yet specified.
- """
- return isinstance(other, Node) \
- and other.node_type == self.node_type \
- and other.uri == self.uri \
- and (self.uri is not None or self.hints == other.hints)
-
- def __hash__(self) -> int:
- identifier = self.uri
- if identifier is None:
- identifier = tuple((key, self.hints[key]) for key in sorted(self.hints))
- return hash((type(self), self.node_type, identifier))
-
- def __str__(self) -> str:
- return f'{bsfs.typename(self)}({self.node_type}, {self.uri})'
-
- def __repr__(self) -> str:
- return f'{bsfs.typename(self)}({self.node_type}, {self.uri})'
-
-## EOF ##