diff options
Diffstat (limited to 'bsie')
-rw-r--r-- | bsie/apps/index.py | 11 | ||||
-rw-r--r-- | bsie/extractor/base.py | 7 | ||||
-rw-r--r-- | bsie/extractor/generic/constant.py | 7 | ||||
-rw-r--r-- | bsie/extractor/generic/path.py | 8 | ||||
-rw-r--r-- | bsie/extractor/generic/stat.py | 7 | ||||
-rw-r--r-- | bsie/extractor/image/colors_spatial.py | 7 | ||||
-rw-r--r-- | bsie/extractor/image/face/detect.py | 9 | ||||
-rw-r--r-- | bsie/extractor/image/face/identify.py | 11 | ||||
-rw-r--r-- | bsie/extractor/image/iptc.py | 13 | ||||
-rw-r--r-- | bsie/extractor/image/photometrics.py | 7 | ||||
-rw-r--r-- | bsie/extractor/preview.py | 10 | ||||
-rw-r--r-- | bsie/lib/__init__.py | 1 | ||||
-rw-r--r-- | bsie/lib/bsie.py | 12 | ||||
-rw-r--r-- | bsie/lib/naming_policy.py | 141 | ||||
-rw-r--r-- | bsie/lib/pipeline.py | 9 | ||||
-rw-r--r-- | bsie/matcher/__init__.py | 17 | ||||
-rw-r--r-- | bsie/matcher/default_matcher.py | 76 | ||||
-rw-r--r-- | bsie/matcher/matcher.py | 61 | ||||
-rw-r--r-- | bsie/matcher/nodes.py | 49 | ||||
-rw-r--r-- | bsie/reader/face.py | 2 | ||||
-rw-r--r-- | bsie/utils/__init__.py | 2 | ||||
-rw-r--r-- | bsie/utils/node.py | 66 |
22 files changed, 268 insertions, 265 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 7dda6f4..260d3c8 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -5,8 +5,9 @@ import os import typing # bsie imports -from bsie.lib import BSIE, DefaultNamingPolicy -from bsie.utils import bsfs, errors, node as node_, list_files +from bsie.lib import BSIE +from bsie.matcher import nodes, DefaultMatcher +from bsie.utils import bsfs, errors, list_files # inner-module imports from . import _loader @@ -45,13 +46,13 @@ def main(argv): # build pipeline pipeline = _loader.load_pipeline(args.config) - # build the naming policy - naming_policy = DefaultNamingPolicy( + # build the node matcher + matcher = DefaultMatcher( host=args.host, user=args.user, ) # build BSIE frontend - bsie = BSIE(pipeline, naming_policy, args.collect, args.discard) + bsie = BSIE(pipeline, matcher, args.collect, args.discard) def walk(handle): """Walk through given input files.""" diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py index f92d7cc..bb2ee81 100644 --- a/bsie/extractor/base.py +++ b/bsie/extractor/base.py @@ -5,7 +5,8 @@ import abc import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # exports __all__: typing.Sequence[str] = ( @@ -106,10 +107,10 @@ class Extractor(abc.ABC): @abc.abstractmethod def extract( self, - subject: node.Node, + subject: nodes.Entity, content: typing.Any, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: """Return (node, predicate, value) triples.""" # FIXME: type annotation could be more strict: value is Hashable diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 7acbe95..e038c0b 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -4,7 +4,8 @@ import typing # bsie imports -from bsie.utils import bsfs, node +from bsie.matcher import nodes +from bsie.utils import bsfs # inner-module imports from .. import base @@ -44,10 +45,10 @@ class Constant(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: None, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred, value in self._tuples: if pred in principals: yield subject, pred, value diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 30d75cf..7fe157b 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -5,7 +5,8 @@ import typing # bsie imports from bsie.extractor import base -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # exports __all__: typing.Sequence[str] = ( @@ -41,14 +42,15 @@ class Path(base.Extractor): ''')) self._callmap = { self.schema.predicate(ns.bse.filename): self.__filename, + self.schema.predicate(ns.bse.dirname): self.__dirname, } def extract( self, - subject: node.Node, + subject: nodes.Entity, content: str, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred in principals: # find callback clbk = self._callmap.get(pred) diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 92b51f3..ff51cff 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -5,7 +5,8 @@ import os import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from .. import base @@ -41,10 +42,10 @@ class Stat(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: os.stat_result, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred in principals: # find callback clbk = self._callmap.get(pred) diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py index e6661a9..bccefc1 100644 --- a/bsie/extractor/image/colors_spatial.py +++ b/bsie/extractor/image/colors_spatial.py @@ -8,7 +8,8 @@ import PIL.Image import numpy as np # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from .. import base @@ -115,10 +116,10 @@ class ColorsSpatial(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: PIL.Image.Image, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: # check principals if self.schema.predicate(self._predicate_name) not in principals: # nothing to do; abort diff --git a/bsie/extractor/image/face/detect.py b/bsie/extractor/image/face/detect.py index 94e3a61..51d5659 100644 --- a/bsie/extractor/image/face/detect.py +++ b/bsie/extractor/image/face/detect.py @@ -7,7 +7,8 @@ import torch from facenet_pytorch import MTCNN, InceptionResnetV1 # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from ... import base @@ -72,17 +73,17 @@ class FaceDetect(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: dict, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: # check principals if self.schema.predicate(ns.bse.face) not in principals: # nothing to do; abort return for face in content: - fnode = node.Node(ns.bsn.Face, ucid=face['ucid']) + fnode = nodes.Face(ucid=face['ucid']) yield subject, ns.bse.face, fnode yield fnode, bsf.x, face['x'] yield fnode, bsf.y, face['y'] diff --git a/bsie/extractor/image/face/identify.py b/bsie/extractor/image/face/identify.py index 152f113..44a75c4 100644 --- a/bsie/extractor/image/face/identify.py +++ b/bsie/extractor/image/face/identify.py @@ -9,7 +9,8 @@ import numpy as np import torch # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from ... import base @@ -142,10 +143,10 @@ class FaceIdentify(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: typing.Any, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: # check principals #if self.schema.predicate(bsf.depicts) not in principals: if self.schema.predicate(ns.bse.face) not in principals: @@ -164,8 +165,8 @@ class FaceIdentify(base.Extractor): lbl = bsfs.URI(self._id2name[idx]) # label (uri) of nearest neighbour if lbl == self._restklasse: # suppress continue - pnode = node.Node(ns.bsn.Person, uri=lbl) - fnode = node.Node(ns.bsn.Face, ucid=face['ucid']) + pnode = nodes.Person(uri=lbl) + fnode = nodes.Face(ucid=face['ucid']) # emit triple yield fnode, self.schema.predicate(bsf.depicts), pnode # FIXME: emit subject -> face -> fnode? diff --git a/bsie/extractor/image/iptc.py b/bsie/extractor/image/iptc.py index 195eff7..0c03539 100644 --- a/bsie/extractor/image/iptc.py +++ b/bsie/extractor/image/iptc.py @@ -3,7 +3,8 @@ import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from .. import base @@ -41,10 +42,10 @@ class Iptc(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: dict, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred in principals: # find callback clbk = self._callmap.get(pred) @@ -55,13 +56,13 @@ class Iptc(base.Extractor): def _keywords( self, - subject: node.Node, + subject: nodes.Entity, content: dict, - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: if 'Iptc.Application2.Keywords' not in content: return for keyword in content['Iptc.Application2.Keywords']: - tag = node.Node(ns.bsn.Tag, label=keyword) + tag = nodes.Tag(label=keyword) yield subject, self.schema.predicate(ns.bse.tag), tag yield tag, self.schema.predicate(ns.bst.label), keyword diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py index 42eb3c8..4579b54 100644 --- a/bsie/extractor/image/photometrics.py +++ b/bsie/extractor/image/photometrics.py @@ -4,7 +4,8 @@ from fractions import Fraction import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from .. import base @@ -107,10 +108,10 @@ class Exif(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: dict, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred in principals: # find callback clbk = self._callmap.get(pred) diff --git a/bsie/extractor/preview.py b/bsie/extractor/preview.py index 145a01a..fcda71c 100644 --- a/bsie/extractor/preview.py +++ b/bsie/extractor/preview.py @@ -7,7 +7,8 @@ import typing import PIL.Image # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from . import base @@ -67,10 +68,10 @@ class Preview(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: typing.Callable[[int], PIL.Image.Image], principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: # check principals if self.schema.predicate(ns.bse.preview) not in principals: return @@ -82,10 +83,9 @@ class Preview(base.Extractor): buffer = io.BytesIO() img.save(buffer, format='jpeg') # create a preview node - preview = node.Node(ns.bsn.Preview, + preview = nodes.Preview( ucid=bsfs.uuid.UCID.from_bytes(buffer.getvalue()), size=max_side, - source=subject, ) # yield triples yield subject, self.schema.predicate(ns.bse.preview), preview diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py index f44fb74..daa806c 100644 --- a/bsie/lib/__init__.py +++ b/bsie/lib/__init__.py @@ -5,7 +5,6 @@ import typing # inner-module imports from .bsie import BSIE from .builder import PipelineBuilder -from .naming_policy import DefaultNamingPolicy # exports __all__: typing.Sequence[str] = ( diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index b02e707..9aa0bdb 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -3,10 +3,10 @@ import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.utils import bsfs, ns +from bsie.matcher import Matcher, nodes # inner-module imports -from .naming_policy import NamingPolicy from .pipeline import Pipeline # exports @@ -40,7 +40,7 @@ class BSIE(): # pipeline. pipeline: Pipeline, # naming policy - naming_policy: NamingPolicy, + matcher: Matcher, # principals to extract at most. None implies all available w.r.t. extractors. collect: typing.Optional[typing.Iterable[bsfs.URI]] = None, # principals to discard. @@ -48,7 +48,7 @@ class BSIE(): ): # store pipeline and naming policy self._pipeline = pipeline - self._naming_policy = naming_policy + self._matcher = matcher # start off with available principals self._principals = {pred.uri for pred in self._pipeline.principals} # limit principals to specified ones by argument. @@ -79,7 +79,7 @@ class BSIE(): self, path: bsfs.URI, principals: typing.Optional[typing.Iterable[bsfs.URI]] = None, - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.URI, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]]: """Produce triples for a given *path*. Limit to *principals* if given.""" # get requested principals. principals = set(principals) if principals is not None else self._principals @@ -88,6 +88,6 @@ class BSIE(): # predicate lookup principals = {self.schema.predicate(pred) for pred in principals} # invoke pipeline - yield from self._naming_policy(self._pipeline(path, principals)) + yield from self._matcher(self._pipeline(path, principals)) ## EOF ## diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py deleted file mode 100644 index fbdbeb0..0000000 --- a/bsie/lib/naming_policy.py +++ /dev/null @@ -1,141 +0,0 @@ - -# standard imports -import abc -import os -import typing - -# external imports -import urllib.parse - -# bsie imports -from bsie.utils import bsfs, errors, ns -from bsie.utils.node import Node - -# exports -__all__: typing.Sequence[str] = ( - 'DefaultNamingPolicy', - ) - - -## code ## - -class NamingPolicy(): - """Determine node uri's from node hints.""" - def __call__( - self, - iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], - ): - """Apply the policy on a triple iterator.""" - return NamingPolicyIterator(self, iterable) - - @abc.abstractmethod - def handle_node(self, node: Node) -> Node: - """Apply the policy on a node.""" - - -class NamingPolicyIterator(): - """Iterates over triples, determines uris according to a *policy* as it goes.""" - - # source triple iterator. - _iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]] - - # naming policy - _policy: NamingPolicy - - def __init__( - self, - policy: NamingPolicy, - iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], - ): - self._iterable = iterable - self._policy = policy - - def __iter__(self): - for node, pred, value in self._iterable: - # handle subject - self._policy.handle_node(node) - # handle value - if isinstance(value, Node): - self._policy.handle_node(value) - # yield triple - yield node, pred, value - - -class DefaultNamingPolicy(NamingPolicy): - """Compose URIs as <host/user/node_type#fragment> - - What information is used as fragment depends on the node type. - Typically, the default is to use the "ucid" hint. - The fallback in all cases is to generate a random uuid. - - Never changes previously assigned uris. Sets uris in-place. - - """ - - def __init__( - self, - host: bsfs.URI, - user: str, - ): - self._prefix = bsfs.Namespace(os.path.join(host, user)) - self._uuid = bsfs.uuid.UUID() - - def handle_node(self, node: Node) -> Node: - if node.uri is not None: - return node - if node.node_type == ns.bsn.Entity: - return self.name_entity(node) - if node.node_type == ns.bsn.Preview: - return self.name_preview(node) - if node.node_type == ns.bsn.Tag: - return self.name_tag(node) - if node.node_type == ns.bsn.Face: - return self.name_face(node) - raise errors.ProgrammingError(f'no naming policy available for {node.node_type}') - - def name_entity(self, node: Node) -> Node: - """Set a bsn:Entity node's uri fragment to its ucid.""" - if 'ucid' in node.hints: # content id - fragment = node.hints['ucid'] - else: # random name - fragment = self._uuid() - node.uri = getattr(self._prefix.file(), fragment) - return node - - def name_preview(self, node: Node) -> Node: - """Set a bsn:Preview node's uri fragment to its ucid. - Uses its source fragment as fallback. Appends the size if provided. - """ - fragment = None - if 'ucid' in node.hints: # content id - fragment = node.hints['ucid'] - if fragment is None and 'source' in node.hints: # source id - self.handle_node(node.hints['source']) - fragment = node.hints['source'].uri.get('fragment', None) - if fragment is None: # random name - fragment = self._uuid() - if 'size' in node.hints: # append size - fragment += '_s' + str(node.hints['size']) - node.uri = getattr(self._prefix.preview(), fragment) - return node - - def name_tag(self, node: Node) -> Node: - # NOTE: Must ensure to produce the same name for that tags with the same label. - if 'label' in node.hints: # tag label - fragment = urllib.parse.quote(node.hints['label']) - else: # random name - fragment = self._uuid() - # FIXME: match to existing tags in bsfs storage! - node.uri = getattr(self._prefix.tag(), fragment) - return node - - def name_face(self, node: Node) -> Node: - if 'ucid' in node.hints: # content id - fragment = node.hints['ucid'] - else: # random name - fragment = self._uuid() - node.uri = getattr(self._prefix.face(), fragment) - return node - - -## EOF ## diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py index 30fd6fd..98d9cc8 100644 --- a/bsie/lib/pipeline.py +++ b/bsie/lib/pipeline.py @@ -6,8 +6,9 @@ import typing # bsie imports from bsie.extractor import Extractor +from bsie.matcher import nodes from bsie.reader import Reader -from bsie.utils import bsfs, errors, node, ns +from bsie.utils import bsfs, errors, ns # exports __all__: typing.Sequence[str] = ( @@ -85,7 +86,7 @@ class Pipeline(): self, path: bsfs.URI, principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None, - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: """Extract triples from the file at *path*. Optionally, limit triples to *principals*.""" # get principals principals = set(principals) if principals is not None else set(self.schema.predicates()) @@ -104,9 +105,7 @@ class Pipeline(): rdr2ext[rdr].add(ext) # create subject for file - subject = node.Node(ns.bsn.Entity, - ucid=bsfs.uuid.UCID.from_path(path), - ) + subject = nodes.Entity(ucid=bsfs.uuid.UCID.from_path(path)) # extract information for rdr, extrs in rdr2ext.items(): diff --git a/bsie/matcher/__init__.py b/bsie/matcher/__init__.py new file mode 100644 index 0000000..836bacf --- /dev/null +++ b/bsie/matcher/__init__.py @@ -0,0 +1,17 @@ + +# standard imports +import typing + +# inner-module imports +from . import nodes +from .default_matcher import DefaultMatcher +from .matcher import Matcher + +# exports +__all__: typing.Sequence[str] = ( + 'DefaultMatcher', + 'Matcher', + 'nodes', + ) + +## EOF ## diff --git a/bsie/matcher/default_matcher.py b/bsie/matcher/default_matcher.py new file mode 100644 index 0000000..94bbe2c --- /dev/null +++ b/bsie/matcher/default_matcher.py @@ -0,0 +1,76 @@ + +# standard imports +import os +import typing +import urllib + +# bsie imports +from bsie.utils import bsfs + +# inner-module imports +from . import nodes +from .matcher import Matcher + +# exports +__all__: typing.Sequence[str] = ( + 'DefaultMatcher', + ) + + +## code ## + +class DefaultMatcher(Matcher): + """Compose URIs as <host/user/node_type#fragment> + + What information is used as fragment depends on the node type. + Typically, the default is to use the "ucid" hint. + The fallback in all cases is to generate a random uuid. + + Never changes previously assigned uris. Sets uris in-place. + + """ + + def __init__( + self, + host: bsfs.URI, + user: str, + ): + self._prefix = bsfs.Namespace(os.path.join(host, user)) + + def match_node(self, node: nodes.Node) -> nodes.Node: + if node.uri is not None: + return node + if isinstance(node, nodes.Entity): + return self.match_entity(node) + if isinstance(node, nodes.Preview): + return self.match_preview(node) + if isinstance(node, nodes.Tag): + return self.match_tag(node) + if isinstance(node, nodes.Face): + return self.match_face(node) + raise ValueError(f'no matching policy available for bsfs.typename{node}') + + def match_entity(self, node: nodes.Entity) -> nodes.Entity: + """Set a bsn:Entity node's uri fragment to its ucid.""" + node.uri = getattr(self._prefix.file(), node.ucid) + return node + + def match_preview(self, node: nodes.Preview) -> nodes.Preview: + """Set a bsn:Preview node's uri fragment to its ucid and size suffix.""" + fragment = node.ucid + '_s' + str(node.size) + node.uri = getattr(self._prefix.preview(), fragment) + return node + + def match_tag(self, node: nodes.Tag) -> nodes.Tag: + """Set a bsn:Tag node's uri to its label.""" + # FIXME: match to existing tags in bsfs storage?! + fragment = urllib.parse.quote(node.label) + node.uri = getattr(self._prefix.tag(), fragment) + return node + + def match_face(self, node: nodes.Face) -> nodes.Face: + """Set a bsn:Face node's uri to its ucid.""" + node.uri = getattr(self._prefix.face(), node.ucid) + return node + +## EOF ## diff --git a/bsie/matcher/matcher.py b/bsie/matcher/matcher.py new file mode 100644 index 0000000..a89626f --- /dev/null +++ b/bsie/matcher/matcher.py @@ -0,0 +1,61 @@ + +# standard imports +import abc +import typing + +# bsie imports +from bsie.utils import bsfs + +# inner-module imports +from . import nodes + +# exports +__all__: typing.Sequence[str] = ( + 'Matcher', + ) + + +## code ## + +class Matcher(): + """Determine node uri's from node hints.""" + def __call__( + self, + iterable: typing.Iterable[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]], + ): + """Apply the matcher on a triple iterator.""" + return MatcherIterator(self, iterable) + + @abc.abstractmethod + def match_node(self, node: nodes.Node) -> nodes.Node: + """Apply the matcher on a node.""" + + +class MatcherIterator(): + """Iterates over triples, determines uris according to a *matcher* as it goes.""" + + # source triple iterator. + _iterable: typing.Iterable[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]] + + # node matcher + _matcher: Matcher + + def __init__( + self, + matcher: Matcher, + iterable: typing.Iterable[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]], + ): + self._iterable = iterable + self._matcher = matcher + + def __iter__(self): + for node, pred, value in self._iterable: + # handle subject + self._matcher.match_node(node) + # handle value + if isinstance(value, nodes.Node): + self._matcher.match_node(value) + # yield triple + yield node, pred, value + +## EOF ## diff --git a/bsie/matcher/nodes.py b/bsie/matcher/nodes.py new file mode 100644 index 0000000..047e7d1 --- /dev/null +++ b/bsie/matcher/nodes.py @@ -0,0 +1,49 @@ + +# standard imports +from dataclasses import dataclass +import typing + +# bsie imports +from bsie.utils import bsfs, ns + +# exports +__all__: typing.Sequence[str] = ( + 'Entity', + 'Face', + 'Node', + 'Person', + 'Preview', + 'Tag', + ) + +@dataclass(kw_only=True, unsafe_hash=True) +class Node: # pylint: disable=missing-class-docstring + # FIXME: Only allow changes to uri after init + uri: typing.Optional[bsfs.URI] = None + +@dataclass(kw_only=True, unsafe_hash=True) +class Entity(Node): # pylint: disable=missing-class-docstring + node_type: bsfs.URI = ns.bsn.Entity + ucid: str + +@dataclass(kw_only=True, unsafe_hash=True) +class Face(Node): # pylint: disable=missing-class-docstring + node_type: bsfs.URI = ns.bsn.Face + ucid: str + +@dataclass(kw_only=True, unsafe_hash=True) +class Person(Node): # pylint: disable=missing-class-docstring + node_type: bsfs.URI = ns.bsn.Person + +@dataclass(kw_only=True, unsafe_hash=True) +class Preview(Node): # pylint: disable=missing-class-docstring + node_type: bsfs.URI = ns.bsn.Preview + ucid: str + size: int + +@dataclass(kw_only=True, unsafe_hash=True) +class Tag(Node): # pylint: disable=missing-class-docstring + node_type: bsfs.URI = ns.bsn.Tag + label: str + +## EOF ## diff --git a/bsie/reader/face.py b/bsie/reader/face.py index c5374e0..e43b93f 100644 --- a/bsie/reader/face.py +++ b/bsie/reader/face.py @@ -9,7 +9,7 @@ import PIL.Image import torch # bsie imports -from bsie.utils import bsfs, errors, node, ns +from bsie.utils import bsfs, errors, ns # inner-module imports from . import base diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py index 4f08604..0c96139 100644 --- a/bsie/utils/__init__.py +++ b/bsie/utils/__init__.py @@ -7,7 +7,6 @@ import typing from . import bsfs from . import filematcher from . import namespaces as ns -from . import node from .filewalker import list_files from .loading import safe_load, unpack_qualified_name @@ -15,7 +14,6 @@ from .loading import safe_load, unpack_qualified_name __all__: typing.Sequence[str] = ( 'bsfs', 'filematcher', - 'node', 'ns', 'safe_load', 'unpack_qualified_name', diff --git a/bsie/utils/node.py b/bsie/utils/node.py deleted file mode 100644 index fa34b2e..0000000 --- a/bsie/utils/node.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Lighweight Node to bridge to BSFS. -""" -# standard imports -import typing - -# bsie imports -from bsie.utils import bsfs - -# exports -__all__: typing.Sequence[str] = ( - 'Node', - ) - - -## code ## - -class Node(): - """Lightweight Node, disconnected from any bsfs structures. - - In most cases, provide *hints* and leave setting the uri to a node - naming policy. Only provide an *uri* if it is absolutely determined. - - """ - - # node type. - node_type: bsfs.URI - - # node URI. - uri: typing.Optional[bsfs.URI] - - # node naming hints. - hits: dict - - def __init__( - self, - node_type: bsfs.URI, - uri: typing.Optional[bsfs.URI] = None, - **uri_hints, - ): - # assign members - self.node_type = bsfs.URI(node_type) - self.hints = uri_hints - self.uri = uri - - def __eq__(self, other: typing.Any) -> bool: - """Compare two Node instances based on type and uri. - Compares hits only if the uri is not yet specified. - """ - return isinstance(other, Node) \ - and other.node_type == self.node_type \ - and other.uri == self.uri \ - and (self.uri is not None or self.hints == other.hints) - - def __hash__(self) -> int: - identifier = self.uri - if identifier is None: - identifier = tuple((key, self.hints[key]) for key in sorted(self.hints)) - return hash((type(self), self.node_type, identifier)) - - def __str__(self) -> str: - return f'{bsfs.typename(self)}({self.node_type}, {self.uri})' - - def __repr__(self) -> str: - return f'{bsfs.typename(self)}({self.node_type}, {self.uri})' - -## EOF ## |