diff options
39 files changed, 591 insertions, 559 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 7dda6f4..260d3c8 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -5,8 +5,9 @@ import os import typing # bsie imports -from bsie.lib import BSIE, DefaultNamingPolicy -from bsie.utils import bsfs, errors, node as node_, list_files +from bsie.lib import BSIE +from bsie.matcher import nodes, DefaultMatcher +from bsie.utils import bsfs, errors, list_files # inner-module imports from . import _loader @@ -45,13 +46,13 @@ def main(argv): # build pipeline pipeline = _loader.load_pipeline(args.config) - # build the naming policy - naming_policy = DefaultNamingPolicy( + # build the node matcher + matcher = DefaultMatcher( host=args.host, user=args.user, ) # build BSIE frontend - bsie = BSIE(pipeline, naming_policy, args.collect, args.discard) + bsie = BSIE(pipeline, matcher, args.collect, args.discard) def walk(handle): """Walk through given input files.""" diff --git a/bsie/extractor/base.py b/bsie/extractor/base.py index f92d7cc..bb2ee81 100644 --- a/bsie/extractor/base.py +++ b/bsie/extractor/base.py @@ -5,7 +5,8 @@ import abc import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # exports __all__: typing.Sequence[str] = ( @@ -106,10 +107,10 @@ class Extractor(abc.ABC): @abc.abstractmethod def extract( self, - subject: node.Node, + subject: nodes.Entity, content: typing.Any, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: """Return (node, predicate, value) triples.""" # FIXME: type annotation could be more strict: value is Hashable diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py index 7acbe95..e038c0b 100644 --- a/bsie/extractor/generic/constant.py +++ b/bsie/extractor/generic/constant.py @@ -4,7 +4,8 @@ import typing # bsie imports -from bsie.utils import bsfs, node +from bsie.matcher import nodes +from bsie.utils import bsfs # inner-module imports from .. import base @@ -44,10 +45,10 @@ class Constant(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: None, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred, value in self._tuples: if pred in principals: yield subject, pred, value diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 30d75cf..7fe157b 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -5,7 +5,8 @@ import typing # bsie imports from bsie.extractor import base -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # exports __all__: typing.Sequence[str] = ( @@ -41,14 +42,15 @@ class Path(base.Extractor): ''')) self._callmap = { self.schema.predicate(ns.bse.filename): self.__filename, + self.schema.predicate(ns.bse.dirname): self.__dirname, } def extract( self, - subject: node.Node, + subject: nodes.Entity, content: str, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred in principals: # find callback clbk = self._callmap.get(pred) diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py index 92b51f3..ff51cff 100644 --- a/bsie/extractor/generic/stat.py +++ b/bsie/extractor/generic/stat.py @@ -5,7 +5,8 @@ import os import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from .. import base @@ -41,10 +42,10 @@ class Stat(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: os.stat_result, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred in principals: # find callback clbk = self._callmap.get(pred) diff --git a/bsie/extractor/image/colors_spatial.py b/bsie/extractor/image/colors_spatial.py index e6661a9..bccefc1 100644 --- a/bsie/extractor/image/colors_spatial.py +++ b/bsie/extractor/image/colors_spatial.py @@ -8,7 +8,8 @@ import PIL.Image import numpy as np # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from .. import base @@ -115,10 +116,10 @@ class ColorsSpatial(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: PIL.Image.Image, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: # check principals if self.schema.predicate(self._predicate_name) not in principals: # nothing to do; abort diff --git a/bsie/extractor/image/face/detect.py b/bsie/extractor/image/face/detect.py index 94e3a61..51d5659 100644 --- a/bsie/extractor/image/face/detect.py +++ b/bsie/extractor/image/face/detect.py @@ -7,7 +7,8 @@ import torch from facenet_pytorch import MTCNN, InceptionResnetV1 # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from ... import base @@ -72,17 +73,17 @@ class FaceDetect(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: dict, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: # check principals if self.schema.predicate(ns.bse.face) not in principals: # nothing to do; abort return for face in content: - fnode = node.Node(ns.bsn.Face, ucid=face['ucid']) + fnode = nodes.Face(ucid=face['ucid']) yield subject, ns.bse.face, fnode yield fnode, bsf.x, face['x'] yield fnode, bsf.y, face['y'] diff --git a/bsie/extractor/image/face/identify.py b/bsie/extractor/image/face/identify.py index 152f113..44a75c4 100644 --- a/bsie/extractor/image/face/identify.py +++ b/bsie/extractor/image/face/identify.py @@ -9,7 +9,8 @@ import numpy as np import torch # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from ... import base @@ -142,10 +143,10 @@ class FaceIdentify(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: typing.Any, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: # check principals #if self.schema.predicate(bsf.depicts) not in principals: if self.schema.predicate(ns.bse.face) not in principals: @@ -164,8 +165,8 @@ class FaceIdentify(base.Extractor): lbl = bsfs.URI(self._id2name[idx]) # label (uri) of nearest neighbour if lbl == self._restklasse: # suppress continue - pnode = node.Node(ns.bsn.Person, uri=lbl) - fnode = node.Node(ns.bsn.Face, ucid=face['ucid']) + pnode = nodes.Person(uri=lbl) + fnode = nodes.Face(ucid=face['ucid']) # emit triple yield fnode, self.schema.predicate(bsf.depicts), pnode # FIXME: emit subject -> face -> fnode? diff --git a/bsie/extractor/image/iptc.py b/bsie/extractor/image/iptc.py index 195eff7..0c03539 100644 --- a/bsie/extractor/image/iptc.py +++ b/bsie/extractor/image/iptc.py @@ -3,7 +3,8 @@ import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from .. import base @@ -41,10 +42,10 @@ class Iptc(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: dict, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred in principals: # find callback clbk = self._callmap.get(pred) @@ -55,13 +56,13 @@ class Iptc(base.Extractor): def _keywords( self, - subject: node.Node, + subject: nodes.Entity, content: dict, - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: if 'Iptc.Application2.Keywords' not in content: return for keyword in content['Iptc.Application2.Keywords']: - tag = node.Node(ns.bsn.Tag, label=keyword) + tag = nodes.Tag(label=keyword) yield subject, self.schema.predicate(ns.bse.tag), tag yield tag, self.schema.predicate(ns.bst.label), keyword diff --git a/bsie/extractor/image/photometrics.py b/bsie/extractor/image/photometrics.py index 42eb3c8..4579b54 100644 --- a/bsie/extractor/image/photometrics.py +++ b/bsie/extractor/image/photometrics.py @@ -4,7 +4,8 @@ from fractions import Fraction import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from .. import base @@ -107,10 +108,10 @@ class Exif(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: dict, principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred in principals: # find callback clbk = self._callmap.get(pred) diff --git a/bsie/extractor/preview.py b/bsie/extractor/preview.py index 145a01a..fcda71c 100644 --- a/bsie/extractor/preview.py +++ b/bsie/extractor/preview.py @@ -7,7 +7,8 @@ import typing import PIL.Image # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # inner-module imports from . import base @@ -67,10 +68,10 @@ class Preview(base.Extractor): def extract( self, - subject: node.Node, + subject: nodes.Entity, content: typing.Callable[[int], PIL.Image.Image], principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: # check principals if self.schema.predicate(ns.bse.preview) not in principals: return @@ -82,10 +83,9 @@ class Preview(base.Extractor): buffer = io.BytesIO() img.save(buffer, format='jpeg') # create a preview node - preview = node.Node(ns.bsn.Preview, + preview = nodes.Preview( ucid=bsfs.uuid.UCID.from_bytes(buffer.getvalue()), size=max_side, - source=subject, ) # yield triples yield subject, self.schema.predicate(ns.bse.preview), preview diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py index f44fb74..daa806c 100644 --- a/bsie/lib/__init__.py +++ b/bsie/lib/__init__.py @@ -5,7 +5,6 @@ import typing # inner-module imports from .bsie import BSIE from .builder import PipelineBuilder -from .naming_policy import DefaultNamingPolicy # exports __all__: typing.Sequence[str] = ( diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index b02e707..9aa0bdb 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -3,10 +3,10 @@ import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.utils import bsfs, ns +from bsie.matcher import Matcher, nodes # inner-module imports -from .naming_policy import NamingPolicy from .pipeline import Pipeline # exports @@ -40,7 +40,7 @@ class BSIE(): # pipeline. pipeline: Pipeline, # naming policy - naming_policy: NamingPolicy, + matcher: Matcher, # principals to extract at most. None implies all available w.r.t. extractors. collect: typing.Optional[typing.Iterable[bsfs.URI]] = None, # principals to discard. @@ -48,7 +48,7 @@ class BSIE(): ): # store pipeline and naming policy self._pipeline = pipeline - self._naming_policy = naming_policy + self._matcher = matcher # start off with available principals self._principals = {pred.uri for pred in self._pipeline.principals} # limit principals to specified ones by argument. @@ -79,7 +79,7 @@ class BSIE(): self, path: bsfs.URI, principals: typing.Optional[typing.Iterable[bsfs.URI]] = None, - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.URI, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]]: """Produce triples for a given *path*. Limit to *principals* if given.""" # get requested principals. principals = set(principals) if principals is not None else self._principals @@ -88,6 +88,6 @@ class BSIE(): # predicate lookup principals = {self.schema.predicate(pred) for pred in principals} # invoke pipeline - yield from self._naming_policy(self._pipeline(path, principals)) + yield from self._matcher(self._pipeline(path, principals)) ## EOF ## diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py deleted file mode 100644 index fbdbeb0..0000000 --- a/bsie/lib/naming_policy.py +++ /dev/null @@ -1,141 +0,0 @@ - -# standard imports -import abc -import os -import typing - -# external imports -import urllib.parse - -# bsie imports -from bsie.utils import bsfs, errors, ns -from bsie.utils.node import Node - -# exports -__all__: typing.Sequence[str] = ( - 'DefaultNamingPolicy', - ) - - -## code ## - -class NamingPolicy(): - """Determine node uri's from node hints.""" - def __call__( - self, - iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], - ): - """Apply the policy on a triple iterator.""" - return NamingPolicyIterator(self, iterable) - - @abc.abstractmethod - def handle_node(self, node: Node) -> Node: - """Apply the policy on a node.""" - - -class NamingPolicyIterator(): - """Iterates over triples, determines uris according to a *policy* as it goes.""" - - # source triple iterator. - _iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]] - - # naming policy - _policy: NamingPolicy - - def __init__( - self, - policy: NamingPolicy, - iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], - ): - self._iterable = iterable - self._policy = policy - - def __iter__(self): - for node, pred, value in self._iterable: - # handle subject - self._policy.handle_node(node) - # handle value - if isinstance(value, Node): - self._policy.handle_node(value) - # yield triple - yield node, pred, value - - -class DefaultNamingPolicy(NamingPolicy): - """Compose URIs as <host/user/node_type#fragment> - - What information is used as fragment depends on the node type. - Typically, the default is to use the "ucid" hint. - The fallback in all cases is to generate a random uuid. - - Never changes previously assigned uris. Sets uris in-place. - - """ - - def __init__( - self, - host: bsfs.URI, - user: str, - ): - self._prefix = bsfs.Namespace(os.path.join(host, user)) - self._uuid = bsfs.uuid.UUID() - - def handle_node(self, node: Node) -> Node: - if node.uri is not None: - return node - if node.node_type == ns.bsn.Entity: - return self.name_entity(node) - if node.node_type == ns.bsn.Preview: - return self.name_preview(node) - if node.node_type == ns.bsn.Tag: - return self.name_tag(node) - if node.node_type == ns.bsn.Face: - return self.name_face(node) - raise errors.ProgrammingError(f'no naming policy available for {node.node_type}') - - def name_entity(self, node: Node) -> Node: - """Set a bsn:Entity node's uri fragment to its ucid.""" - if 'ucid' in node.hints: # content id - fragment = node.hints['ucid'] - else: # random name - fragment = self._uuid() - node.uri = getattr(self._prefix.file(), fragment) - return node - - def name_preview(self, node: Node) -> Node: - """Set a bsn:Preview node's uri fragment to its ucid. - Uses its source fragment as fallback. Appends the size if provided. - """ - fragment = None - if 'ucid' in node.hints: # content id - fragment = node.hints['ucid'] - if fragment is None and 'source' in node.hints: # source id - self.handle_node(node.hints['source']) - fragment = node.hints['source'].uri.get('fragment', None) - if fragment is None: # random name - fragment = self._uuid() - if 'size' in node.hints: # append size - fragment += '_s' + str(node.hints['size']) - node.uri = getattr(self._prefix.preview(), fragment) - return node - - def name_tag(self, node: Node) -> Node: - # NOTE: Must ensure to produce the same name for that tags with the same label. - if 'label' in node.hints: # tag label - fragment = urllib.parse.quote(node.hints['label']) - else: # random name - fragment = self._uuid() - # FIXME: match to existing tags in bsfs storage! - node.uri = getattr(self._prefix.tag(), fragment) - return node - - def name_face(self, node: Node) -> Node: - if 'ucid' in node.hints: # content id - fragment = node.hints['ucid'] - else: # random name - fragment = self._uuid() - node.uri = getattr(self._prefix.face(), fragment) - return node - - -## EOF ## diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py index 30fd6fd..98d9cc8 100644 --- a/bsie/lib/pipeline.py +++ b/bsie/lib/pipeline.py @@ -6,8 +6,9 @@ import typing # bsie imports from bsie.extractor import Extractor +from bsie.matcher import nodes from bsie.reader import Reader -from bsie.utils import bsfs, errors, node, ns +from bsie.utils import bsfs, errors, ns # exports __all__: typing.Sequence[str] = ( @@ -85,7 +86,7 @@ class Pipeline(): self, path: bsfs.URI, principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None, - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: """Extract triples from the file at *path*. Optionally, limit triples to *principals*.""" # get principals principals = set(principals) if principals is not None else set(self.schema.predicates()) @@ -104,9 +105,7 @@ class Pipeline(): rdr2ext[rdr].add(ext) # create subject for file - subject = node.Node(ns.bsn.Entity, - ucid=bsfs.uuid.UCID.from_path(path), - ) + subject = nodes.Entity(ucid=bsfs.uuid.UCID.from_path(path)) # extract information for rdr, extrs in rdr2ext.items(): diff --git a/bsie/matcher/__init__.py b/bsie/matcher/__init__.py new file mode 100644 index 0000000..836bacf --- /dev/null +++ b/bsie/matcher/__init__.py @@ -0,0 +1,17 @@ + +# standard imports +import typing + +# inner-module imports +from . import nodes +from .default_matcher import DefaultMatcher +from .matcher import Matcher + +# exports +__all__: typing.Sequence[str] = ( + 'DefaultMatcher', + 'Matcher', + 'nodes', + ) + +## EOF ## diff --git a/bsie/matcher/default_matcher.py b/bsie/matcher/default_matcher.py new file mode 100644 index 0000000..94bbe2c --- /dev/null +++ b/bsie/matcher/default_matcher.py @@ -0,0 +1,76 @@ + +# standard imports +import os +import typing +import urllib + +# bsie imports +from bsie.utils import bsfs + +# inner-module imports +from . import nodes +from .matcher import Matcher + +# exports +__all__: typing.Sequence[str] = ( + 'DefaultMatcher', + ) + + +## code ## + +class DefaultMatcher(Matcher): + """Compose URIs as <host/user/node_type#fragment> + + What information is used as fragment depends on the node type. + Typically, the default is to use the "ucid" hint. + The fallback in all cases is to generate a random uuid. + + Never changes previously assigned uris. Sets uris in-place. + + """ + + def __init__( + self, + host: bsfs.URI, + user: str, + ): + self._prefix = bsfs.Namespace(os.path.join(host, user)) + + def match_node(self, node: nodes.Node) -> nodes.Node: + if node.uri is not None: + return node + if isinstance(node, nodes.Entity): + return self.match_entity(node) + if isinstance(node, nodes.Preview): + return self.match_preview(node) + if isinstance(node, nodes.Tag): + return self.match_tag(node) + if isinstance(node, nodes.Face): + return self.match_face(node) + raise ValueError(f'no matching policy available for bsfs.typename{node}') + + def match_entity(self, node: nodes.Entity) -> nodes.Entity: + """Set a bsn:Entity node's uri fragment to its ucid.""" + node.uri = getattr(self._prefix.file(), node.ucid) + return node + + def match_preview(self, node: nodes.Preview) -> nodes.Preview: + """Set a bsn:Preview node's uri fragment to its ucid and size suffix.""" + fragment = node.ucid + '_s' + str(node.size) + node.uri = getattr(self._prefix.preview(), fragment) + return node + + def match_tag(self, node: nodes.Tag) -> nodes.Tag: + """Set a bsn:Tag node's uri to its label.""" + # FIXME: match to existing tags in bsfs storage?! + fragment = urllib.parse.quote(node.label) + node.uri = getattr(self._prefix.tag(), fragment) + return node + + def match_face(self, node: nodes.Face) -> nodes.Face: + """Set a bsn:Face node's uri to its ucid.""" + node.uri = getattr(self._prefix.face(), node.ucid) + return node + +## EOF ## diff --git a/bsie/matcher/matcher.py b/bsie/matcher/matcher.py new file mode 100644 index 0000000..a89626f --- /dev/null +++ b/bsie/matcher/matcher.py @@ -0,0 +1,61 @@ + +# standard imports +import abc +import typing + +# bsie imports +from bsie.utils import bsfs + +# inner-module imports +from . import nodes + +# exports +__all__: typing.Sequence[str] = ( + 'Matcher', + ) + + +## code ## + +class Matcher(): + """Determine node uri's from node hints.""" + def __call__( + self, + iterable: typing.Iterable[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]], + ): + """Apply the matcher on a triple iterator.""" + return MatcherIterator(self, iterable) + + @abc.abstractmethod + def match_node(self, node: nodes.Node) -> nodes.Node: + """Apply the matcher on a node.""" + + +class MatcherIterator(): + """Iterates over triples, determines uris according to a *matcher* as it goes.""" + + # source triple iterator. + _iterable: typing.Iterable[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]] + + # node matcher + _matcher: Matcher + + def __init__( + self, + matcher: Matcher, + iterable: typing.Iterable[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]], + ): + self._iterable = iterable + self._matcher = matcher + + def __iter__(self): + for node, pred, value in self._iterable: + # handle subject + self._matcher.match_node(node) + # handle value + if isinstance(value, nodes.Node): + self._matcher.match_node(value) + # yield triple + yield node, pred, value + +## EOF ## diff --git a/bsie/matcher/nodes.py b/bsie/matcher/nodes.py new file mode 100644 index 0000000..047e7d1 --- /dev/null +++ b/bsie/matcher/nodes.py @@ -0,0 +1,49 @@ + +# standard imports +from dataclasses import dataclass +import typing + +# bsie imports +from bsie.utils import bsfs, ns + +# exports +__all__: typing.Sequence[str] = ( + 'Entity', + 'Face', + 'Node', + 'Person', + 'Preview', + 'Tag', + ) + +@dataclass(kw_only=True, unsafe_hash=True) +class Node: # pylint: disable=missing-class-docstring + # FIXME: Only allow changes to uri after init + uri: typing.Optional[bsfs.URI] = None + +@dataclass(kw_only=True, unsafe_hash=True) +class Entity(Node): # pylint: disable=missing-class-docstring + node_type: bsfs.URI = ns.bsn.Entity + ucid: str + +@dataclass(kw_only=True, unsafe_hash=True) +class Face(Node): # pylint: disable=missing-class-docstring + node_type: bsfs.URI = ns.bsn.Face + ucid: str + +@dataclass(kw_only=True, unsafe_hash=True) +class Person(Node): # pylint: disable=missing-class-docstring + node_type: bsfs.URI = ns.bsn.Person + +@dataclass(kw_only=True, unsafe_hash=True) +class Preview(Node): # pylint: disable=missing-class-docstring + node_type: bsfs.URI = ns.bsn.Preview + ucid: str + size: int + +@dataclass(kw_only=True, unsafe_hash=True) +class Tag(Node): # pylint: disable=missing-class-docstring + node_type: bsfs.URI = ns.bsn.Tag + label: str + +## EOF ## diff --git a/bsie/reader/face.py b/bsie/reader/face.py index c5374e0..e43b93f 100644 --- a/bsie/reader/face.py +++ b/bsie/reader/face.py @@ -9,7 +9,7 @@ import PIL.Image import torch # bsie imports -from bsie.utils import bsfs, errors, node, ns +from bsie.utils import bsfs, errors, ns # inner-module imports from . import base diff --git a/bsie/utils/__init__.py b/bsie/utils/__init__.py index 4f08604..0c96139 100644 --- a/bsie/utils/__init__.py +++ b/bsie/utils/__init__.py @@ -7,7 +7,6 @@ import typing from . import bsfs from . import filematcher from . import namespaces as ns -from . import node from .filewalker import list_files from .loading import safe_load, unpack_qualified_name @@ -15,7 +14,6 @@ from .loading import safe_load, unpack_qualified_name __all__: typing.Sequence[str] = ( 'bsfs', 'filematcher', - 'node', 'ns', 'safe_load', 'unpack_qualified_name', diff --git a/bsie/utils/node.py b/bsie/utils/node.py deleted file mode 100644 index fa34b2e..0000000 --- a/bsie/utils/node.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Lighweight Node to bridge to BSFS. -""" -# standard imports -import typing - -# bsie imports -from bsie.utils import bsfs - -# exports -__all__: typing.Sequence[str] = ( - 'Node', - ) - - -## code ## - -class Node(): - """Lightweight Node, disconnected from any bsfs structures. - - In most cases, provide *hints* and leave setting the uri to a node - naming policy. Only provide an *uri* if it is absolutely determined. - - """ - - # node type. - node_type: bsfs.URI - - # node URI. - uri: typing.Optional[bsfs.URI] - - # node naming hints. - hits: dict - - def __init__( - self, - node_type: bsfs.URI, - uri: typing.Optional[bsfs.URI] = None, - **uri_hints, - ): - # assign members - self.node_type = bsfs.URI(node_type) - self.hints = uri_hints - self.uri = uri - - def __eq__(self, other: typing.Any) -> bool: - """Compare two Node instances based on type and uri. - Compares hits only if the uri is not yet specified. - """ - return isinstance(other, Node) \ - and other.node_type == self.node_type \ - and other.uri == self.uri \ - and (self.uri is not None or self.hints == other.hints) - - def __hash__(self) -> int: - identifier = self.uri - if identifier is None: - identifier = tuple((key, self.hints[key]) for key in sorted(self.hints)) - return hash((type(self), self.node_type, identifier)) - - def __str__(self) -> str: - return f'{bsfs.typename(self)}({self.node_type}, {self.uri})' - - def __repr__(self) -> str: - return f'{bsfs.typename(self)}({self.node_type}, {self.uri})' - -## EOF ## diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py index 77ee02b..ea18385 100644 --- a/test/extractor/generic/test_constant.py +++ b/test/extractor/generic/test_constant.py @@ -3,7 +3,8 @@ import unittest # bsie imports -from bsie.utils import node as _node, ns +from bsie.matcher import nodes +from bsie.utils import ns # objects to test from bsie.extractor.generic.constant import Constant @@ -28,7 +29,7 @@ class TestConstant(unittest.TestCase): (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'), ] ext = Constant(schema, tuples) - node = _node.Node(ns.bsn.Entity, '') # Blank node + node = nodes.Entity(ucid='abc123') # Blank node p_author = ext.schema.predicate(ns.bse.author) p_comment = ext.schema.predicate(ns.bse.comment) entity = ext.schema.node(ns.bsfs.Node).child(ns.bsn.Entity) diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index 569703d..132b670 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -5,7 +5,8 @@ import unittest # bsie imports from bsie.extractor import base -from bsie.utils import bsfs, node as _node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # objects to test from bsie.extractor.generic.path import Path @@ -40,7 +41,7 @@ class TestPath(unittest.TestCase): def test_extract(self): ext = Path() - node = _node.Node(ns.bsn.Entity, '') # Blank node + node = nodes.Entity(ucid='abc123') content = '/tmp/foo/bar' p_filename = ext.schema.predicate(ns.bse.filename) p_dirname = ext.schema.predicate(ns.bse.dirname) @@ -68,7 +69,7 @@ class TestPath(unittest.TestCase): self.assertSetEqual(set(ext.extract(node, '', (p_filename, p_dirname))), {(node, p_filename, ''), (node, p_dirname, os.path.dirname(os.getcwd()))}) # errors are suppressed - self.assertSetEqual(set(ext.extract(node, None, (p_filename, ))), set()) + self.assertSetEqual(set(ext.extract(node, None, (p_filename, p_dirname))), set()) ## main ## diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py index 0e83e24..38a9c0c 100644 --- a/test/extractor/generic/test_stat.py +++ b/test/extractor/generic/test_stat.py @@ -5,7 +5,8 @@ import unittest # bsie imports from bsie.extractor import base -from bsie.utils import bsfs, node as _node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # objects to test from bsie.extractor.generic.stat import Stat @@ -34,7 +35,7 @@ class TestStat(unittest.TestCase): def test_extract(self): ext = Stat() - node = _node.Node(ns.bsn.Entity, '') # Blank node + node = nodes.Entity(ucid='abc123') content = os.stat(__file__) p_filesize = ext.schema.predicate(ns.bse.filesize) entity = ext.schema.node(ns.bsfs.Node).child(ns.bsn.Entity) diff --git a/test/extractor/image/face/test_detect.py b/test/extractor/image/face/test_detect.py index 92375a2..89a3461 100644 --- a/test/extractor/image/face/test_detect.py +++ b/test/extractor/image/face/test_detect.py @@ -8,8 +8,9 @@ import unittest # bsie imports from bsie.extractor import base +from bsie.matcher import nodes from bsie.reader.face import FaceExtract -from bsie.utils import bsfs, node as _node, ns +from bsie.utils import bsfs, ns # objects to test from bsie.extractor.image.face.detect import FaceDetect, bsf @@ -31,10 +32,11 @@ class TestFaceDetect(unittest.TestCase): # setup rdr = FaceExtract() ext = FaceDetect() - subject = _node.Node(ns.bsfs.Entity) + subject = nodes.Entity(ucid='abc123') content = rdr(os.path.join(os.path.dirname(__file__), 'testface1.jpg')) principals = set(ext.principals) - face = _node.Node(ns.bsn.Face, ucid='2a7203c1515e0caa66a7461452c0b4552f1433a613cb3033e59ed2361790ad45') + face = nodes.Face( + ucid='2a7203c1515e0caa66a7461452c0b4552f1433a613cb3033e59ed2361790ad45') triples = list(ext.extract(subject, content, principals)) # principals is bse:face self.assertSetEqual(principals, {ext.schema.predicate(ns.bse.face)}) diff --git a/test/extractor/image/face/test_identify.py b/test/extractor/image/face/test_identify.py index dde41db..2d52353 100644 --- a/test/extractor/image/face/test_identify.py +++ b/test/extractor/image/face/test_identify.py @@ -10,8 +10,9 @@ import requests # bsie imports from bsie.extractor import base +from bsie.matcher import nodes from bsie.reader.face import FaceExtract -from bsie.utils import bsfs, node as _node, ns +from bsie.utils import bsfs, ns # objects to test from bsie.extractor.image.face.identify import FaceIdentify, bsf @@ -106,11 +107,12 @@ class TestFaceIdentify(unittest.TestCase): os.path.join(os.path.dirname(__file__), 'ref_embeds.npy'), os.path.join(os.path.dirname(__file__), 'ref_mapping.csv'), ) - subject = _node.Node(ns.bsfs.Entity) + subject = nodes.Entity(ucid='abc123') content = rdr(os.path.join(os.path.dirname(__file__), 'testface1.jpg')) principals = set(ext.principals) - face = _node.Node(ns.bsn.Face, ucid='2a7203c1515e0caa66a7461452c0b4552f1433a613cb3033e59ed2361790ad45') - person = _node.Node(ns.bsn.Person, uri='https://example.com/user/Angelina_Jolie') + face = nodes.Face( + ucid='2a7203c1515e0caa66a7461452c0b4552f1433a613cb3033e59ed2361790ad45') + person = nodes.Person(uri='https://example.com/user/Angelina_Jolie') triples = list(ext.extract(subject, content, principals)) # principls is bse:face, bsf:depicts self.assertSetEqual(set(ext.principals), { @@ -128,10 +130,11 @@ class TestFaceIdentify(unittest.TestCase): self.assertListEqual(list(ext.extract(subject, content, principals)), []) # identifies the correct person despite somewhat similar options content = rdr(os.path.join(os.path.dirname(__file__), 'testface3.jpg')) - face = _node.Node(ns.bsn.Face, ucid='f61fac01ef686ee05805afef1e7a10ba54c30dc1aa095d9e77d79ccdfeb40dc5') + face = nodes.Face( + ucid='f61fac01ef686ee05805afef1e7a10ba54c30dc1aa095d9e77d79ccdfeb40dc5') triples = list(ext.extract(subject, content, principals)) self.assertEqual(len(triples), 2) - person = _node.Node(ns.bsn.Person, uri='https://example.com/user/Paul_Rudd') + person = nodes.Person(uri='https://example.com/user/Paul_Rudd') self.assertIn((subject, ext.schema.predicate(ns.bse.face), face), triples) self.assertIn((face, ext.schema.predicate(bsf.depicts), person), triples) # no triples on principal mismatch diff --git a/test/extractor/image/test_colors_spatial.py b/test/extractor/image/test_colors_spatial.py index 902ab6d..1c87bb7 100644 --- a/test/extractor/image/test_colors_spatial.py +++ b/test/extractor/image/test_colors_spatial.py @@ -8,7 +8,8 @@ import PIL.Image # bsie imports from bsie.extractor import base -from bsie.utils import bsfs, ns, node as _node +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # objects to test from bsie.extractor.image.colors_spatial import ColorsSpatial @@ -73,7 +74,7 @@ class TestColorsSpatial(unittest.TestCase): def test_extract(self): ext = ColorsSpatial(2,2,2) img = PIL.Image.open(os.path.join(os.path.dirname(__file__), 'testimage.jpg')) - node = _node.Node(ns.bsn.Entity, bsfs.URI('http://example.com/entity#1234')) + node = nodes.Entity(ucid='1234') principals = set(ext.principals) self.assertEqual(len(principals), 1) # valid invocation yields feature diff --git a/test/extractor/image/test_iptc.py b/test/extractor/image/test_iptc.py index 5fa763d..7efbdfe 100644 --- a/test/extractor/image/test_iptc.py +++ b/test/extractor/image/test_iptc.py @@ -4,7 +4,8 @@ import unittest # bsie imports from bsie.extractor import base -from bsie.utils import bsfs, node as _node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # objects to test from bsie.extractor.image.iptc import Iptc @@ -36,29 +37,29 @@ class TestIptc(unittest.TestCase): def test_extract(self): ext = Iptc() - node = _node.Node(ns.bsfs.File, '') # Blank node + subject = nodes.Entity(ucid='abc123') content = { 'Iptc.Application2.Keywords': ['hello', 'world'], 'Iptc.Application2.RecordVersion': '4', } # target tags - t_hello = _node.Node(ns.bsn.Tag, label='hello') - t_world = _node.Node(ns.bsn.Tag, label='world') + t_hello = nodes.Tag(label='hello') + t_world = nodes.Tag(label='world') # invalid principals are ignored - self.assertSetEqual(set(ext.extract(node, content, {ns.bse.filename})), set()) + self.assertSetEqual(set(ext.extract(subject, content, {ns.bse.filename})), set()) # extract finds all relevant information - self.assertSetEqual(set(ext.extract(node, content, {ext.schema.predicate(ns.bse.tag)})), { - (node, ext.schema.predicate(ns.bse.tag), t_hello), - (node, ext.schema.predicate(ns.bse.tag), t_world), + self.assertSetEqual(set(ext.extract(subject, content, {ext.schema.predicate(ns.bse.tag)})), { + (subject, ext.schema.predicate(ns.bse.tag), t_hello), + (subject, ext.schema.predicate(ns.bse.tag), t_world), (t_hello, ext.schema.predicate(ns.bst.label), 'hello'), (t_world, ext.schema.predicate(ns.bst.label), 'world'), }) # empty content is acceptable - self.assertSetEqual(set(ext.extract(node, {}, set(ext.principals))), set()) + self.assertSetEqual(set(ext.extract(subject, {}, set(ext.principals))), set()) # no principals is acceptable - self.assertSetEqual(set(ext.extract(node, content, set())), set()) + self.assertSetEqual(set(ext.extract(subject, content, set())), set()) ## main ## diff --git a/test/extractor/image/test_photometrics.py b/test/extractor/image/test_photometrics.py index fb219e2..1316618 100644 --- a/test/extractor/image/test_photometrics.py +++ b/test/extractor/image/test_photometrics.py @@ -4,7 +4,8 @@ import unittest # bsie imports from bsie.extractor import base -from bsie.utils import bsfs, node as _node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns # objects to test from bsie.extractor.image.photometrics import Exif, _gps_to_dec @@ -55,7 +56,7 @@ class TestExif(unittest.TestCase): def test_extract(self): ext = Exif() - node = _node.Node(ns.bsfs.File, '') # Blank node + node = nodes.Entity(ucid='abc123') content = { 'Exif.Photo.ExposureTime': '10/600', 'Exif.Photo.FNumber': '48/10', diff --git a/test/extractor/test_preview.py b/test/extractor/test_preview.py index 6526783..7b60520 100644 --- a/test/extractor/test_preview.py +++ b/test/extractor/test_preview.py @@ -9,7 +9,8 @@ import PIL.Image # bsie imports from bsie.extractor import base -from bsie.utils import bsfs, node as _node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, ns from bsie.reader.preview import Preview as Reader # objects to test @@ -69,7 +70,7 @@ class TestPreview(unittest.TestCase): def test_extract(self): # setup dependents rdr = Reader() - subject = _node.Node(ns.bsn.Entity) + subject = nodes.Entity(ucid='abc123') path = os.path.join(os.path.dirname(__file__), 'testimage.jpg') # setup extractor diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 6586e58..a154477 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -6,9 +6,10 @@ import unittest # bsie imports from bsie.extractor import ExtractorBuilder from bsie.extractor.base import SCHEMA_PREAMBLE -from bsie.lib import PipelineBuilder, DefaultNamingPolicy +from bsie.lib import PipelineBuilder +from bsie.matcher import nodes, DefaultMatcher from bsie.reader import ReaderBuilder -from bsie.utils import bsfs, node, ns +from bsie.utils import bsfs, ns # objects to test from bsie.lib.bsie import BSIE @@ -35,13 +36,13 @@ class TestBSIE(unittest.TestCase): )}, ]) # build pipeline - self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='me') + self.matcher = DefaultMatcher(host='http://example.com/local', user='me') pbuild = PipelineBuilder(rbuild, ebuild) self.pipeline = pbuild.build() def test_construction(self): # only pipeline and naming policy - lib = BSIE(self.pipeline, self.naming_policy) + lib = BSIE(self.pipeline, self.matcher) self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.dirname, @@ -71,7 +72,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify collect - lib = BSIE(self.pipeline, self.naming_policy, collect={ + lib = BSIE(self.pipeline, self.matcher, collect={ ns.bse.filesize, ns.bse.author, ns.bse.inexistent, @@ -92,7 +93,7 @@ class TestBSIE(unittest.TestCase): bsfs:unique "true"^^xsd:boolean . ''')) # empty collect is disregarded - lib = BSIE(self.pipeline, self.naming_policy, collect={}) + lib = BSIE(self.pipeline, self.matcher, collect={}) self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.dirname, @@ -123,7 +124,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify discard - lib = BSIE(self.pipeline, self.naming_policy, discard={ + lib = BSIE(self.pipeline, self.matcher, discard={ ns.bse.filesize, ns.bse.filename, ns.bse.inexistent, @@ -151,7 +152,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify collect and discard - lib = BSIE(self.pipeline, self.naming_policy, + lib = BSIE(self.pipeline, self.matcher, collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar}, discard={ns.bse.author, ns.bse.foo, ns.bse.foobar}, ) @@ -168,7 +169,7 @@ class TestBSIE(unittest.TestCase): def test_from_file(self): # setup - lib = BSIE(self.pipeline, self.naming_policy) + lib = BSIE(self.pipeline, self.matcher) self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.filename, @@ -176,7 +177,10 @@ class TestBSIE(unittest.TestCase): ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsn.Entity, uri=f'http://example.com/local/me/file#{content_hash}') + subject = nodes.Entity( + uri=f'http://example.com/local/me/file#{content_hash}', + ucid=content_hash, + ) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') # from_file extracts all available triples diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py deleted file mode 100644 index a078fbd..0000000 --- a/test/lib/test_naming_policy.py +++ /dev/null @@ -1,158 +0,0 @@ - -# standard imports -import unittest - -# bsie imports -from bsie.utils import ns, errors -from bsie.utils.bsfs import URI -from bsie.utils.node import Node - -# objects to test -from bsie.lib.naming_policy import NamingPolicy, NamingPolicyIterator, DefaultNamingPolicy - - - -## code ## - -class TestDefaultNamingPolicy(unittest.TestCase): - - def test_handle_node(self): - # setup - policy = DefaultNamingPolicy('http://example.com', 'me') - # handle_node doesn't modify existing uris - self.assertEqual(policy.handle_node( - Node(ns.bsn.Invalid, uri='http://example.com/you/foo#bar')).uri, - URI('http://example.com/you/foo#bar')) - # processes bsn:Entity - self.assertEqual(policy.handle_node( - Node(ns.bsn.Entity, ucid='abc123cba')).uri, - URI('http://example.com/me/file#abc123cba')) - # processes bsn:Preview - self.assertEqual(policy.handle_node( - Node(ns.bsn.Preview, ucid='abc123cba', size=123)).uri, - URI('http://example.com/me/preview#abc123cba_s123')) - # processes bsn:Tag - self.assertEqual(policy.handle_node( - Node(ns.bsn.Tag, label='hello')).uri, - URI('http://example.com/me/tag#hello')) - # processes bsn:Face - self.assertEqual(policy.handle_node( - Node(ns.bsn.Face, ucid='hello')).uri, - URI('http://example.com/me/face#hello')) - # raises an exception on unknown types - self.assertRaises(errors.ProgrammingError, policy.handle_node, - Node(ns.bsn.Invalid, ucid='abc123cba', size=123)) - - def test_name_entity(self): - # setup - policy = DefaultNamingPolicy('http://example.com', 'me') - # name_entity uses ucid - self.assertEqual(policy.name_entity( - Node(ns.bsn.Entity, ucid='123abc321')).uri, - URI('http://example.com/me/file#123abc321')) - # name_entity falls back to a random guid - self.assertTrue(policy.name_entity( - Node(ns.bsn.Entity)).uri.startswith('http://example.com/me/file#')) - - def test_name_preview(self): - # setup - policy = DefaultNamingPolicy('http://example.com', 'me') - # name_preview uses ucid - self.assertEqual(policy.name_preview( - Node(ns.bsn.Preview, ucid='123abc321')).uri, - URI('http://example.com/me/preview#123abc321')) - self.assertEqual(policy.name_preview( - Node(ns.bsn.Preview, ucid='123abc321', size=400)).uri, - URI('http://example.com/me/preview#123abc321_s400')) - # name_preview uses source - self.assertEqual(policy.name_preview( - Node(ns.bsn.Preview, source=Node(ns.bsn.Entity, ucid='123file321'))).uri, - URI('http://example.com/me/preview#123file321')) - self.assertEqual(policy.name_preview( - Node(ns.bsn.Preview, source=Node(ns.bsn.Entity, ucid='123file321'), size=300)).uri, - URI('http://example.com/me/preview#123file321_s300')) - # name_preview falls back to a random guid - self.assertTrue(policy.name_preview( - Node(ns.bsn.Preview)).uri.startswith('http://example.com/me/preview#')) - self.assertTrue(policy.name_preview( - Node(ns.bsn.Preview, size=200)).uri.startswith('http://example.com/me/preview#')) - self.assertTrue(policy.name_preview( - Node(ns.bsn.Preview, size=200)).uri.endswith('_s200')) - - def test_name_tag(self): - # setup - policy = DefaultNamingPolicy('http://example.com', 'me') - # name_tag uses label - self.assertEqual(policy.name_tag( - Node(ns.bsn.Tag, label='hello')).uri, - URI('http://example.com/me/tag#hello')) - # name_tag matches the label - self.assertEqual( - policy.name_tag(Node(ns.bsn.Tag, label='world')), - policy.name_tag(Node(ns.bsn.Tag, label='world')), - ) - self.assertNotEqual( - policy.name_tag(Node(ns.bsn.Tag, label='hello')), - policy.name_tag(Node(ns.bsn.Tag, label='world')), - ) - # label can include characters that are not valid for an uri - self.assertEqual(policy.name_tag( - Node(ns.bsn.Preview, label='hello world { foo bar ] ')).uri, - URI('http://example.com/me/tag#hello%20world%20%7B%20foo%20bar%20%5D%20')) - # name_tag falls back to a random guid - self.assertTrue(policy.name_tag( - Node(ns.bsn.Tag,)).uri.startswith('http://example.com/me/tag#')) - - def test_name_face(self): - # setup - policy = DefaultNamingPolicy('http://example.com', 'me') - # name_face uses ucid - self.assertEqual(policy.name_face( - Node(ns.bsn.Face, ucid='hello_world')).uri, - URI('http://example.com/me/face#hello_world')) - # name_face falls back to a random guid - self.assertTrue(policy.name_face( - Node(ns.bsn.Face)).uri.startswith('http://example.com/me/face#')) - - -class TestNamingPolicyIterator(unittest.TestCase): - - def test_call(self): # NOTE: We test NamingPolicy.__call__ here - # setup - policy = DefaultNamingPolicy('http://example.com', 'me') - # call accepts list - triples = [('node', 'pred', 'value'), ('node', 'pred', 'value')] - it = policy(triples) - self.assertIsInstance(it, NamingPolicyIterator) - self.assertEqual(it._iterable, triples) - self.assertEqual(it._policy, policy) - # call accepts iterator - triples = iter([('node', 'pred', 'value'), ('node', 'pred', 'value')]) - it = policy(triples) - self.assertIsInstance(it, NamingPolicyIterator) - self.assertEqual(it._iterable, triples) - self.assertEqual(it._policy, policy) - - def test_iter(self): - # setup - policy = DefaultNamingPolicy('http://example.com', 'me') - triples = [ - (Node(ns.bsn.Entity, ucid='foo'), 'predA', 'hello'), - (Node(ns.bsn.Preview, ucid='bar'), 'predB', 1234), - (Node(ns.bsn.Preview, ucid='hello'), 'predC', Node(ns.bsn.Entity, ucid='world')) - ] - # handles nodes, handles values, ignores predicate - self.assertListEqual(list(policy(triples)), [ - (Node(ns.bsn.Entity, uri='http://example.com/me/file#foo'), 'predA', 'hello'), - (Node(ns.bsn.Preview, uri='http://example.com/me/preview#bar'), 'predB', 1234), - (Node(ns.bsn.Preview, uri='http://example.com/me/preview#hello'), 'predC', - Node(ns.bsn.Entity, uri='http://example.com/me/file#world')), - ]) - - -## main ## - -if __name__ == '__main__': - unittest.main() - -## EOF ## diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index 8d836fd..05c6768 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -5,7 +5,8 @@ import os import unittest # bsie imports -from bsie.utils import bsfs, errors, node, ns +from bsie.matcher import nodes +from bsie.utils import bsfs, errors, ns import bsie.extractor.generic.constant import bsie.extractor.generic.path import bsie.extractor.generic.stat @@ -84,7 +85,7 @@ class TestPipeline(unittest.TestCase): pipeline = Pipeline(self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsn.Entity, ucid=content_hash) + subject = nodes.Entity(ucid=content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_dirname = pipeline.schema.predicate(ns.bse.dirname) diff --git a/test/matcher/__init__.py b/test/matcher/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/matcher/__init__.py diff --git a/test/matcher/test_default_matcher.py b/test/matcher/test_default_matcher.py new file mode 100644 index 0000000..2ed371f --- /dev/null +++ b/test/matcher/test_default_matcher.py @@ -0,0 +1,104 @@ + +# standard imports +from dataclasses import dataclass +import unittest + +# bsie imports +from bsie.matcher import nodes +from bsie.utils import ns, errors +from bsie.utils.bsfs import URI + +# objects to test +from bsie.matcher.default_matcher import DefaultMatcher + + +## code ## + +@dataclass(kw_only=True, unsafe_hash=True) +class Invalid(nodes.Node): + ucid: str + +class TestDefaultMatcher(unittest.TestCase): + + def test_match_node(self): + # setup + matcher = DefaultMatcher('http://example.com', 'me') + # match_node doesn't modify existing uris + self.assertEqual(matcher.match_node( + nodes.Node(uri='http://example.com/you/foo#bar')).uri, + URI('http://example.com/you/foo#bar')) + # processes bsn:Entity + self.assertEqual(matcher.match_node( + nodes.Entity(ucid='abc123cba')).uri, + URI('http://example.com/me/file#abc123cba')) + # processes bsn:Preview + self.assertEqual(matcher.match_node( + nodes.Preview(ucid='abc123cba', size=123)).uri, + URI('http://example.com/me/preview#abc123cba_s123')) + # processes bsn:Tag + self.assertEqual(matcher.match_node( + nodes.Tag(label='hello')).uri, + URI('http://example.com/me/tag#hello')) + # processes bsn:Face + self.assertEqual(matcher.match_node( + nodes.Face(ucid='hello')).uri, + URI('http://example.com/me/face#hello')) + # raises an exception on unknown types + self.assertRaises(ValueError, matcher.match_node, + Invalid(ucid='abc123cba')) + + def test_match_entity(self): + # setup + matcher = DefaultMatcher('http://example.com', 'me') + # match_entity uses ucid + self.assertEqual(matcher.match_entity( + nodes.Entity(ucid='123abc321')).uri, + URI('http://example.com/me/file#123abc321')) + + def test_match_preview(self): + # setup + matcher = DefaultMatcher('http://example.com', 'me') + # match_preview uses ucid + self.assertEqual(matcher.match_preview( + nodes.Preview(ucid='123abc321', size=400)).uri, + URI('http://example.com/me/preview#123abc321_s400')) + self.assertEqual(matcher.match_preview( + nodes.Preview(ucid='321cba123', size=200)).uri, + URI('http://example.com/me/preview#321cba123_s200')) + + def test_match_tag(self): + # setup + matcher = DefaultMatcher('http://example.com', 'me') + # match_tag uses label + self.assertEqual(matcher.match_tag( + nodes.Tag(label='hello')).uri, + URI('http://example.com/me/tag#hello')) + # match_tag matches the label + self.assertEqual( + matcher.match_tag(nodes.Tag(label='world')), + matcher.match_tag(nodes.Tag(label='world')), + ) + self.assertNotEqual( + matcher.match_tag(nodes.Tag(label='hello')), + matcher.match_tag(nodes.Tag(label='world')), + ) + # label can include characters that are not valid for an uri + self.assertEqual(matcher.match_tag( + nodes.Tag(label='hello world { foo bar ] ')).uri, + URI('http://example.com/me/tag#hello%20world%20%7B%20foo%20bar%20%5D%20')) + + def test_match_face(self): + # setup + matcher = DefaultMatcher('http://example.com', 'me') + # match_face uses ucid + self.assertEqual(matcher.match_face( + nodes.Face(ucid='hello_world')).uri, + URI('http://example.com/me/face#hello_world')) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/matcher/test_matcher.py b/test/matcher/test_matcher.py new file mode 100644 index 0000000..eaa4766 --- /dev/null +++ b/test/matcher/test_matcher.py @@ -0,0 +1,62 @@ + +# standard imports +import unittest + +# bsie imports +from bsie.matcher import nodes +from bsie.utils import ns, errors +from bsie.utils.bsfs import URI + +# objects to test +from bsie.matcher.matcher import Matcher, MatcherIterator + + +## code ## + +class StubMatcher(Matcher): + def match_node(self, node): + if node.uri is None: + node.uri = 'foo' + return node + +class TestMatcherIterator(unittest.TestCase): + + def test_call(self): + # setup + matcher = StubMatcher() + # call accepts list + triples = [('node', 'pred', 'value'), ('node', 'pred', 'value')] + it = matcher(triples) + self.assertIsInstance(it, MatcherIterator) + self.assertEqual(it._iterable, triples) + self.assertEqual(it._matcher, matcher) + # call accepts iterator + triples = iter([('node', 'pred', 'value'), ('node', 'pred', 'value')]) + it = matcher(triples) + self.assertIsInstance(it, MatcherIterator) + self.assertEqual(it._iterable, triples) + self.assertEqual(it._matcher, matcher) + + def test_iter(self): + # setup + matcher = StubMatcher() + triples = [ + (nodes.Entity(ucid='foo'), 'predA', 'hello'), + (nodes.Preview(ucid='bar', size=123), 'predB', 1234), + (nodes.Preview(ucid='hello', size=321), 'predC', nodes.Entity(ucid='world')) + ] + # handles nodes, handles values, ignores predicate + self.assertListEqual(list(matcher(triples)), [ + (nodes.Entity(uri='foo', ucid='foo'), 'predA', 'hello'), + (nodes.Preview(uri='foo', ucid='bar', size=123), 'predB', 1234), + (nodes.Preview(uri='foo', ucid='hello', size=321), 'predC', + nodes.Entity(uri='foo', ucid='world')), + ]) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/matcher/test_nodes.py b/test/matcher/test_nodes.py new file mode 100644 index 0000000..d884132 --- /dev/null +++ b/test/matcher/test_nodes.py @@ -0,0 +1,96 @@ + +# standard imports +import unittest + +# bsie imports +from bsie.utils import bsfs, ns + +# objects to test +from bsie.matcher import nodes + + +## code ## + +class TestNode(unittest.TestCase): + def test_node(self): + # cannot pass additional kwargs + self.assertRaises(TypeError, nodes.Node, foo='bar') + # may pass uri + self.assertIsInstance(nodes.Node(uri='bar'), nodes.Node) + self.assertEqual(nodes.Node(uri='bar').uri, 'bar') + + def test_entity(self): + # must pass ucid + self.assertRaises(TypeError, nodes.Entity) + self.assertRaises(TypeError, nodes.Entity, uri='foo') + self.assertIsInstance(nodes.Entity(ucid='foo'), nodes.Entity) + self.assertIsInstance(nodes.Entity(ucid='bar'), nodes.Entity) + # cannot pass additional kwargs + self.assertRaises(TypeError, nodes.Entity, ucid='foo', foo='bar') + # may pass uri + self.assertIsInstance(nodes.Entity(ucid='foo', uri='bar'), nodes.Entity) + self.assertEqual(nodes.Entity(ucid='foo', uri='bar').uri, 'bar') + # has node_type + self.assertEqual(nodes.Entity(ucid='foo').node_type, ns.bsn.Entity) + + def test_face(self): + # must pass ucid + self.assertRaises(TypeError, nodes.Face) + self.assertRaises(TypeError, nodes.Face, uri='foo') + self.assertIsInstance(nodes.Face(ucid='foo'), nodes.Face) + self.assertIsInstance(nodes.Face(ucid='bar'), nodes.Face) + # cannot pass additional kwargs + self.assertRaises(TypeError, nodes.Face, ucid='foo', foo='bar') + # may pass uri + self.assertIsInstance(nodes.Face(ucid='foo', uri='bar'), nodes.Face) + self.assertEqual(nodes.Face(ucid='foo', uri='bar').uri, 'bar') + # has node_type + self.assertEqual(nodes.Face(ucid='foo').node_type, ns.bsn.Face) + + def test_person(self): + # cannot pass additional kwargs + self.assertRaises(TypeError, nodes.Person, foo='bar') + # may pass uri + self.assertIsInstance(nodes.Person(uri='bar'), nodes.Person) + self.assertEqual(nodes.Person(uri='bar').uri, 'bar') + # has node_type + self.assertEqual(nodes.Person().node_type, ns.bsn.Person) + + def test_preview(self): + # must pass ucid and size + self.assertRaises(TypeError, nodes.Preview) + self.assertRaises(TypeError, nodes.Preview, ucid='foo') + self.assertRaises(TypeError, nodes.Preview, size=123) + self.assertRaises(TypeError, nodes.Preview, ucid='foo', uri='foo') + self.assertRaises(TypeError, nodes.Preview, size=123, uri='foo') + self.assertIsInstance(nodes.Preview(ucid='foo', size=123), nodes.Preview) + self.assertIsInstance(nodes.Preview(ucid='bar', size=321), nodes.Preview) + # cannot pass additional kwargs + self.assertRaises(TypeError, nodes.Preview, ucid='foo', size=123, foo='bar') + # may pass uri + self.assertIsInstance(nodes.Preview(ucid='foo', size=123, uri='bar'), nodes.Preview) + self.assertEqual(nodes.Preview(ucid='foo', size=123, uri='bar').uri, 'bar') + # has node_type + self.assertEqual(nodes.Preview(ucid='foo', size=123).node_type, ns.bsn.Preview) + + def test_tag(self): + # must pass label + self.assertRaises(TypeError, nodes.Tag) + self.assertRaises(TypeError, nodes.Tag, uri='foo') + self.assertIsInstance(nodes.Tag(label='foo'), nodes.Tag) + self.assertIsInstance(nodes.Tag(label='bar'), nodes.Tag) + # cannot pass additional kwargs + self.assertRaises(TypeError, nodes.Tag, label='foo', foo='bar') + # may pass uri + self.assertIsInstance(nodes.Tag(label='foo', uri='bar'), nodes.Tag) + self.assertEqual(nodes.Tag(label='foo', uri='bar').uri, 'bar') + # has node_type + self.assertEqual(nodes.Tag(label='foo').node_type, ns.bsn.Tag) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/utils/test_node.py b/test/utils/test_node.py deleted file mode 100644 index c0662a1..0000000 --- a/test/utils/test_node.py +++ /dev/null @@ -1,92 +0,0 @@ - -# standard imports -import unittest - -# bsie imports -from bsie.utils import bsfs, ns - -# objects to test -from bsie.utils.node import Node - - -## code ## - -class TestNode(unittest.TestCase): - def test_equality(self): - uri1 = bsfs.URI('http://example.com/me/entity#1234') - uri2 = bsfs.URI('http://example.com/me/entity#4321') - node = Node(ns.bsfs.Entity, uri1) - # equality respects uri - self.assertEqual(node, Node(ns.bsfs.Entity, uri1)) - self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, uri1))) - self.assertNotEqual(node, Node(ns.bsfs.Entity, uri2)) - self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, uri2))) - # equality respects hints - self.assertEqual( - Node(ns.bsfs.Entity, foo='foo'), - Node(ns.bsfs.Entity, foo='foo')) - self.assertEqual( - hash(Node(ns.bsfs.Entity, foo='foo')), - hash(Node(ns.bsfs.Entity, foo='foo'))) - self.assertNotEqual( - Node(ns.bsfs.Entity, foo='foo'), - Node(ns.bsfs.Entity, foo='bar')) - self.assertNotEqual( - hash(Node(ns.bsfs.Entity, foo='foo')), - hash(Node(ns.bsfs.Entity, foo='bar'))) - self.assertNotEqual( - Node(ns.bsfs.Entity, foo='bar'), - Node(ns.bsfs.Entity, bar='foo')) - self.assertNotEqual( - hash(Node(ns.bsfs.Entity, foo='bar')), - hash(Node(ns.bsfs.Entity, bar='foo'))) - # hints are irrelevant if uri is set - self.assertEqual( - Node(ns.bsfs.Entity, uri=uri1, foo='bar'), - Node(ns.bsfs.Entity, uri=uri1, bar='foo')) - self.assertEqual( - hash(Node(ns.bsfs.Entity, uri=uri1, foo='bar')), - hash(Node(ns.bsfs.Entity, uri=uri1, bar='foo'))) - self.assertNotEqual( - Node(ns.bsfs.Entity, uri=uri1, foo='bar'), - Node(ns.bsfs.Entity, uri=uri2, bar='foo')) - self.assertNotEqual( - hash(Node(ns.bsfs.Entity, uri=uri1, foo='bar')), - hash(Node(ns.bsfs.Entity, uri=uri2, bar='foo'))) - # equality respects node_type - self.assertNotEqual(node, Node(ns.bsfs.Foo, uri1)) - self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri1))) - # not equal to other types - self.assertNotEqual(node, 1234) - self.assertNotEqual(hash(node), hash(1234)) - self.assertNotEqual(node, uri1) - self.assertNotEqual(hash(node), hash(uri1)) - self.assertNotEqual(node, ns.bsfs.Entity) - self.assertNotEqual(hash(node), hash(ns.bsfs.Entity)) - class Foo(): pass - self.assertNotEqual(node, Foo()) - self.assertNotEqual(hash(node), hash(Foo())) - - def test_str(self): - uri = bsfs.URI('http://example.com/me/entity#1234') - # basic string conversion - node = Node(ns.bsn.Entity, uri) - self.assertEqual(str(node), 'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/entity#1234)') - self.assertEqual(repr(node), 'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/entity#1234)') - # string conversion respects node_type - node = Node(ns.bsn.Foo, uri) - self.assertEqual(str(node), 'Node(https://schema.bsfs.io/ie/Node/Foo, http://example.com/me/entity#1234)') - self.assertEqual(repr(node), 'Node(https://schema.bsfs.io/ie/Node/Foo, http://example.com/me/entity#1234)') - # string conversion respects uri - node = Node(ns.bsn.Entity, bsfs.URI('http://example.com/me/entity#4321')) - self.assertEqual(str(node), 'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/entity#4321)') - self.assertEqual(repr(node), 'Node(https://schema.bsfs.io/ie/Node/Entity, http://example.com/me/entity#4321)') - - - -## main ## - -if __name__ == '__main__': - unittest.main() - -## EOF ## |