diff options
Diffstat (limited to 'bsie/lib')
-rw-r--r-- | bsie/lib/__init__.py | 1 | ||||
-rw-r--r-- | bsie/lib/bsie.py | 12 | ||||
-rw-r--r-- | bsie/lib/naming_policy.py | 141 | ||||
-rw-r--r-- | bsie/lib/pipeline.py | 9 |
4 files changed, 10 insertions, 153 deletions
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py index f44fb74..daa806c 100644 --- a/bsie/lib/__init__.py +++ b/bsie/lib/__init__.py @@ -5,7 +5,6 @@ import typing # inner-module imports from .bsie import BSIE from .builder import PipelineBuilder -from .naming_policy import DefaultNamingPolicy # exports __all__: typing.Sequence[str] = ( diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index b02e707..9aa0bdb 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -3,10 +3,10 @@ import typing # bsie imports -from bsie.utils import bsfs, node, ns +from bsie.utils import bsfs, ns +from bsie.matcher import Matcher, nodes # inner-module imports -from .naming_policy import NamingPolicy from .pipeline import Pipeline # exports @@ -40,7 +40,7 @@ class BSIE(): # pipeline. pipeline: Pipeline, # naming policy - naming_policy: NamingPolicy, + matcher: Matcher, # principals to extract at most. None implies all available w.r.t. extractors. collect: typing.Optional[typing.Iterable[bsfs.URI]] = None, # principals to discard. @@ -48,7 +48,7 @@ class BSIE(): ): # store pipeline and naming policy self._pipeline = pipeline - self._naming_policy = naming_policy + self._matcher = matcher # start off with available principals self._principals = {pred.uri for pred in self._pipeline.principals} # limit principals to specified ones by argument. @@ -79,7 +79,7 @@ class BSIE(): self, path: bsfs.URI, principals: typing.Optional[typing.Iterable[bsfs.URI]] = None, - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.URI, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]]: """Produce triples for a given *path*. Limit to *principals* if given.""" # get requested principals. principals = set(principals) if principals is not None else self._principals @@ -88,6 +88,6 @@ class BSIE(): # predicate lookup principals = {self.schema.predicate(pred) for pred in principals} # invoke pipeline - yield from self._naming_policy(self._pipeline(path, principals)) + yield from self._matcher(self._pipeline(path, principals)) ## EOF ## diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py deleted file mode 100644 index fbdbeb0..0000000 --- a/bsie/lib/naming_policy.py +++ /dev/null @@ -1,141 +0,0 @@ - -# standard imports -import abc -import os -import typing - -# external imports -import urllib.parse - -# bsie imports -from bsie.utils import bsfs, errors, ns -from bsie.utils.node import Node - -# exports -__all__: typing.Sequence[str] = ( - 'DefaultNamingPolicy', - ) - - -## code ## - -class NamingPolicy(): - """Determine node uri's from node hints.""" - def __call__( - self, - iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], - ): - """Apply the policy on a triple iterator.""" - return NamingPolicyIterator(self, iterable) - - @abc.abstractmethod - def handle_node(self, node: Node) -> Node: - """Apply the policy on a node.""" - - -class NamingPolicyIterator(): - """Iterates over triples, determines uris according to a *policy* as it goes.""" - - # source triple iterator. - _iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]] - - # naming policy - _policy: NamingPolicy - - def __init__( - self, - policy: NamingPolicy, - iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], - ): - self._iterable = iterable - self._policy = policy - - def __iter__(self): - for node, pred, value in self._iterable: - # handle subject - self._policy.handle_node(node) - # handle value - if isinstance(value, Node): - self._policy.handle_node(value) - # yield triple - yield node, pred, value - - -class DefaultNamingPolicy(NamingPolicy): - """Compose URIs as <host/user/node_type#fragment> - - What information is used as fragment depends on the node type. - Typically, the default is to use the "ucid" hint. - The fallback in all cases is to generate a random uuid. - - Never changes previously assigned uris. Sets uris in-place. - - """ - - def __init__( - self, - host: bsfs.URI, - user: str, - ): - self._prefix = bsfs.Namespace(os.path.join(host, user)) - self._uuid = bsfs.uuid.UUID() - - def handle_node(self, node: Node) -> Node: - if node.uri is not None: - return node - if node.node_type == ns.bsn.Entity: - return self.name_entity(node) - if node.node_type == ns.bsn.Preview: - return self.name_preview(node) - if node.node_type == ns.bsn.Tag: - return self.name_tag(node) - if node.node_type == ns.bsn.Face: - return self.name_face(node) - raise errors.ProgrammingError(f'no naming policy available for {node.node_type}') - - def name_entity(self, node: Node) -> Node: - """Set a bsn:Entity node's uri fragment to its ucid.""" - if 'ucid' in node.hints: # content id - fragment = node.hints['ucid'] - else: # random name - fragment = self._uuid() - node.uri = getattr(self._prefix.file(), fragment) - return node - - def name_preview(self, node: Node) -> Node: - """Set a bsn:Preview node's uri fragment to its ucid. - Uses its source fragment as fallback. Appends the size if provided. - """ - fragment = None - if 'ucid' in node.hints: # content id - fragment = node.hints['ucid'] - if fragment is None and 'source' in node.hints: # source id - self.handle_node(node.hints['source']) - fragment = node.hints['source'].uri.get('fragment', None) - if fragment is None: # random name - fragment = self._uuid() - if 'size' in node.hints: # append size - fragment += '_s' + str(node.hints['size']) - node.uri = getattr(self._prefix.preview(), fragment) - return node - - def name_tag(self, node: Node) -> Node: - # NOTE: Must ensure to produce the same name for that tags with the same label. - if 'label' in node.hints: # tag label - fragment = urllib.parse.quote(node.hints['label']) - else: # random name - fragment = self._uuid() - # FIXME: match to existing tags in bsfs storage! - node.uri = getattr(self._prefix.tag(), fragment) - return node - - def name_face(self, node: Node) -> Node: - if 'ucid' in node.hints: # content id - fragment = node.hints['ucid'] - else: # random name - fragment = self._uuid() - node.uri = getattr(self._prefix.face(), fragment) - return node - - -## EOF ## diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py index 30fd6fd..98d9cc8 100644 --- a/bsie/lib/pipeline.py +++ b/bsie/lib/pipeline.py @@ -6,8 +6,9 @@ import typing # bsie imports from bsie.extractor import Extractor +from bsie.matcher import nodes from bsie.reader import Reader -from bsie.utils import bsfs, errors, node, ns +from bsie.utils import bsfs, errors, ns # exports __all__: typing.Sequence[str] = ( @@ -85,7 +86,7 @@ class Pipeline(): self, path: bsfs.URI, principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None, - ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: """Extract triples from the file at *path*. Optionally, limit triples to *principals*.""" # get principals principals = set(principals) if principals is not None else set(self.schema.predicates()) @@ -104,9 +105,7 @@ class Pipeline(): rdr2ext[rdr].add(ext) # create subject for file - subject = node.Node(ns.bsn.Entity, - ucid=bsfs.uuid.UCID.from_path(path), - ) + subject = nodes.Entity(ucid=bsfs.uuid.UCID.from_path(path)) # extract information for rdr, extrs in rdr2ext.items(): |