about | summary | refs | log | tree | commit | diff | stats
path: root/bsie/lib
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2023-07-26 12:48:54 +0200
committer Matthias Baumgartner <dev@igsor.net> 2023-07-26 12:48:54 +0200
commit d6a5c0f596a212f0e1d4e4b351b5b0e6857d74f7 (patch)
tree 645c6fe6b120bb4759b7ac07b4799ffec3bfb4bf /bsie/lib
parent b1ee4452c0e4b820efe69e428e7eaa54cf87ae16 (diff)
download bsie-d6a5c0f596a212f0e1d4e4b351b5b0e6857d74f7.tar.gz
bsie-d6a5c0f596a212f0e1d4e4b351b5b0e6857d74f7.tar.bz2
bsie-d6a5c0f596a212f0e1d4e4b351b5b0e6857d74f7.zip
refactored naming policy into uri matcher
Diffstat (limited to 'bsie/lib')
-rw-r--r-- bsie/lib/__init__.py 1
-rw-r--r-- bsie/lib/bsie.py 12
-rw-r--r-- bsie/lib/naming_policy.py 141
-rw-r--r-- bsie/lib/pipeline.py 9
4 files changed, 10 insertions, 153 deletions
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
index f44fb74..daa806c 100644
--- a/bsie/lib/__init__.py
+++ b/bsie/lib/__init__.py
@@ -5,7 +5,6 @@ import typing
# inner-module imports
from .bsie import BSIE
from .builder import PipelineBuilder
-from .naming_policy import DefaultNamingPolicy
# exports
__all__: typing.Sequence[str] = (
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
index b02e707..9aa0bdb 100644
--- a/bsie/lib/bsie.py
+++ b/bsie/lib/bsie.py
@@ -3,10 +3,10 @@
import typing
# bsie imports
-from bsie.utils import bsfs, node, ns
+from bsie.utils import bsfs, ns
+from bsie.matcher import Matcher, nodes
# inner-module imports
-from .naming_policy import NamingPolicy
from .pipeline import Pipeline
# exports
@@ -40,7 +40,7 @@ class BSIE():
# pipeline.
pipeline: Pipeline,
# naming policy
- naming_policy: NamingPolicy,
+ matcher: Matcher,
# principals to extract at most. None implies all available w.r.t. extractors.
collect: typing.Optional[typing.Iterable[bsfs.URI]] = None,
# principals to discard.
@@ -48,7 +48,7 @@ class BSIE():
):
# store pipeline and naming policy
self._pipeline = pipeline
- self._naming_policy = naming_policy
+ self._matcher = matcher
# start off with available principals
self._principals = {pred.uri for pred in self._pipeline.principals}
# limit principals to specified ones by argument.
@@ -79,7 +79,7 @@ class BSIE():
self,
path: bsfs.URI,
principals: typing.Optional[typing.Iterable[bsfs.URI]] = None,
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.URI, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.URI, typing.Any]]:
"""Produce triples for a given *path*. Limit to *principals* if given."""
# get requested principals.
principals = set(principals) if principals is not None else self._principals
@@ -88,6 +88,6 @@ class BSIE():
# predicate lookup
principals = {self.schema.predicate(pred) for pred in principals}
# invoke pipeline
- yield from self._naming_policy(self._pipeline(path, principals))
+ yield from self._matcher(self._pipeline(path, principals))
## EOF ##
diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py
deleted file mode 100644
index fbdbeb0..0000000
--- a/bsie/lib/naming_policy.py
+++ /dev/null
@@ -1,141 +0,0 @@
-
-# standard imports
-import abc
-import os
-import typing
-
-# external imports
-import urllib.parse
-
-# bsie imports
-from bsie.utils import bsfs, errors, ns
-from bsie.utils.node import Node
-
-# exports
-__all__: typing.Sequence[str] = (
- 'DefaultNamingPolicy',
- )
-
-
-## code ##
-
-class NamingPolicy():
- """Determine node uri's from node hints."""
- def __call__(
- self,
- iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]],
- ):
- """Apply the policy on a triple iterator."""
- return NamingPolicyIterator(self, iterable)
-
- @abc.abstractmethod
- def handle_node(self, node: Node) -> Node:
- """Apply the policy on a node."""
-
-
-class NamingPolicyIterator():
- """Iterates over triples, determines uris according to a *policy* as it goes."""
-
- # source triple iterator.
- _iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]]
-
- # naming policy
- _policy: NamingPolicy
-
- def __init__(
- self,
- policy: NamingPolicy,
- iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]],
- ):
- self._iterable = iterable
- self._policy = policy
-
- def __iter__(self):
- for node, pred, value in self._iterable:
- # handle subject
- self._policy.handle_node(node)
- # handle value
- if isinstance(value, Node):
- self._policy.handle_node(value)
- # yield triple
- yield node, pred, value
-
-
-class DefaultNamingPolicy(NamingPolicy):
- """Compose URIs as <host/user/node_type#fragment>
-
- What information is used as fragment depends on the node type.
- Typically, the default is to use the "ucid" hint.
- The fallback in all cases is to generate a random uuid.
-
- Never changes previously assigned uris. Sets uris in-place.
-
- """
-
- def __init__(
- self,
- host: bsfs.URI,
- user: str,
- ):
- self._prefix = bsfs.Namespace(os.path.join(host, user))
- self._uuid = bsfs.uuid.UUID()
-
- def handle_node(self, node: Node) -> Node:
- if node.uri is not None:
- return node
- if node.node_type == ns.bsn.Entity:
- return self.name_entity(node)
- if node.node_type == ns.bsn.Preview:
- return self.name_preview(node)
- if node.node_type == ns.bsn.Tag:
- return self.name_tag(node)
- if node.node_type == ns.bsn.Face:
- return self.name_face(node)
- raise errors.ProgrammingError(f'no naming policy available for {node.node_type}')
-
- def name_entity(self, node: Node) -> Node:
- """Set a bsn:Entity node's uri fragment to its ucid."""
- if 'ucid' in node.hints: # content id
- fragment = node.hints['ucid']
- else: # random name
- fragment = self._uuid()
- node.uri = getattr(self._prefix.file(), fragment)
- return node
-
- def name_preview(self, node: Node) -> Node:
- """Set a bsn:Preview node's uri fragment to its ucid.
- Uses its source fragment as fallback. Appends the size if provided.
- """
- fragment = None
- if 'ucid' in node.hints: # content id
- fragment = node.hints['ucid']
- if fragment is None and 'source' in node.hints: # source id
- self.handle_node(node.hints['source'])
- fragment = node.hints['source'].uri.get('fragment', None)
- if fragment is None: # random name
- fragment = self._uuid()
- if 'size' in node.hints: # append size
- fragment += '_s' + str(node.hints['size'])
- node.uri = getattr(self._prefix.preview(), fragment)
- return node
-
- def name_tag(self, node: Node) -> Node:
- # NOTE: Must ensure to produce the same name for that tags with the same label.
- if 'label' in node.hints: # tag label
- fragment = urllib.parse.quote(node.hints['label'])
- else: # random name
- fragment = self._uuid()
- # FIXME: match to existing tags in bsfs storage!
- node.uri = getattr(self._prefix.tag(), fragment)
- return node
-
- def name_face(self, node: Node) -> Node:
- if 'ucid' in node.hints: # content id
- fragment = node.hints['ucid']
- else: # random name
- fragment = self._uuid()
- node.uri = getattr(self._prefix.face(), fragment)
- return node
-
-
-## EOF ##
diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py
index 30fd6fd..98d9cc8 100644
--- a/bsie/lib/pipeline.py
+++ b/bsie/lib/pipeline.py
@@ -6,8 +6,9 @@ import typing
# bsie imports
from bsie.extractor import Extractor
+from bsie.matcher import nodes
from bsie.reader import Reader
-from bsie.utils import bsfs, errors, node, ns
+from bsie.utils import bsfs, errors, ns
# exports
__all__: typing.Sequence[str] = (
@@ -85,7 +86,7 @@ class Pipeline():
self,
path: bsfs.URI,
principals: typing.Optional[typing.Iterable[bsfs.schema.Predicate]] = None,
- ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]:
+ ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]:
"""Extract triples from the file at *path*. Optionally, limit triples to *principals*."""
# get principals
principals = set(principals) if principals is not None else set(self.schema.predicates())
@@ -104,9 +105,7 @@ class Pipeline():
rdr2ext[rdr].add(ext)
# create subject for file
- subject = node.Node(ns.bsn.Entity,
- ucid=bsfs.uuid.UCID.from_path(path),
- )
+ subject = nodes.Entity(ucid=bsfs.uuid.UCID.from_path(path))
# extract information
for rdr, extrs in rdr2ext.items():