aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--bsie/apps/index.py16
-rw-r--r--bsie/apps/info.py1
-rw-r--r--bsie/lib/__init__.py1
-rw-r--r--bsie/lib/bsie.py10
-rw-r--r--bsie/lib/builder.py9
-rw-r--r--bsie/lib/naming_policy.py101
-rw-r--r--bsie/lib/pipeline.py18
-rw-r--r--bsie/utils/node.py29
-rw-r--r--test/lib/test_bsie.py22
-rw-r--r--test/lib/test_builder.py11
-rw-r--r--test/lib/test_naming_policy.py86
-rw-r--r--test/lib/test_pipeline.py28
-rw-r--r--test/utils/test_node.py54
13 files changed, 306 insertions, 80 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index 21c2318..a870364 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -11,7 +11,7 @@ import typing
# bsie imports
from bsie.extractor import ExtractorBuilder
-from bsie.lib import BSIE, PipelineBuilder
+from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy
from bsie.reader import ReaderBuilder
from bsie.utils import bsfs, errors
@@ -26,7 +26,9 @@ __all__: typing.Sequence[str] = (
def main(argv):
"""Index files or directories into BSFS."""
parser = argparse.ArgumentParser(description=main.__doc__, prog='index')
- parser.add_argument('--user', type=bsfs.URI, default=bsfs.URI('http://example.com/me'),
+ parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'),
+ help='')
+ parser.add_argument('--user', type=str, default='me',
help='')
parser.add_argument('--collect', action='append', default=[],
help='')
@@ -66,16 +68,19 @@ def main(argv):
])
# pipeline builder
pbuild = PipelineBuilder(
- bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')),
rbuild,
ebuild,
)
# build pipeline
pipeline = pbuild.build()
+ # build the naming policy
+ naming_policy = DefaultNamingPolicy(
+ host=args.host,
+ user=args.user,
+ )
# build BSIE frontend
- bsie = BSIE(pipeline, args.collect, args.discard)
-
+ bsie = BSIE(pipeline, naming_policy, args.collect, args.discard)
def walk(handle):
"""Walk through given input files."""
@@ -83,7 +88,6 @@ def main(argv):
# FIXME: simplify code (below but maybe also above)
# FIXME: How to handle dependencies between data?
# E.g. do I still want to link to a tag despite not being permitted to set its label?
- # FIXME: node renaming?
# index input paths
for path in args.input_file:
diff --git a/bsie/apps/info.py b/bsie/apps/info.py
index 64a4eba..4e948fc 100644
--- a/bsie/apps/info.py
+++ b/bsie/apps/info.py
@@ -54,7 +54,6 @@ def main(argv):
])
# pipeline builder
pbuild = PipelineBuilder(
- bsfs.Namespace('http://example.com/me/'), # not actually used
rbuild,
ebuild,
)
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
index 4239d3b..48379de 100644
--- a/bsie/lib/__init__.py
+++ b/bsie/lib/__init__.py
@@ -10,6 +10,7 @@ import typing
# inner-module imports
from .bsie import BSIE
from .builder import PipelineBuilder
+from .naming_policy import DefaultNamingPolicy
# exports
__all__: typing.Sequence[str] = (
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
index 668783d..a572525 100644
--- a/bsie/lib/bsie.py
+++ b/bsie/lib/bsie.py
@@ -11,6 +11,7 @@ import typing
from bsie.utils import bsfs, node, ns
# inner-module imports
+from .naming_policy import NamingPolicy
from .pipeline import Pipeline
# exports
@@ -41,15 +42,18 @@ class BSIE():
def __init__(
self,
- # pipeline builder.
+ # pipeline.
pipeline: Pipeline,
+ # naming policy
+ naming_policy: NamingPolicy,
# principals to extract at most. None implies all available w.r.t. extractors.
collect: typing.Optional[typing.Iterable[bsfs.URI]] = None,
# principals to discard.
discard: typing.Optional[typing.Iterable[bsfs.URI]] = None,
):
- # store pipeline
+ # store pipeline and naming policy
self._pipeline = pipeline
+ self._naming_policy = naming_policy
# start off with available principals
self._principals = {pred.uri for pred in self._pipeline.principals}
# limit principals to specified ones by argument.
@@ -89,6 +93,6 @@ class BSIE():
# predicate lookup
principals = {self.schema.predicate(pred) for pred in principals}
# invoke pipeline
- yield from self._pipeline(path, principals)
+ yield from self._naming_policy(self._pipeline(path, principals))
## EOF ##
diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py
index c2abffe..39da441 100644
--- a/bsie/lib/builder.py
+++ b/bsie/lib/builder.py
@@ -11,7 +11,7 @@ import typing
# bsie imports
from bsie.extractor import ExtractorBuilder
from bsie.reader import ReaderBuilder
-from bsie.utils import bsfs, errors
+from bsie.utils import errors
# inner-module imports
from . import pipeline
@@ -29,9 +29,6 @@ logger = logging.getLogger(__name__)
class PipelineBuilder():
"""Build `bsie.tools.pipeline.Pipeline` instances."""
- # Prefix to be used in the Pipeline.
- prefix: bsfs.Namespace
-
# builder for Readers.
rbuild: ReaderBuilder
@@ -40,11 +37,9 @@ class PipelineBuilder():
def __init__(
self,
- prefix: bsfs.Namespace,
reader_builder: ReaderBuilder,
extractor_builder: ExtractorBuilder,
):
- self.prefix = prefix
self.rbuild = reader_builder
self.ebuild = extractor_builder
@@ -80,6 +75,6 @@ class PipelineBuilder():
except errors.BuilderError as err: # failed to build reader
logger.error(str(err))
- return pipeline.Pipeline(self.prefix, ext2rdr)
+ return pipeline.Pipeline(ext2rdr)
## EOF ##
diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py
new file mode 100644
index 0000000..360abde
--- /dev/null
+++ b/bsie/lib/naming_policy.py
@@ -0,0 +1,101 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import abc
+import os
+import typing
+
+# bsie imports
+from bsie.utils import bsfs, errors, ns
+from bsie.utils.node import Node
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'DefaultNamingPolicy',
+ )
+
+
+## code ##
+
+class NamingPolicy():
+ """Determine node uri's from node hints.
+
+ Base class for naming policies. Subclasses implement `handle_node`;
+ calling a policy instance on a triple iterable wraps that iterable in a
+ `NamingPolicyIterator`.
+
+ NOTE(review): `handle_node` is decorated with `abc.abstractmethod`, but
+ this class does not derive from `abc.ABC`, so the decorator is not
+ enforced when instantiating -- confirm whether that is intended.
+ """
+ def __call__(
+ self,
+ iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]],
+ ):
+ """Apply the policy on a triple iterator.
+
+ Returns a `NamingPolicyIterator` over *iterable*. The iterable is only
+ stored here; it is consumed when the returned object is iterated.
+ """
+ return NamingPolicyIterator(self, iterable)
+
+ @abc.abstractmethod
+ def handle_node(self, node: Node) -> Node:
+ """Apply the policy on a node."""
+
+
+class NamingPolicyIterator():
+ """Iterates over triples, determines uris according to a *policy* as it goes.
+
+ Every subject, and every value that is a `Node`, is passed to the policy's
+ `handle_node` before the triple is yielded. Predicates are passed through.
+ """
+
+ # source triple iterator.
+ _iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]]
+
+ # naming policy
+ _policy: NamingPolicy
+
+ def __init__(
+ self,
+ policy: NamingPolicy,
+ iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]],
+ ):
+ self._iterable = iterable
+ self._policy = policy
+
+ def __iter__(self):
+ """Yield (node, predicate, value) triples after applying the policy."""
+ for node, pred, value in self._iterable:
+ # handle subject
+ # The return value of handle_node is ignored: the triple yielded below
+ # carries the very node objects that were passed to the policy.
+ self._policy.handle_node(node)
+ # handle value
+ if isinstance(value, Node):
+ self._policy.handle_node(value)
+ # yield triple
+ yield node, pred, value
+
+
+class DefaultNamingPolicy(NamingPolicy):
+ """Compose URIs as <host/user/node_type#fragment>
+
+ What information is used as fragment depends on the node type.
+ Typically, the default is to use the "ucid" hint.
+ The fallback in all cases is to generate a random uuid.
+
+ Never changes previously assigned uris. Sets uris in-place.
+
+ """
+
+ def __init__(
+ self,
+ host: bsfs.URI,
+ user: str,
+ ):
+ # NOTE(review): os.path.join uses the platform's path separator, which
+ # is '/' only on POSIX systems -- confirm this is acceptable for URIs.
+ self._prefix = bsfs.Namespace(os.path.join(host, user))
+ self._uuid = bsfs.uuid.UUID()
+
+ def handle_node(self, node: Node) -> Node:
+ """Assign a uri to *node* unless it already has one and return the node.
+
+ Raises `errors.ProgrammingError` if the node has no uri and its type
+ is not `bsfs:File`.
+ """
+ if node.uri is not None:
+ return node
+ if node.node_type == ns.bsfs.File:
+ return self.name_file(node)
+ # f-string so that the offending node type shows up in the message.
+ raise errors.ProgrammingError(f'no naming policy available for {node.node_type}')
+
+ def name_file(self, node: Node) -> Node:
+ """Set a bsfs:File node's uri fragment to its ucid.
+
+ Uses the 'ucid' hint as fragment if present; otherwise the fragment is
+ obtained from the policy's uuid generator. Overwrites `node.uri`.
+ """
+ if 'ucid' in node.hints: # content id
+ fragment = node.hints['ucid']
+ else: # random name
+ fragment = self._uuid()
+ node.uri = (self._prefix + 'file')[fragment]
+ return node
+
+## EOF ##
diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py
index 44685ba..0bc5109 100644
--- a/bsie/lib/pipeline.py
+++ b/bsie/lib/pipeline.py
@@ -19,8 +19,6 @@ __all__: typing.Sequence[str] = (
'Pipeline',
)
-# constants
-FILE_PREFIX = 'file#'
## code ##
@@ -40,19 +38,14 @@ class Pipeline():
# combined extractor schemas.
_schema: bsfs.schema.Schema
- # node prefix.
- _prefix: bsfs.Namespace
-
# extractor -> reader mapping
_ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
def __init__(
self,
- prefix: bsfs.Namespace,
ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]]
):
# store core members
- self._prefix = prefix + FILE_PREFIX
self._ext2rdr = ext2rdr
# compile schema from all extractors
self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr)
@@ -64,12 +57,11 @@ class Pipeline():
return f'{bsfs.typename(self)}(...)'
def __hash__(self) -> int:
- return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
+ return hash((type(self), self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
def __eq__(self, other: typing.Any) -> bool:
return isinstance(other, type(self)) \
and self._schema == other._schema \
- and self._prefix == other._prefix \
and self._ext2rdr == other._ext2rdr
@property
@@ -117,8 +109,9 @@ class Pipeline():
rdr2ext[rdr].add(ext)
# create subject for file
- uuid = bsfs.uuid.UCID.from_path(path)
- subject = node.Node(ns.bsfs.File, self._prefix[uuid])
+ subject = node.Node(ns.bsfs.File,
+ ucid=bsfs.uuid.UCID.from_path(path),
+ )
# extract information
for rdr, extrs in rdr2ext.items():
@@ -131,8 +124,7 @@ class Pipeline():
for ext in extrs:
try:
# get predicate/value tuples
- for subject, pred, value in ext.extract(subject, content, principals):
- yield subject, pred, value
+ yield from ext.extract(subject, content, principals)
except errors.ExtractorError as err:
# critical extractor failure.
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
index 91e4f37..aa62c06 100644
--- a/bsie/utils/node.py
+++ b/bsie/utils/node.py
@@ -19,30 +19,47 @@ __all__: typing.Sequence[str] = (
## code ##
class Node():
- """Lightweight Node, disconnected from any bsfs structures."""
+ """Lightweight Node, disconnected from any bsfs structures.
+
+ In most cases, provide *hints* and leave setting the uri to a node
+ naming policy. Only provide an *uri* if it is absolutely determined.
+
+ """
# node type.
node_type: bsfs.URI
# node URI.
- uri: bsfs.URI
+ uri: typing.Optional[bsfs.URI]
+
+ # node naming hints.
+ hits: dict
def __init__(
self,
node_type: bsfs.URI,
- uri: bsfs.URI,
+ uri: typing.Optional[bsfs.URI] = None,
+ **uri_hints,
):
# assign members
self.node_type = bsfs.URI(node_type)
- self.uri = bsfs.URI(uri)
+ self.hints = uri_hints
+ self.uri = uri
def __eq__(self, other: typing.Any) -> bool:
+ """Compare two Node instances based on type and uri.
+ Compares hints only if the uri is not yet specified.
+ """
return isinstance(other, Node) \
and other.node_type == self.node_type \
- and other.uri == self.uri
+ and other.uri == self.uri \
+ and (self.uri is not None or self.hints == other.hints)
def __hash__(self) -> int:
- return hash((type(self), self.node_type, self.uri))
+ identifier = self.uri
+ if identifier is None:
+ identifier = tuple((key, self.hints[key]) for key in sorted(self.hints))
+ return hash((type(self), self.node_type, identifier))
def __str__(self) -> str:
return f'{bsfs.typename(self)}({self.node_type}, {self.uri})'
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 38e6f59..ae23c4b 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -11,7 +11,7 @@ import unittest
# bsie imports
from bsie.extractor import ExtractorBuilder
from bsie.extractor.base import SCHEMA_PREAMBLE
-from bsie.lib import PipelineBuilder
+from bsie.lib import PipelineBuilder, DefaultNamingPolicy
from bsie.reader import ReaderBuilder
from bsie.utils import bsfs, node, ns
@@ -40,13 +40,13 @@ class TestBSIE(unittest.TestCase):
)},
])
# build pipeline
- self.prefix = bsfs.Namespace('http://example.com/local/')
- pbuild = PipelineBuilder(self.prefix, rbuild, ebuild)
+ self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='')
+ pbuild = PipelineBuilder(rbuild, ebuild)
self.pipeline = pbuild.build()
def test_construction(self):
- # pipeline only
- lib = BSIE(self.pipeline)
+ # only pipeline and naming policy
+ lib = BSIE(self.pipeline, self.naming_policy)
self.assertSetEqual(set(lib.principals), {
ns.bse.filename,
ns.bse.filesize,
@@ -70,7 +70,7 @@ class TestBSIE(unittest.TestCase):
'''))
# specify collect
- lib = BSIE(self.pipeline, collect={
+ lib = BSIE(self.pipeline, self.naming_policy, collect={
ns.bse.filesize,
ns.bse.author,
ns.bse.inexistent,
@@ -91,7 +91,7 @@ class TestBSIE(unittest.TestCase):
bsfs:unique "true"^^xsd:boolean .
'''))
# empty collect is disregarded
- lib = BSIE(self.pipeline, collect={})
+ lib = BSIE(self.pipeline, self.naming_policy, collect={})
self.assertSetEqual(set(lib.principals), {
ns.bse.filename,
ns.bse.filesize,
@@ -116,7 +116,7 @@ class TestBSIE(unittest.TestCase):
'''))
# specify discard
- lib = BSIE(self.pipeline, discard={
+ lib = BSIE(self.pipeline, self.naming_policy, discard={
ns.bse.filesize,
ns.bse.filename,
ns.bse.inexistent,
@@ -132,7 +132,7 @@ class TestBSIE(unittest.TestCase):
'''))
# specify collect and discard
- lib = BSIE(self.pipeline,
+ lib = BSIE(self.pipeline, self.naming_policy,
collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar},
discard={ns.bse.author, ns.bse.foo, ns.bse.foobar},
)
@@ -150,14 +150,14 @@ class TestBSIE(unittest.TestCase):
def test_from_file(self):
# setup
- lib = BSIE(self.pipeline)
+ lib = BSIE(self.pipeline, self.naming_policy)
self.assertSetEqual(set(lib.principals), {
ns.bse.filesize,
ns.bse.filename,
ns.bse.author,
})
content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
- subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash])
+ subject = node.Node(ns.bsfs.File, uri=f'http://example.com/local/file#{content_hash}')
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
# from_file extracts all available triples
diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py
index 273d620..48e932b 100644
--- a/test/lib/test_builder.py
+++ b/test/lib/test_builder.py
@@ -21,7 +21,6 @@ from bsie.lib import PipelineBuilder
class TestPipelineBuilder(unittest.TestCase):
def test_build(self):
- prefix = bsfs.URI('http://example.com/local/file#')
c_schema = '''
bse:author rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
@@ -40,7 +39,7 @@ class TestPipelineBuilder(unittest.TestCase):
)},
])
# build pipeline
- builder = PipelineBuilder(prefix, rbuild, ebuild)
+ builder = PipelineBuilder(rbuild, ebuild)
pipeline = builder.build()
# delayed import
import bsie.reader.path
@@ -61,7 +60,7 @@ class TestPipelineBuilder(unittest.TestCase):
{'bsie.extractor.generic.path.Path': {}},
])
with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+ pipeline = PipelineBuilder(rbuild, ebuild_err).build()
self.assertDictEqual(pipeline._ext2rdr, {
bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
@@ -71,7 +70,7 @@ class TestPipelineBuilder(unittest.TestCase):
{'bsie.extractor.generic.path.Path': {}},
])
with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+ pipeline = PipelineBuilder(rbuild, ebuild_err).build()
self.assertDictEqual(pipeline._ext2rdr, {
bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
@@ -81,7 +80,7 @@ class TestPipelineBuilder(unittest.TestCase):
old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
# build pipeline with invalid reader reference
- pipeline = PipelineBuilder(prefix, rbuild, ebuild).build()
+ pipeline = PipelineBuilder(rbuild, ebuild).build()
self.assertDictEqual(pipeline._ext2rdr, {
bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
@@ -92,7 +91,7 @@ class TestPipelineBuilder(unittest.TestCase):
# fail to build reader
rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build()
+ pipeline = PipelineBuilder(rbuild_err, ebuild).build()
self.assertDictEqual(pipeline._ext2rdr, {
bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py
new file mode 100644
index 0000000..763537b
--- /dev/null
+++ b/test/lib/test_naming_policy.py
@@ -0,0 +1,86 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import ns, errors
+from bsie.utils.bsfs import URI
+from bsie.utils.node import Node
+
+# objects to test
+from bsie.lib.naming_policy import NamingPolicy, NamingPolicyIterator, DefaultNamingPolicy
+
+
+
+## code ##
+
+class TestDefaultNamingPolicy(unittest.TestCase):
+ """Tests for DefaultNamingPolicy.handle_node and DefaultNamingPolicy.name_file."""
+
+ def test_handle_node(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # handle_node doesn't modify existing uris
+ self.assertEqual(policy.handle_node(
+ Node(ns.bsfs.Entity, uri='http://example.com/you/foo#bar')).uri,
+ URI('http://example.com/you/foo#bar'))
+ # processes bsfs:File
+ self.assertEqual(policy.handle_node(
+ Node(ns.bsfs.File, ucid='abc123cba')).uri,
+ URI('http://example.com/me/file#abc123cba'))
+ # raises an exception on unknown types
+ self.assertRaises(errors.ProgrammingError, policy.handle_node,
+ Node(ns.bsfs.Entity, ucid='abc123cba', size=123))
+
+ def test_name_file(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # name_file uses ucid
+ self.assertEqual(policy.name_file(
+ Node(ns.bsfs.File, ucid='123abc321')).uri,
+ URI('http://example.com/me/file#123abc321'))
+ # name_file falls back to a generated fragment if there is no ucid hint;
+ # only the uri prefix is checked since the fragment is not known upfront.
+ self.assertTrue(policy.name_file(
+ Node(ns.bsfs.File)).uri.startswith('http://example.com/me/file#'))
+
+
+class TestNamingPolicyIterator(unittest.TestCase):
+ """Tests for NamingPolicyIterator and for NamingPolicy.__call__, which creates it."""
+
+ def test_call(self): # NOTE: We test NamingPolicy.__call__ here
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # call accepts list
+ triples = [('node', 'pred', 'value'), ('node', 'pred', 'value')]
+ it = policy(triples)
+ self.assertIsInstance(it, NamingPolicyIterator)
+ self.assertEqual(it._iterable, triples)
+ self.assertEqual(it._policy, policy)
+ # call accepts iterator
+ triples = iter([('node', 'pred', 'value'), ('node', 'pred', 'value')])
+ it = policy(triples)
+ self.assertIsInstance(it, NamingPolicyIterator)
+ self.assertEqual(it._iterable, triples)
+ self.assertEqual(it._policy, policy)
+
+ def test_iter(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ triples = [
+ (Node(ns.bsfs.File, ucid='foo'), 'predA', 'hello'),
+ ]
+ # the subject node gets its uri assigned; the predicate and the
+ # non-Node value pass through unchanged.
+ # NOTE(review): no triple with a Node as value is covered here.
+ self.assertListEqual(list(policy(triples)), [
+ (Node(ns.bsfs.File, uri='http://example.com/me/file#foo'), 'predA', 'hello'),
+ ])
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py
index 8fecc74..61fddd7 100644
--- a/test/lib/test_pipeline.py
+++ b/test/lib/test_pipeline.py
@@ -48,32 +48,28 @@ class TestPipeline(unittest.TestCase):
bsie.extractor.generic.constant.Constant(csA, tupA): None,
bsie.extractor.generic.constant.Constant(csB, tupB): None,
}
- self.prefix = bsfs.Namespace('http://example.com/local/')
def test_essentials(self):
- pipeline = Pipeline(self.prefix, self.ext2rdr)
+ pipeline = Pipeline(self.ext2rdr)
self.assertEqual(str(pipeline), 'Pipeline')
self.assertEqual(repr(pipeline), 'Pipeline(...)')
def test_equality(self):
- pipeline = Pipeline(self.prefix, self.ext2rdr)
+ pipeline = Pipeline(self.ext2rdr)
# a pipeline is equivalent to itself
self.assertEqual(pipeline, pipeline)
self.assertEqual(hash(pipeline), hash(pipeline))
# identical builds are equivalent
- self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr))
- self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr)))
+ self.assertEqual(pipeline, Pipeline(self.ext2rdr))
+ self.assertEqual(hash(pipeline), hash(Pipeline(self.ext2rdr)))
- # equivalence respects prefix
- self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))
- self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)))
# equivalence respects extractors/readers
ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0}
- self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr))
- self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr)))
+ self.assertNotEqual(pipeline, Pipeline(ext2rdr))
+ self.assertNotEqual(hash(pipeline), hash(Pipeline(ext2rdr)))
# equivalence respects schema
- p2 = Pipeline(self.prefix, self.ext2rdr)
+ p2 = Pipeline(self.ext2rdr)
p2._schema = bsfs.schema.Schema()
self.assertNotEqual(pipeline, p2)
self.assertNotEqual(hash(pipeline), hash(p2))
@@ -90,10 +86,10 @@ class TestPipeline(unittest.TestCase):
def test_call(self):
# build pipeline
- pipeline = Pipeline(self.prefix, self.ext2rdr)
+ pipeline = Pipeline(self.ext2rdr)
# build objects for tests
content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
- subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash])
+ subject = node.Node(ns.bsfs.File, ucid=content_hash)
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
p_filesize = pipeline.schema.predicate(ns.bse.filesize)
@@ -138,7 +134,7 @@ class TestPipeline(unittest.TestCase):
def __call__(self, path):
raise errors.ReaderError('reader error')
- pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()})
+ pipeline = Pipeline({bsie.extractor.generic.path.Path(): FaultyReader()})
with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
@@ -149,7 +145,7 @@ class TestPipeline(unittest.TestCase):
def extract(self, subject, content, predicates):
raise errors.ExtractorError('extractor error')
- pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()})
+ pipeline = Pipeline({FaultyExtractor(): bsie.reader.path.Path()})
with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
@@ -157,7 +153,7 @@ class TestPipeline(unittest.TestCase):
def test_predicates(self):
# build pipeline
- pipeline = Pipeline(self.prefix, self.ext2rdr)
+ pipeline = Pipeline(self.ext2rdr)
#
self.assertSetEqual(set(pipeline.principals), {
pipeline.schema.predicate(ns.bse.filename),
diff --git a/test/utils/test_node.py b/test/utils/test_node.py
index 9feb051..1dcd0ed 100644
--- a/test/utils/test_node.py
+++ b/test/utils/test_node.py
@@ -18,22 +18,54 @@ from bsie.utils.node import Node
class TestNode(unittest.TestCase):
def test_equality(self):
- uri = bsfs.URI('http://example.com/me/entity#1234')
- node = Node(ns.bsfs.Entity, uri)
- # basic equivalence
- self.assertEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234')))
- self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234'))))
+ uri1 = bsfs.URI('http://example.com/me/entity#1234')
+ uri2 = bsfs.URI('http://example.com/me/entity#4321')
+ node = Node(ns.bsfs.Entity, uri1)
# equality respects uri
- self.assertNotEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321')))
- self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321'))))
+ self.assertEqual(node, Node(ns.bsfs.Entity, uri1))
+ self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, uri1)))
+ self.assertNotEqual(node, Node(ns.bsfs.Entity, uri2))
+ self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, uri2)))
+ # equality respects hints
+ self.assertEqual(
+ Node(ns.bsfs.Entity, foo='foo'),
+ Node(ns.bsfs.Entity, foo='foo'))
+ self.assertEqual(
+ hash(Node(ns.bsfs.Entity, foo='foo')),
+ hash(Node(ns.bsfs.Entity, foo='foo')))
+ self.assertNotEqual(
+ Node(ns.bsfs.Entity, foo='foo'),
+ Node(ns.bsfs.Entity, foo='bar'))
+ self.assertNotEqual(
+ hash(Node(ns.bsfs.Entity, foo='foo')),
+ hash(Node(ns.bsfs.Entity, foo='bar')))
+ self.assertNotEqual(
+ Node(ns.bsfs.Entity, foo='bar'),
+ Node(ns.bsfs.Entity, bar='foo'))
+ self.assertNotEqual(
+ hash(Node(ns.bsfs.Entity, foo='bar')),
+ hash(Node(ns.bsfs.Entity, bar='foo')))
+ # hints are irrelevant if uri is set
+ self.assertEqual(
+ Node(ns.bsfs.Entity, uri=uri1, foo='bar'),
+ Node(ns.bsfs.Entity, uri=uri1, bar='foo'))
+ self.assertEqual(
+ hash(Node(ns.bsfs.Entity, uri=uri1, foo='bar')),
+ hash(Node(ns.bsfs.Entity, uri=uri1, bar='foo')))
+ self.assertNotEqual(
+ Node(ns.bsfs.Entity, uri=uri1, foo='bar'),
+ Node(ns.bsfs.Entity, uri=uri2, bar='foo'))
+ self.assertNotEqual(
+ hash(Node(ns.bsfs.Entity, uri=uri1, foo='bar')),
+ hash(Node(ns.bsfs.Entity, uri=uri2, bar='foo')))
# equality respects node_type
- self.assertNotEqual(node, Node(ns.bsfs.Foo, uri))
- self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri)))
+ self.assertNotEqual(node, Node(ns.bsfs.Foo, uri1))
+ self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri1)))
# not equal to other types
self.assertNotEqual(node, 1234)
self.assertNotEqual(hash(node), hash(1234))
- self.assertNotEqual(node, uri)
- self.assertNotEqual(hash(node), hash(uri))
+ self.assertNotEqual(node, uri1)
+ self.assertNotEqual(hash(node), hash(uri1))
self.assertNotEqual(node, ns.bsfs.Entity)
self.assertNotEqual(hash(node), hash(ns.bsfs.Entity))
class Foo(): pass