From 9c26a5ef759b010d8cf4384b0515cc188b885d81 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 17:44:00 +0100 Subject: node naming policy --- bsie/apps/index.py | 16 ++++--- bsie/apps/info.py | 1 - bsie/lib/__init__.py | 1 + bsie/lib/bsie.py | 10 ++-- bsie/lib/builder.py | 9 +--- bsie/lib/naming_policy.py | 101 +++++++++++++++++++++++++++++++++++++++++ bsie/lib/pipeline.py | 18 ++------ bsie/utils/node.py | 29 +++++++++--- test/lib/test_bsie.py | 22 ++++----- test/lib/test_builder.py | 11 ++--- test/lib/test_naming_policy.py | 86 +++++++++++++++++++++++++++++++++++ test/lib/test_pipeline.py | 28 +++++------- test/utils/test_node.py | 54 +++++++++++++++++----- 13 files changed, 306 insertions(+), 80 deletions(-) create mode 100644 bsie/lib/naming_policy.py create mode 100644 test/lib/test_naming_policy.py diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 21c2318..a870364 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -11,7 +11,7 @@ import typing # bsie imports from bsie.extractor import ExtractorBuilder -from bsie.lib import BSIE, PipelineBuilder +from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy from bsie.reader import ReaderBuilder from bsie.utils import bsfs, errors @@ -26,7 +26,9 @@ __all__: typing.Sequence[str] = ( def main(argv): """Index files or directories into BSFS.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='index') - parser.add_argument('--user', type=bsfs.URI, default=bsfs.URI('http://example.com/me'), + parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'), + help='') + parser.add_argument('--user', type=str, default='me', help='') parser.add_argument('--collect', action='append', default=[], help='') @@ -66,16 +68,19 @@ def main(argv): ]) # pipeline builder pbuild = PipelineBuilder( - bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')), rbuild, ebuild, ) # build pipeline pipeline = pbuild.build() + # build the naming policy + naming_policy = DefaultNamingPolicy( + host=args.host, + user=args.user, + ) # build BSIE frontend - bsie = BSIE(pipeline, args.collect, args.discard) - + bsie = BSIE(pipeline, naming_policy, args.collect, args.discard) def walk(handle): """Walk through given input files.""" @@ -83,7 +88,6 @@ def main(argv): # FIXME: simplify code (below but maybe also above) # FIXME: How to handle dependencies between data? # E.g. do I still want to link to a tag despite not being permitted to set its label? - # FIXME: node renaming? # index input paths for path in args.input_file: diff --git a/bsie/apps/info.py b/bsie/apps/info.py index 64a4eba..4e948fc 100644 --- a/bsie/apps/info.py +++ b/bsie/apps/info.py @@ -54,7 +54,6 @@ def main(argv): ]) # pipeline builder pbuild = PipelineBuilder( - bsfs.Namespace('http://example.com/me/'), # not actually used rbuild, ebuild, ) diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py index 4239d3b..48379de 100644 --- a/bsie/lib/__init__.py +++ b/bsie/lib/__init__.py @@ -10,6 +10,7 @@ import typing # inner-module imports from .bsie import BSIE from .builder import PipelineBuilder +from .naming_policy import DefaultNamingPolicy # exports __all__: typing.Sequence[str] = ( diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py index 668783d..a572525 100644 --- a/bsie/lib/bsie.py +++ b/bsie/lib/bsie.py @@ -11,6 +11,7 @@ import typing from bsie.utils import bsfs, node, ns # inner-module imports +from .naming_policy import NamingPolicy from .pipeline import Pipeline # exports @@ -41,15 +42,18 @@ class BSIE(): def __init__( self, - # pipeline builder. + # pipeline. pipeline: Pipeline, + # naming policy + naming_policy: NamingPolicy, # principals to extract at most. None implies all available w.r.t. extractors. collect: typing.Optional[typing.Iterable[bsfs.URI]] = None, # principals to discard. discard: typing.Optional[typing.Iterable[bsfs.URI]] = None, ): - # store pipeline + # store pipeline and naming policy self._pipeline = pipeline + self._naming_policy = naming_policy # start off with available principals self._principals = {pred.uri for pred in self._pipeline.principals} # limit principals to specified ones by argument. @@ -89,6 +93,6 @@ class BSIE(): # predicate lookup principals = {self.schema.predicate(pred) for pred in principals} # invoke pipeline - yield from self._pipeline(path, principals) + yield from self._naming_policy(self._pipeline(path, principals)) ## EOF ## diff --git a/bsie/lib/builder.py b/bsie/lib/builder.py index c2abffe..39da441 100644 --- a/bsie/lib/builder.py +++ b/bsie/lib/builder.py @@ -11,7 +11,7 @@ import typing # bsie imports from bsie.extractor import ExtractorBuilder from bsie.reader import ReaderBuilder -from bsie.utils import bsfs, errors +from bsie.utils import errors # inner-module imports from . import pipeline @@ -29,9 +29,6 @@ logger = logging.getLogger(__name__) class PipelineBuilder(): """Build `bsie.tools.pipeline.Pipeline` instances.""" - # Prefix to be used in the Pipeline. - prefix: bsfs.Namespace - # builder for Readers. rbuild: ReaderBuilder @@ -40,11 +37,9 @@ class PipelineBuilder(): def __init__( self, - prefix: bsfs.Namespace, reader_builder: ReaderBuilder, extractor_builder: ExtractorBuilder, ): - self.prefix = prefix self.rbuild = reader_builder self.ebuild = extractor_builder @@ -80,6 +75,6 @@ class PipelineBuilder(): except errors.BuilderError as err: # failed to build reader logger.error(str(err)) - return pipeline.Pipeline(self.prefix, ext2rdr) + return pipeline.Pipeline(ext2rdr) ## EOF ## diff --git a/bsie/lib/naming_policy.py b/bsie/lib/naming_policy.py new file mode 100644 index 0000000..360abde --- /dev/null +++ b/bsie/lib/naming_policy.py @@ -0,0 +1,101 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import abc +import os +import typing + +# bsie imports +from bsie.utils import bsfs, errors, ns +from bsie.utils.node import Node + +# exports +__all__: typing.Sequence[str] = ( + 'DefaultNamingPolicy', + ) + + +## code ## + +class NamingPolicy(): + """Determine node uri's from node hints.""" + def __call__( + self, + iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], + ): + """Apply the policy on a triple iterator.""" + return NamingPolicyIterator(self, iterable) + + @abc.abstractmethod + def handle_node(self, node: Node) -> Node: + """Apply the policy on a node.""" + + +class NamingPolicyIterator(): + """Iterates over triples, determines uris according to a *policy* as it goes.""" + + # source triple iterator. + _iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]] + + # naming policy + _policy: NamingPolicy + + def __init__( + self, + policy: NamingPolicy, + iterable: typing.Iterable[typing.Tuple[Node, bsfs.URI, typing.Any]], + ): + self._iterable = iterable + self._policy = policy + + def __iter__(self): + for node, pred, value in self._iterable: + # handle subject + self._policy.handle_node(node) + # handle value + if isinstance(value, Node): + self._policy.handle_node(value) + # yield triple + yield node, pred, value + + +class DefaultNamingPolicy(NamingPolicy): + """Compose URIs as + + What information is used as fragment depends on the node type. + Typically, the default is to use the "ucid" hint. + The fallback in all cases is to generate a random uuid. + + Never changes previously assigned uris. Sets uris in-place. + + """ + + def __init__( + self, + host: bsfs.URI, + user: str, + ): + self._prefix = bsfs.Namespace(os.path.join(host, user)) + self._uuid = bsfs.uuid.UUID() + + def handle_node(self, node: Node) -> Node: + if node.uri is not None: + return node + if node.node_type == ns.bsfs.File: + return self.name_file(node) + raise errors.ProgrammingError('no naming policy available for {node.node_type}') + + def name_file(self, node: Node) -> Node: + """Set a bsfs:File node's uri fragment to its ucid.""" + if 'ucid' in node.hints: # content id + fragment = node.hints['ucid'] + else: # random name + fragment = self._uuid() + node.uri = (self._prefix + 'file')[fragment] + return node + +## EOF ## diff --git a/bsie/lib/pipeline.py b/bsie/lib/pipeline.py index 44685ba..0bc5109 100644 --- a/bsie/lib/pipeline.py +++ b/bsie/lib/pipeline.py @@ -19,8 +19,6 @@ __all__: typing.Sequence[str] = ( 'Pipeline', ) -# constants -FILE_PREFIX = 'file#' ## code ## @@ -40,19 +38,14 @@ class Pipeline(): # combined extractor schemas. _schema: bsfs.schema.Schema - # node prefix. - _prefix: bsfs.Namespace - # extractor -> reader mapping _ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]] def __init__( self, - prefix: bsfs.Namespace, ext2rdr: typing.Dict[Extractor, typing.Optional[Reader]] ): # store core members - self._prefix = prefix + FILE_PREFIX self._ext2rdr = ext2rdr # compile schema from all extractors self._schema = bsfs.schema.Schema.Union(ext.schema for ext in ext2rdr) @@ -64,12 +57,11 @@ class Pipeline(): return f'{bsfs.typename(self)}(...)' def __hash__(self) -> int: - return hash((type(self), self._prefix, self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) + return hash((type(self), self._schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values()))) def __eq__(self, other: typing.Any) -> bool: return isinstance(other, type(self)) \ and self._schema == other._schema \ - and self._prefix == other._prefix \ and self._ext2rdr == other._ext2rdr @property @@ -117,8 +109,9 @@ class Pipeline(): rdr2ext[rdr].add(ext) # create subject for file - uuid = bsfs.uuid.UCID.from_path(path) - subject = node.Node(ns.bsfs.File, self._prefix[uuid]) + subject = node.Node(ns.bsfs.File, + ucid=bsfs.uuid.UCID.from_path(path), + ) # extract information for rdr, extrs in rdr2ext.items(): @@ -131,8 +124,7 @@ class Pipeline(): for ext in extrs: try: # get predicate/value tuples - for subject, pred, value in ext.extract(subject, content, principals): - yield subject, pred, value + yield from ext.extract(subject, content, principals) except errors.ExtractorError as err: # critical extractor failure. diff --git a/bsie/utils/node.py b/bsie/utils/node.py index 91e4f37..aa62c06 100644 --- a/bsie/utils/node.py +++ b/bsie/utils/node.py @@ -19,30 +19,47 @@ __all__: typing.Sequence[str] = ( ## code ## class Node(): - """Lightweight Node, disconnected from any bsfs structures.""" + """Lightweight Node, disconnected from any bsfs structures. + + In most cases, provide *hints* and leave setting the uri to a node + naming policy. Only provide an *uri* if it is absolutely determined. + + """ # node type. node_type: bsfs.URI # node URI. - uri: bsfs.URI + uri: typing.Optional[bsfs.URI] + + # node naming hints. + hits: dict def __init__( self, node_type: bsfs.URI, - uri: bsfs.URI, + uri: typing.Optional[bsfs.URI] = None, + **uri_hints, ): # assign members self.node_type = bsfs.URI(node_type) - self.uri = bsfs.URI(uri) + self.hints = uri_hints + self.uri = uri def __eq__(self, other: typing.Any) -> bool: + """Compare two Node instances based on type and uri. + Compares hits only if the uri is not yet specified. + """ return isinstance(other, Node) \ and other.node_type == self.node_type \ - and other.uri == self.uri + and other.uri == self.uri \ + and (self.uri is not None or self.hints == other.hints) def __hash__(self) -> int: - return hash((type(self), self.node_type, self.uri)) + identifier = self.uri + if identifier is None: + identifier = tuple((key, self.hints[key]) for key in sorted(self.hints)) + return hash((type(self), self.node_type, identifier)) def __str__(self) -> str: return f'{bsfs.typename(self)}({self.node_type}, {self.uri})' diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 38e6f59..ae23c4b 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -11,7 +11,7 @@ import unittest # bsie imports from bsie.extractor import ExtractorBuilder from bsie.extractor.base import SCHEMA_PREAMBLE -from bsie.lib import PipelineBuilder +from bsie.lib import PipelineBuilder, DefaultNamingPolicy from bsie.reader import ReaderBuilder from bsie.utils import bsfs, node, ns @@ -40,13 +40,13 @@ class TestBSIE(unittest.TestCase): )}, ]) # build pipeline - self.prefix = bsfs.Namespace('http://example.com/local/') - pbuild = PipelineBuilder(self.prefix, rbuild, ebuild) + self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='') + pbuild = PipelineBuilder(rbuild, ebuild) self.pipeline = pbuild.build() def test_construction(self): - # pipeline only - lib = BSIE(self.pipeline) + # only pipeline and naming policy + lib = BSIE(self.pipeline, self.naming_policy) self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.filesize, @@ -70,7 +70,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify collect - lib = BSIE(self.pipeline, collect={ + lib = BSIE(self.pipeline, self.naming_policy, collect={ ns.bse.filesize, ns.bse.author, ns.bse.inexistent, @@ -91,7 +91,7 @@ class TestBSIE(unittest.TestCase): bsfs:unique "true"^^xsd:boolean . ''')) # empty collect is disregarded - lib = BSIE(self.pipeline, collect={}) + lib = BSIE(self.pipeline, self.naming_policy, collect={}) self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.filesize, @@ -116,7 +116,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify discard - lib = BSIE(self.pipeline, discard={ + lib = BSIE(self.pipeline, self.naming_policy, discard={ ns.bse.filesize, ns.bse.filename, ns.bse.inexistent, @@ -132,7 +132,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify collect and discard - lib = BSIE(self.pipeline, + lib = BSIE(self.pipeline, self.naming_policy, collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar}, discard={ns.bse.author, ns.bse.foo, ns.bse.foobar}, ) @@ -150,14 +150,14 @@ class TestBSIE(unittest.TestCase): def test_from_file(self): # setup - lib = BSIE(self.pipeline) + lib = BSIE(self.pipeline, self.naming_policy) self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.filename, ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) + subject = node.Node(ns.bsfs.File, uri=f'http://example.com/local/file#{content_hash}') testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') # from_file extracts all available triples diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py index 273d620..48e932b 100644 --- a/test/lib/test_builder.py +++ b/test/lib/test_builder.py @@ -21,7 +21,6 @@ from bsie.lib import PipelineBuilder class TestPipelineBuilder(unittest.TestCase): def test_build(self): - prefix = bsfs.URI('http://example.com/local/file#') c_schema = ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -40,7 +39,7 @@ class TestPipelineBuilder(unittest.TestCase): )}, ]) # build pipeline - builder = PipelineBuilder(prefix, rbuild, ebuild) + builder = PipelineBuilder(rbuild, ebuild) pipeline = builder.build() # delayed import import bsie.reader.path @@ -61,7 +60,7 @@ class TestPipelineBuilder(unittest.TestCase): {'bsie.extractor.generic.path.Path': {}}, ]) with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + pipeline = PipelineBuilder(rbuild, ebuild_err).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) @@ -71,7 +70,7 @@ class TestPipelineBuilder(unittest.TestCase): {'bsie.extractor.generic.path.Path': {}}, ]) with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + pipeline = PipelineBuilder(rbuild, ebuild_err).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) @@ -81,7 +80,7 @@ class TestPipelineBuilder(unittest.TestCase): old_reader = bsie.extractor.generic.path.Path.CONTENT_READER bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo' # build pipeline with invalid reader reference - pipeline = PipelineBuilder(prefix, rbuild, ebuild).build() + pipeline = PipelineBuilder(rbuild, ebuild).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, @@ -92,7 +91,7 @@ class TestPipelineBuilder(unittest.TestCase): # fail to build reader rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build() + pipeline = PipelineBuilder(rbuild_err, ebuild).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py new file mode 100644 index 0000000..763537b --- /dev/null +++ b/test/lib/test_naming_policy.py @@ -0,0 +1,86 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import ns, errors +from bsie.utils.bsfs import URI +from bsie.utils.node import Node + +# objects to test +from bsie.lib.naming_policy import NamingPolicy, NamingPolicyIterator, DefaultNamingPolicy + + + +## code ## + +class TestDefaultNamingPolicy(unittest.TestCase): + + def test_handle_node(self): + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + # handle_node doesn't modify existing uris + self.assertEqual(policy.handle_node( + Node(ns.bsfs.Entity, uri='http://example.com/you/foo#bar')).uri, + URI('http://example.com/you/foo#bar')) + # processes bsfs:File + self.assertEqual(policy.handle_node( + Node(ns.bsfs.File, ucid='abc123cba')).uri, + URI('http://example.com/me/file#abc123cba')) + # raises an exception on unknown types + self.assertRaises(errors.ProgrammingError, policy.handle_node, + Node(ns.bsfs.Entity, ucid='abc123cba', size=123)) + + def test_name_file(self): + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + # name_file uses ucid + self.assertEqual(policy.name_file( + Node(ns.bsfs.File, ucid='123abc321')).uri, + URI('http://example.com/me/file#123abc321')) + # name_file falls back to a random guid + self.assertTrue(policy.name_file( + Node(ns.bsfs.File)).uri.startswith('http://example.com/me/file#')) + + +class TestNamingPolicyIterator(unittest.TestCase): + + def test_call(self): # NOTE: We test NamingPolicy.__call__ here + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + # call accepts list + triples = [('node', 'pred', 'value'), ('node', 'pred', 'value')] + it = policy(triples) + self.assertIsInstance(it, NamingPolicyIterator) + self.assertEqual(it._iterable, triples) + self.assertEqual(it._policy, policy) + # call accepts iterator + triples = iter([('node', 'pred', 'value'), ('node', 'pred', 'value')]) + it = policy(triples) + self.assertIsInstance(it, NamingPolicyIterator) + self.assertEqual(it._iterable, triples) + self.assertEqual(it._policy, policy) + + def test_iter(self): + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + triples = [ + (Node(ns.bsfs.File, ucid='foo'), 'predA', 'hello'), + ] + # handles nodes, handles values, ignores predicate + self.assertListEqual(list(policy(triples)), [ + (Node(ns.bsfs.File, uri='http://example.com/me/file#foo'), 'predA', 'hello'), + ]) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index 8fecc74..61fddd7 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -48,32 +48,28 @@ class TestPipeline(unittest.TestCase): bsie.extractor.generic.constant.Constant(csA, tupA): None, bsie.extractor.generic.constant.Constant(csB, tupB): None, } - self.prefix = bsfs.Namespace('http://example.com/local/') def test_essentials(self): - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) self.assertEqual(str(pipeline), 'Pipeline') self.assertEqual(repr(pipeline), 'Pipeline(...)') def test_equality(self): - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # a pipeline is equivalent to itself self.assertEqual(pipeline, pipeline) self.assertEqual(hash(pipeline), hash(pipeline)) # identical builds are equivalent - self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) - self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) + self.assertEqual(pipeline, Pipeline(self.ext2rdr)) + self.assertEqual(hash(pipeline), hash(Pipeline(self.ext2rdr))) - # equivalence respects prefix - self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))) # equivalence respects extractors/readers ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} - self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) + self.assertNotEqual(pipeline, Pipeline(ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(ext2rdr))) # equivalence respects schema - p2 = Pipeline(self.prefix, self.ext2rdr) + p2 = Pipeline(self.ext2rdr) p2._schema = bsfs.schema.Schema() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) @@ -90,10 +86,10 @@ class TestPipeline(unittest.TestCase): def test_call(self): # build pipeline - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) + subject = node.Node(ns.bsfs.File, ucid=content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) @@ -138,7 +134,7 @@ class TestPipeline(unittest.TestCase): def __call__(self, path): raise errors.ReaderError('reader error') - pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) + pipeline = Pipeline({bsie.extractor.generic.path.Path(): FaultyReader()}) with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -149,7 +145,7 @@ class TestPipeline(unittest.TestCase): def extract(self, subject, content, predicates): raise errors.ExtractorError('extractor error') - pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) + pipeline = Pipeline({FaultyExtractor(): bsie.reader.path.Path()}) with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -157,7 +153,7 @@ class TestPipeline(unittest.TestCase): def test_predicates(self): # build pipeline - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # self.assertSetEqual(set(pipeline.principals), { pipeline.schema.predicate(ns.bse.filename), diff --git a/test/utils/test_node.py b/test/utils/test_node.py index 9feb051..1dcd0ed 100644 --- a/test/utils/test_node.py +++ b/test/utils/test_node.py @@ -18,22 +18,54 @@ from bsie.utils.node import Node class TestNode(unittest.TestCase): def test_equality(self): - uri = bsfs.URI('http://example.com/me/entity#1234') - node = Node(ns.bsfs.Entity, uri) - # basic equivalence - self.assertEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234'))) - self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234')))) + uri1 = bsfs.URI('http://example.com/me/entity#1234') + uri2 = bsfs.URI('http://example.com/me/entity#4321') + node = Node(ns.bsfs.Entity, uri1) # equality respects uri - self.assertNotEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321'))) - self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321')))) + self.assertEqual(node, Node(ns.bsfs.Entity, uri1)) + self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, uri1))) + self.assertNotEqual(node, Node(ns.bsfs.Entity, uri2)) + self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, uri2))) + # equality respects hints + self.assertEqual( + Node(ns.bsfs.Entity, foo='foo'), + Node(ns.bsfs.Entity, foo='foo')) + self.assertEqual( + hash(Node(ns.bsfs.Entity, foo='foo')), + hash(Node(ns.bsfs.Entity, foo='foo'))) + self.assertNotEqual( + Node(ns.bsfs.Entity, foo='foo'), + Node(ns.bsfs.Entity, foo='bar')) + self.assertNotEqual( + hash(Node(ns.bsfs.Entity, foo='foo')), + hash(Node(ns.bsfs.Entity, foo='bar'))) + self.assertNotEqual( + Node(ns.bsfs.Entity, foo='bar'), + Node(ns.bsfs.Entity, bar='foo')) + self.assertNotEqual( + hash(Node(ns.bsfs.Entity, foo='bar')), + hash(Node(ns.bsfs.Entity, bar='foo'))) + # hints are irrelevant if uri is set + self.assertEqual( + Node(ns.bsfs.Entity, uri=uri1, foo='bar'), + Node(ns.bsfs.Entity, uri=uri1, bar='foo')) + self.assertEqual( + hash(Node(ns.bsfs.Entity, uri=uri1, foo='bar')), + hash(Node(ns.bsfs.Entity, uri=uri1, bar='foo'))) + self.assertNotEqual( + Node(ns.bsfs.Entity, uri=uri1, foo='bar'), + Node(ns.bsfs.Entity, uri=uri2, bar='foo')) + self.assertNotEqual( + hash(Node(ns.bsfs.Entity, uri=uri1, foo='bar')), + hash(Node(ns.bsfs.Entity, uri=uri2, bar='foo'))) # equality respects node_type - self.assertNotEqual(node, Node(ns.bsfs.Foo, uri)) - self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri))) + self.assertNotEqual(node, Node(ns.bsfs.Foo, uri1)) + self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri1))) # not equal to other types self.assertNotEqual(node, 1234) self.assertNotEqual(hash(node), hash(1234)) - self.assertNotEqual(node, uri) - self.assertNotEqual(hash(node), hash(uri)) + self.assertNotEqual(node, uri1) + self.assertNotEqual(hash(node), hash(uri1)) self.assertNotEqual(node, ns.bsfs.Entity) self.assertNotEqual(hash(node), hash(ns.bsfs.Entity)) class Foo(): pass -- cgit v1.2.3