diff options
author | Matthias Baumgartner <dev@igsor.net> | 2023-02-08 17:44:00 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2023-02-08 17:44:00 +0100 |
commit | 9c26a5ef759b010d8cf4384b0515cc188b885d81 (patch) | |
tree | dfad86871856e8be5807b269e2fe6d2e6971fa9f /test/lib | |
parent | 05a841215c82ef40d4679dfc4d2c26572bd4d349 (diff) | |
download | bsie-9c26a5ef759b010d8cf4384b0515cc188b885d81.tar.gz bsie-9c26a5ef759b010d8cf4384b0515cc188b885d81.tar.bz2 bsie-9c26a5ef759b010d8cf4384b0515cc188b885d81.zip |
node naming policy
Diffstat (limited to 'test/lib')
-rw-r--r-- | test/lib/test_bsie.py | 22 | ||||
-rw-r--r-- | test/lib/test_builder.py | 11 | ||||
-rw-r--r-- | test/lib/test_naming_policy.py | 86 | ||||
-rw-r--r-- | test/lib/test_pipeline.py | 28 |
4 files changed, 114 insertions, 33 deletions
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 38e6f59..ae23c4b 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -11,7 +11,7 @@ import unittest # bsie imports from bsie.extractor import ExtractorBuilder from bsie.extractor.base import SCHEMA_PREAMBLE -from bsie.lib import PipelineBuilder +from bsie.lib import PipelineBuilder, DefaultNamingPolicy from bsie.reader import ReaderBuilder from bsie.utils import bsfs, node, ns @@ -40,13 +40,13 @@ class TestBSIE(unittest.TestCase): )}, ]) # build pipeline - self.prefix = bsfs.Namespace('http://example.com/local/') - pbuild = PipelineBuilder(self.prefix, rbuild, ebuild) + self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='') + pbuild = PipelineBuilder(rbuild, ebuild) self.pipeline = pbuild.build() def test_construction(self): - # pipeline only - lib = BSIE(self.pipeline) + # only pipeline and naming policy + lib = BSIE(self.pipeline, self.naming_policy) self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.filesize, @@ -70,7 +70,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify collect - lib = BSIE(self.pipeline, collect={ + lib = BSIE(self.pipeline, self.naming_policy, collect={ ns.bse.filesize, ns.bse.author, ns.bse.inexistent, @@ -91,7 +91,7 @@ class TestBSIE(unittest.TestCase): bsfs:unique "true"^^xsd:boolean . ''')) # empty collect is disregarded - lib = BSIE(self.pipeline, collect={}) + lib = BSIE(self.pipeline, self.naming_policy, collect={}) self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.filesize, @@ -116,7 +116,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify discard - lib = BSIE(self.pipeline, discard={ + lib = BSIE(self.pipeline, self.naming_policy, discard={ ns.bse.filesize, ns.bse.filename, ns.bse.inexistent, @@ -132,7 +132,7 @@ class TestBSIE(unittest.TestCase): ''')) # specify collect and discard - lib = BSIE(self.pipeline, + lib = BSIE(self.pipeline, self.naming_policy, collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar}, discard={ns.bse.author, ns.bse.foo, ns.bse.foobar}, ) @@ -150,14 +150,14 @@ class TestBSIE(unittest.TestCase): def test_from_file(self): # setup - lib = BSIE(self.pipeline) + lib = BSIE(self.pipeline, self.naming_policy) self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.filename, ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) + subject = node.Node(ns.bsfs.File, uri=f'http://example.com/local/file#{content_hash}') testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') # from_file extracts all available triples diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py index 273d620..48e932b 100644 --- a/test/lib/test_builder.py +++ b/test/lib/test_builder.py @@ -21,7 +21,6 @@ from bsie.lib import PipelineBuilder class TestPipelineBuilder(unittest.TestCase): def test_build(self): - prefix = bsfs.URI('http://example.com/local/file#') c_schema = ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; @@ -40,7 +39,7 @@ class TestPipelineBuilder(unittest.TestCase): )}, ]) # build pipeline - builder = PipelineBuilder(prefix, rbuild, ebuild) + builder = PipelineBuilder(rbuild, ebuild) pipeline = builder.build() # delayed import import bsie.reader.path @@ -61,7 +60,7 @@ class TestPipelineBuilder(unittest.TestCase): {'bsie.extractor.generic.path.Path': {}}, ]) with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + pipeline = PipelineBuilder(rbuild, ebuild_err).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) @@ -71,7 +70,7 @@ class TestPipelineBuilder(unittest.TestCase): {'bsie.extractor.generic.path.Path': {}}, ]) with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + pipeline = PipelineBuilder(rbuild, ebuild_err).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) @@ -81,7 +80,7 @@ class TestPipelineBuilder(unittest.TestCase): old_reader = bsie.extractor.generic.path.Path.CONTENT_READER bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo' # build pipeline with invalid reader reference - pipeline = PipelineBuilder(prefix, rbuild, ebuild).build() + pipeline = PipelineBuilder(rbuild, ebuild).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, @@ -92,7 +91,7 @@ class TestPipelineBuilder(unittest.TestCase): # fail to build reader rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR): - pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build() + pipeline = PipelineBuilder(rbuild_err, ebuild).build() self.assertDictEqual(pipeline._ext2rdr, { bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py new file mode 100644 index 0000000..763537b --- /dev/null +++ b/test/lib/test_naming_policy.py @@ -0,0 +1,86 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import unittest + +# bsie imports +from bsie.utils import ns, errors +from bsie.utils.bsfs import URI +from bsie.utils.node import Node + +# objects to test +from bsie.lib.naming_policy import NamingPolicy, NamingPolicyIterator, DefaultNamingPolicy + + + +## code ## + +class TestDefaultNamingPolicy(unittest.TestCase): + + def test_handle_node(self): + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + # handle_node doesn't modify existing uris + self.assertEqual(policy.handle_node( + Node(ns.bsfs.Entity, uri='http://example.com/you/foo#bar')).uri, + URI('http://example.com/you/foo#bar')) + # processes bsfs:File + self.assertEqual(policy.handle_node( + Node(ns.bsfs.File, ucid='abc123cba')).uri, + URI('http://example.com/me/file#abc123cba')) + # raises an exception on unknown types + self.assertRaises(errors.ProgrammingError, policy.handle_node, + Node(ns.bsfs.Entity, ucid='abc123cba', size=123)) + + def test_name_file(self): + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + # name_file uses ucid + self.assertEqual(policy.name_file( + Node(ns.bsfs.File, ucid='123abc321')).uri, + URI('http://example.com/me/file#123abc321')) + # name_file falls back to a random guid + self.assertTrue(policy.name_file( + Node(ns.bsfs.File)).uri.startswith('http://example.com/me/file#')) + + +class TestNamingPolicyIterator(unittest.TestCase): + + def test_call(self): # NOTE: We test NamingPolicy.__call__ here + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + # call accepts list + triples = [('node', 'pred', 'value'), ('node', 'pred', 'value')] + it = policy(triples) + self.assertIsInstance(it, NamingPolicyIterator) + self.assertEqual(it._iterable, triples) + self.assertEqual(it._policy, policy) + # call accepts iterator + triples = iter([('node', 'pred', 'value'), ('node', 'pred', 'value')]) + it = policy(triples) + self.assertIsInstance(it, NamingPolicyIterator) + self.assertEqual(it._iterable, triples) + self.assertEqual(it._policy, policy) + + def test_iter(self): + # setup + policy = DefaultNamingPolicy('http://example.com', 'me') + triples = [ + (Node(ns.bsfs.File, ucid='foo'), 'predA', 'hello'), + ] + # handles nodes, handles values, ignores predicate + self.assertListEqual(list(policy(triples)), [ + (Node(ns.bsfs.File, uri='http://example.com/me/file#foo'), 'predA', 'hello'), + ]) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index 8fecc74..61fddd7 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -48,32 +48,28 @@ class TestPipeline(unittest.TestCase): bsie.extractor.generic.constant.Constant(csA, tupA): None, bsie.extractor.generic.constant.Constant(csB, tupB): None, } - self.prefix = bsfs.Namespace('http://example.com/local/') def test_essentials(self): - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) self.assertEqual(str(pipeline), 'Pipeline') self.assertEqual(repr(pipeline), 'Pipeline(...)') def test_equality(self): - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # a pipeline is equivalent to itself self.assertEqual(pipeline, pipeline) self.assertEqual(hash(pipeline), hash(pipeline)) # identical builds are equivalent - self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) - self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) + self.assertEqual(pipeline, Pipeline(self.ext2rdr)) + self.assertEqual(hash(pipeline), hash(Pipeline(self.ext2rdr))) - # equivalence respects prefix - self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))) # equivalence respects extractors/readers ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} - self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) + self.assertNotEqual(pipeline, Pipeline(ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(ext2rdr))) # equivalence respects schema - p2 = Pipeline(self.prefix, self.ext2rdr) + p2 = Pipeline(self.ext2rdr) p2._schema = bsfs.schema.Schema() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) @@ -90,10 +86,10 @@ class TestPipeline(unittest.TestCase): def test_call(self): # build pipeline - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) + subject = node.Node(ns.bsfs.File, ucid=content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) @@ -138,7 +134,7 @@ class TestPipeline(unittest.TestCase): def __call__(self, path): raise errors.ReaderError('reader error') - pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) + pipeline = Pipeline({bsie.extractor.generic.path.Path(): FaultyReader()}) with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -149,7 +145,7 @@ class TestPipeline(unittest.TestCase): def extract(self, subject, content, predicates): raise errors.ExtractorError('extractor error') - pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) + pipeline = Pipeline({FaultyExtractor(): bsie.reader.path.Path()}) with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -157,7 +153,7 @@ class TestPipeline(unittest.TestCase): def test_predicates(self): # build pipeline - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # self.assertSetEqual(set(pipeline.principals), { pipeline.schema.predicate(ns.bse.filename), |