From 3e6a69ce7f109f0fd4352507ad60d58d4cbd24a7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:43:12 +0100 Subject: builders and pipeline --- test/tools/__init__.py | 0 test/tools/test_builder.py | 247 ++++++++++++++++++++++++++++++++++++++++++++ test/tools/test_pipeline.py | 167 ++++++++++++++++++++++++++++++ test/tools/testfile.t | 1 + 4 files changed, 415 insertions(+) create mode 100644 test/tools/__init__.py create mode 100644 test/tools/test_builder.py create mode 100644 test/tools/test_pipeline.py create mode 100644 test/tools/testfile.t (limited to 'test/tools') diff --git a/test/tools/__init__.py b/test/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py new file mode 100644 index 0000000..bef0e9d --- /dev/null +++ b/test/tools/test_builder.py @@ -0,0 +1,247 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import logging +import unittest + +# bsie imports +from bsie import base +from bsie.base import errors +from bsie.utils.bsfs import URI + +# objects to test +from bsie.tools.builder import ExtractorBuilder +from bsie.tools.builder import PipelineBuilder +from bsie.tools.builder import ReaderBuilder +from bsie.tools.builder import _safe_load +from bsie.tools.builder import _unpack_name + + +## code ## + +class TestUtils(unittest.TestCase): + def test_safe_load(self): + # invalid module + self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar') + self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar') + # partially valid module + self.assertRaises(errors.LoaderError, _safe_load, 'os.foo', 'foobar') + # invalid class + self.assertRaises(errors.LoaderError, _safe_load, 'os.path', 'foo') + # valid module and class + cls = _safe_load('collections.abc', 'Container') + import collections.abc + self.assertEqual(cls, collections.abc.Container) + + def test_unpack_name(self): + self.assertRaises(TypeError, _unpack_name, 123) + self.assertRaises(TypeError, _unpack_name, None) + self.assertRaises(ValueError, _unpack_name, '') + self.assertRaises(ValueError, _unpack_name, 'path') + self.assertRaises(ValueError, _unpack_name, '.Path') + self.assertEqual(_unpack_name('path.Path'), ('path', 'Path')) + self.assertEqual(_unpack_name('path.foo.bar.Path'), ('path.foo.bar', 'Path')) + + +class TestReaderBuilder(unittest.TestCase): + def test_build(self): + builder = ReaderBuilder({'bsie.reader.path.Path': {}}) + # build configured reader + cls = builder.build('bsie.reader.path.Path') + import bsie.reader.path + self.assertIsInstance(cls, bsie.reader.path.Path) + # build unconfigured reader + cls = builder.build('bsie.reader.stat.Stat') + import bsie.reader.stat + self.assertIsInstance(cls, bsie.reader.stat.Stat) + # re-build previous reader (test cache) + self.assertEqual(cls, builder.build('bsie.reader.stat.Stat')) + # test invalid + self.assertRaises(TypeError, builder.build, 123) + self.assertRaises(TypeError, builder.build, None) + self.assertRaises(ValueError, builder.build, '') + self.assertRaises(ValueError, builder.build, 'Path') + self.assertRaises(errors.BuilderError, builder.build, 'path.Path') + # invalid config + builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) + self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat') + builder = ReaderBuilder({'bsie.reader.stat.Stat': 123}) + self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat') + # no instructions + builder = ReaderBuilder({}) + cls = builder.build('bsie.reader.stat.Stat') + self.assertIsInstance(cls, bsie.reader.stat.Stat) + + + +class TestExtractorBuilder(unittest.TestCase): + def test_iter(self): + # no specifications + self.assertListEqual(list(ExtractorBuilder([])), []) + # some specifications + builder = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + self.assertListEqual(list(builder), [0, 1, 2]) + + def test_build(self): + # simple and repeated extractors + builder = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + ext = [builder.build(0), builder.build(1), builder.build(2)] + import bsie.extractor.generic.path + import bsie.extractor.generic.stat + self.assertListEqual(ext, [ + bsie.extractor.generic.path.Path(), + bsie.extractor.generic.stat.Stat(), + bsie.extractor.generic.path.Path(), + ]) + # out-of-bounds raises KeyError + self.assertRaises(IndexError, builder.build, 3) + + # building with args + builder = ExtractorBuilder([ + {'bsie.extractor.generic.constant.Constant': { + 'schema': ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + owl:maxCardinality "1"^^xsd:number . + ''', + 'tuples': [ + ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), + ('http://bsfs.ai/schema/Entity#rating', 123), + ], + }}]) + obj = builder.build(0) + import bsie.extractor.generic.constant + self.assertEqual(obj, bsie.extractor.generic.constant.Constant(''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + owl:maxCardinality "1"^^xsd:number . + ''', [ + ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), + ('http://bsfs.ai/schema/Entity#rating', 123), + ])) + + # building with invalid args + self.assertRaises(errors.BuilderError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0) + # non-dict build specification + self.assertRaises(TypeError, ExtractorBuilder( + [('bsie.extractor.generic.path.Path', {})]).build, 0) + # multiple keys per build specification + self.assertRaises(TypeError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': {}, + 'bsie.extractor.generic.stat.Stat': {}}]).build, 0) + # non-dict value for kwargs + self.assertRaises(TypeError, ExtractorBuilder( + [{'bsie.extractor.generic.path.Path': 123}]).build, 0) + + + + +class TestPipelineBuilder(unittest.TestCase): + def test_build(self): + prefix = URI('http://example.com/local/file#') + c_schema = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . + ''' + c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + # prepare builders + rbuild = ReaderBuilder({}) + ebuild = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + schema=c_schema, + tuples=c_tuples, + )}, + ]) + # build pipeline + builder = PipelineBuilder(prefix, rbuild, ebuild) + pipeline = builder.build() + # delayed import + import bsie.reader.path + import bsie.reader.stat + import bsie.extractor.generic.path + import bsie.extractor.generic.stat + import bsie.extractor.generic.constant + # check pipeline + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + + # fail to load extractor + ebuild_err = ExtractorBuilder([ + {'bsie.extractor.generic.foo.Foo': {}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) + + # fail to build extractor + ebuild_err = ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {'foo': 123}}, + {'bsie.extractor.generic.path.Path': {}}, + ]) + with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path()}) + + # fail to load reader + with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): + # switch reader of an extractor + old_reader = bsie.extractor.generic.path.Path.CONTENT_READER + bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo' + # build pipeline with invalid reader reference + pipeline = PipelineBuilder(prefix, rbuild, ebuild).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + # switch back + bsie.extractor.generic.path.Path.CONTENT_READER = old_reader + + # fail to build reader + rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) + with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR): + pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build() + self.assertDictEqual(pipeline._ext2rdr, { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None, + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py new file mode 100644 index 0000000..9888d2e --- /dev/null +++ b/test/tools/test_pipeline.py @@ -0,0 +1,167 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import logging +import os +import unittest + +# bsie imports +from bsie.base import errors +from bsie.utils import ns +from bsie.utils.bsfs import URI +from bsie.utils.node import Node +import bsie.extractor.generic.constant +import bsie.extractor.generic.path +import bsie.extractor.generic.stat +import bsie.reader.path +import bsie.reader.stat + +# objects to test +from bsie.tools.pipeline import Pipeline + + +## code ## + +class TestPipeline(unittest.TestCase): + def setUp(self): + # constant A + csA = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . + ''' + tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + # constant B + csB = ''' + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + owl:maxCardinality "1"^^xsd:number . + ''' + tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] + # extractors/readers + self.ext2rdr = { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(csA, tupA): None, + bsie.extractor.generic.constant.Constant(csB, tupB): None, + } + self.prefix = URI('http://example.com/local/file#') + + def test_essentials(self): + pipeline = Pipeline(self.prefix, self.ext2rdr) + self.assertEqual(str(pipeline), 'Pipeline') + self.assertEqual(repr(pipeline), 'Pipeline(...)') + + def test_equality(self): + pipeline = Pipeline(self.prefix, self.ext2rdr) + # a pipeline is equivalent to itself + self.assertEqual(pipeline, pipeline) + self.assertEqual(hash(pipeline), hash(pipeline)) + # identical builds are equivalent + self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) + self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) + + # equivalence respects prefix + self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))) + # equivalence respects extractors/readers + ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} + self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) + + # equivalence respects schema + p2 = Pipeline(self.prefix, self.ext2rdr) + p2.schema = pipeline.schema.Empty() + self.assertNotEqual(pipeline, p2) + self.assertNotEqual(hash(pipeline), hash(p2)) + + # not equal to other types + class Foo(): pass + self.assertNotEqual(pipeline, Foo()) + self.assertNotEqual(hash(pipeline), hash(Foo())) + self.assertNotEqual(pipeline, 123) + self.assertNotEqual(hash(pipeline), hash(123)) + self.assertNotEqual(pipeline, None) + self.assertNotEqual(hash(pipeline), hash(None)) + + + def test_call(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # build objects for tests + content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427' + subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + p_filesize = pipeline.schema.predicate(ns.bse.filesize) + p_author = pipeline.schema.predicate(ns.bse.author) + p_rating = pipeline.schema.predicate(ns.bse.rating) + entity = pipeline.schema.node(ns.bsfs.Entity) + p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) + + # extract given predicates + self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { + (subject, p_filename, 'testfile.t'), + (subject, p_filesize, 11), + }) + self.assertSetEqual(set(pipeline(testfile, {p_author})), { + (subject, p_author, 'Me, myself, and I'), + }) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), { + (subject, p_filename, 'testfile.t'), + }) + self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { + (subject, p_filesize, 11), + }) + # extract all predicates + self.assertSetEqual(set(pipeline(testfile)), { + (subject, p_filename, 'testfile.t'), + (subject, p_filesize, 11), + (subject, p_author, 'Me, myself, and I'), + (subject, p_rating, 123), + }) + # invalid predicate + self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set()) + # valid/invalid predicates mixed + self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), { + (subject, p_filename, 'testfile.t'), + }) + # invalid path + self.assertRaises(FileNotFoundError, list, pipeline('inexistent_file')) + # FIXME: unreadable file (e.g. permissions error) + + def test_call_reader_err(self): + class FaultyReader(bsie.reader.path.Path): + def __call__(self, path): + raise errors.ReaderError('reader error') + + pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) + with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + + def test_call_extractor_err(self): + class FaultyExtractor(bsie.extractor.generic.path.Path): + def extract(self, subject, content, predicates): + raise errors.ExtractorError('extractor error') + + pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) + with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/tools/testfile.t b/test/tools/testfile.t new file mode 100644 index 0000000..58bf1b8 --- /dev/null +++ b/test/tools/testfile.t @@ -0,0 +1 @@ +hello worl -- cgit v1.2.3 From edc747252a04675c46059215751719b6666a77f9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 3 Dec 2022 18:57:58 +0100 Subject: adapt to schema interface update: owl:maxCardinality changed to bsfs:unique --- test/tools/test_builder.py | 10 +++++----- test/tools/test_pipeline.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'test/tools') diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py index bef0e9d..bc6f903 100644 --- a/test/tools/test_builder.py +++ b/test/tools/test_builder.py @@ -115,11 +115,11 @@ class TestExtractorBuilder(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . bse:rating rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''', 'tuples': [ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), @@ -132,11 +132,11 @@ class TestExtractorBuilder(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . bse:rating rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''', [ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'), ('http://bsfs.ai/schema/Entity#rating', 123), @@ -166,7 +166,7 @@ class TestPipelineBuilder(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''' c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] # prepare builders diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 9888d2e..f98b329 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -33,7 +33,7 @@ class TestPipeline(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''' tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] # constant B @@ -41,7 +41,7 @@ class TestPipeline(unittest.TestCase): bse:rating rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''' tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] # extractors/readers -- cgit v1.2.3 From 559e643bb1fa39feefd2eb73847ad9420daf1deb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 14 Dec 2022 06:10:25 +0100 Subject: bsie extraction and info apps --- test/tools/test_pipeline.py | 20 ++++++++++++++++---- test/tools/testfile.t | 2 +- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'test/tools') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index f98b329..0dd8c75 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -95,7 +95,7 @@ class TestPipeline(unittest.TestCase): # build pipeline pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests - content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427' + content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' subject = Node(ns.bsfs.Entity, self.prefix + content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -108,7 +108,7 @@ class TestPipeline(unittest.TestCase): # extract given predicates self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { (subject, p_filename, 'testfile.t'), - (subject, p_filesize, 11), + (subject, p_filesize, 12), }) self.assertSetEqual(set(pipeline(testfile, {p_author})), { (subject, p_author, 'Me, myself, and I'), @@ -117,12 +117,12 @@ class TestPipeline(unittest.TestCase): (subject, p_filename, 'testfile.t'), }) self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { - (subject, p_filesize, 11), + (subject, p_filesize, 12), }) # extract all predicates self.assertSetEqual(set(pipeline(testfile)), { (subject, p_filename, 'testfile.t'), - (subject, p_filesize, 11), + (subject, p_filesize, 12), (subject, p_author, 'Me, myself, and I'), (subject, p_rating, 123), }) @@ -158,6 +158,18 @@ class TestPipeline(unittest.TestCase): p_filename = pipeline.schema.predicate(ns.bse.filename) self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + def test_predicates(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # + self.assertSetEqual(set(pipeline.predicates()), { + pipeline.schema.predicate(ns.bsfs.Predicate), + pipeline.schema.predicate(ns.bse.filename), + pipeline.schema.predicate(ns.bse.filesize), + pipeline.schema.predicate(ns.bse.author), + pipeline.schema.predicate(ns.bse.rating), + }) + ## main ## diff --git a/test/tools/testfile.t b/test/tools/testfile.t index 58bf1b8..3b18e51 100644 --- a/test/tools/testfile.t +++ b/test/tools/testfile.t @@ -1 +1 @@ -hello worl +hello world -- cgit v1.2.3 From 3b7fee369924eb7704709edeb8c17fff9c020dfb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:06:09 +0100 Subject: import fixes --- test/tools/test_builder.py | 19 +++++++++---------- test/tools/test_pipeline.py | 9 ++++----- 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'test/tools') diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py index bc6f903..62c637c 100644 --- a/test/tools/test_builder.py +++ b/test/tools/test_builder.py @@ -10,8 +10,7 @@ import unittest # bsie imports from bsie import base -from bsie.base import errors -from bsie.utils.bsfs import URI +from bsie.utils import bsfs # objects to test from bsie.tools.builder import ExtractorBuilder @@ -26,12 +25,12 @@ from bsie.tools.builder import _unpack_name class TestUtils(unittest.TestCase): def test_safe_load(self): # invalid module - self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar') - self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar') + self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar') + self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar') # partially valid module - self.assertRaises(errors.LoaderError, _safe_load, 'os.foo', 'foobar') + self.assertRaises(base.errors.LoaderError, _safe_load, 'os.foo', 'foobar') # invalid class - self.assertRaises(errors.LoaderError, _safe_load, 'os.path', 'foo') + self.assertRaises(base.errors.LoaderError, _safe_load, 'os.path', 'foo') # valid module and class cls = _safe_load('collections.abc', 'Container') import collections.abc @@ -65,10 +64,10 @@ class TestReaderBuilder(unittest.TestCase): self.assertRaises(TypeError, builder.build, None) self.assertRaises(ValueError, builder.build, '') self.assertRaises(ValueError, builder.build, 'Path') - self.assertRaises(errors.BuilderError, builder.build, 'path.Path') + self.assertRaises(base.errors.BuilderError, builder.build, 'path.Path') # invalid config builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)}) - self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat') + self.assertRaises(base.errors.BuilderError, builder.build, 'bsie.reader.stat.Stat') builder = ReaderBuilder({'bsie.reader.stat.Stat': 123}) self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat') # no instructions @@ -143,7 +142,7 @@ class TestExtractorBuilder(unittest.TestCase): ])) # building with invalid args - self.assertRaises(errors.BuilderError, ExtractorBuilder( + self.assertRaises(base.errors.BuilderError, ExtractorBuilder( [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0) # non-dict build specification self.assertRaises(TypeError, ExtractorBuilder( @@ -161,7 +160,7 @@ class TestExtractorBuilder(unittest.TestCase): class TestPipelineBuilder(unittest.TestCase): def test_build(self): - prefix = URI('http://example.com/local/file#') + prefix = bsfs.URI('http://example.com/local/file#') c_schema = ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 0dd8c75..92801ed 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -11,9 +11,8 @@ import unittest # bsie imports from bsie.base import errors -from bsie.utils import ns from bsie.utils.bsfs import URI -from bsie.utils.node import Node +from bsie.utils import bsfs, node, ns import bsie.extractor.generic.constant import bsie.extractor.generic.path import bsie.extractor.generic.stat @@ -68,8 +67,8 @@ class TestPipeline(unittest.TestCase): self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) # equivalence respects prefix - self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))) + self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))) # equivalence respects extractors/readers ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) @@ -96,7 +95,7 @@ class TestPipeline(unittest.TestCase): pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) -- cgit v1.2.3 From 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:12:56 +0100 Subject: file node class in default schema --- test/tools/test_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test/tools') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 92801ed..611f8b0 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -30,7 +30,7 @@ class TestPipeline(unittest.TestCase): # constant A csA = ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''' @@ -38,7 +38,7 @@ class TestPipeline(unittest.TestCase): # constant B csB = ''' bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . ''' -- cgit v1.2.3 From 5d9526783ad8432c7d6dfe18c0e9f2b37950b470 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:16:25 +0100 Subject: Pipeline.prefix as Namespace instead of URI --- test/tools/test_pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'test/tools') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 611f8b0..e440ab5 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -11,7 +11,6 @@ import unittest # bsie imports from bsie.base import errors -from bsie.utils.bsfs import URI from bsie.utils import bsfs, node, ns import bsie.extractor.generic.constant import bsie.extractor.generic.path @@ -50,7 +49,7 @@ class TestPipeline(unittest.TestCase): bsie.extractor.generic.constant.Constant(csA, tupA): None, bsie.extractor.generic.constant.Constant(csB, tupB): None, } - self.prefix = URI('http://example.com/local/file#') + self.prefix = bsfs.Namespace('http://example.com/local/') def test_essentials(self): pipeline = Pipeline(self.prefix, self.ext2rdr) @@ -101,7 +100,7 @@ class TestPipeline(unittest.TestCase): p_filesize = pipeline.schema.predicate(ns.bse.filesize) p_author = pipeline.schema.predicate(ns.bse.author) p_rating = pipeline.schema.predicate(ns.bse.rating) - entity = pipeline.schema.node(ns.bsfs.Entity) + entity = pipeline.schema.node(ns.bsfs.File) p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) # extract given predicates -- cgit v1.2.3 From 37510d134458bf954ca2da6d40be0d6c76661e8c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:19:21 +0100 Subject: bsie/pipeline interface revision: * predicates -> principals * schema as property * principals as property * information hiding * full subschema instead of only predicates --- test/tools/test_pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'test/tools') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index e440ab5..91bf736 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -75,7 +75,7 @@ class TestPipeline(unittest.TestCase): # equivalence respects schema p2 = Pipeline(self.prefix, self.ext2rdr) - p2.schema = pipeline.schema.Empty() + p2._schema = pipeline.schema.Empty() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) @@ -160,8 +160,7 @@ class TestPipeline(unittest.TestCase): # build pipeline pipeline = Pipeline(self.prefix, self.ext2rdr) # - self.assertSetEqual(set(pipeline.predicates()), { - pipeline.schema.predicate(ns.bsfs.Predicate), + self.assertSetEqual(set(pipeline.principals), { pipeline.schema.predicate(ns.bse.filename), pipeline.schema.predicate(ns.bse.filesize), pipeline.schema.predicate(ns.bse.author), -- cgit v1.2.3 From 057e09d6537bf5c39815661a75819081e3e5fda7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 13:37:59 +0100 Subject: adaptions to updates in bsfs --- test/tools/test_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/tools') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 91bf736..a116a30 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -94,7 +94,7 @@ class TestPipeline(unittest.TestCase): pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash) + subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) -- cgit v1.2.3