From 3e6a69ce7f109f0fd4352507ad60d58d4cbd24a7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:43:12 +0100 Subject: builders and pipeline --- test/tools/test_pipeline.py | 167 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 test/tools/test_pipeline.py (limited to 'test/tools/test_pipeline.py') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py new file mode 100644 index 0000000..9888d2e --- /dev/null +++ b/test/tools/test_pipeline.py @@ -0,0 +1,167 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import logging +import os +import unittest + +# bsie imports +from bsie.base import errors +from bsie.utils import ns +from bsie.utils.bsfs import URI +from bsie.utils.node import Node +import bsie.extractor.generic.constant +import bsie.extractor.generic.path +import bsie.extractor.generic.stat +import bsie.reader.path +import bsie.reader.stat + +# objects to test +from bsie.tools.pipeline import Pipeline + + +## code ## + +class TestPipeline(unittest.TestCase): + def setUp(self): + # constant A + csA = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + owl:maxCardinality "1"^^xsd:number . + ''' + tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + # constant B + csB = ''' + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer ; + owl:maxCardinality "1"^^xsd:number . + ''' + tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] + # extractors/readers + self.ext2rdr = { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(csA, tupA): None, + bsie.extractor.generic.constant.Constant(csB, tupB): None, + } + self.prefix = URI('http://example.com/local/file#') + + def test_essentials(self): + pipeline = Pipeline(self.prefix, self.ext2rdr) + self.assertEqual(str(pipeline), 'Pipeline') + self.assertEqual(repr(pipeline), 'Pipeline(...)') + + def test_equality(self): + pipeline = Pipeline(self.prefix, self.ext2rdr) + # a pipeline is equivalent to itself + self.assertEqual(pipeline, pipeline) + self.assertEqual(hash(pipeline), hash(pipeline)) + # identical builds are equivalent + self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) + self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) + + # equivalence respects prefix + self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))) + # equivalence respects extractors/readers + ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} + self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) + + # equivalence respects schema + p2 = Pipeline(self.prefix, self.ext2rdr) + p2.schema = pipeline.schema.Empty() + self.assertNotEqual(pipeline, p2) + self.assertNotEqual(hash(pipeline), hash(p2)) + + # not equal to other types + class Foo(): pass + self.assertNotEqual(pipeline, Foo()) + self.assertNotEqual(hash(pipeline), hash(Foo())) + self.assertNotEqual(pipeline, 123) + self.assertNotEqual(hash(pipeline), hash(123)) + self.assertNotEqual(pipeline, None) + self.assertNotEqual(hash(pipeline), hash(None)) + + + def test_call(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # build objects for tests + content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427' + subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + p_filesize = pipeline.schema.predicate(ns.bse.filesize) + p_author = pipeline.schema.predicate(ns.bse.author) + p_rating = pipeline.schema.predicate(ns.bse.rating) + entity = pipeline.schema.node(ns.bsfs.Entity) + p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) + + # extract given predicates + self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { + (subject, p_filename, 'testfile.t'), + (subject, p_filesize, 11), + }) + self.assertSetEqual(set(pipeline(testfile, {p_author})), { + (subject, p_author, 'Me, myself, and I'), + }) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), { + (subject, p_filename, 'testfile.t'), + }) + self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { + (subject, p_filesize, 11), + }) + # extract all predicates + self.assertSetEqual(set(pipeline(testfile)), { + (subject, p_filename, 'testfile.t'), + (subject, p_filesize, 11), + (subject, p_author, 'Me, myself, and I'), + (subject, p_rating, 123), + }) + # invalid predicate + self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set()) + # valid/invalid predicates mixed + self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), { + (subject, p_filename, 'testfile.t'), + }) + # invalid path + self.assertRaises(FileNotFoundError, list, pipeline('inexistent_file')) + # FIXME: unreadable file (e.g. permissions error) + + def test_call_reader_err(self): + class FaultyReader(bsie.reader.path.Path): + def __call__(self, path): + raise errors.ReaderError('reader error') + + pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) + with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + + def test_call_extractor_err(self): + class FaultyExtractor(bsie.extractor.generic.path.Path): + def extract(self, subject, content, predicates): + raise errors.ExtractorError('extractor error') + + pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) + with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From edc747252a04675c46059215751719b6666a77f9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 3 Dec 2022 18:57:58 +0100 Subject: adapt to schema interface update: owl:maxCardinality changed to bsfs:unique --- test/tools/test_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test/tools/test_pipeline.py') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 9888d2e..f98b329 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -33,7 +33,7 @@ class TestPipeline(unittest.TestCase): bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''' tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] # constant B @@ -41,7 +41,7 @@ class TestPipeline(unittest.TestCase): bse:rating rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; - owl:maxCardinality "1"^^xsd:number . + bsfs:unique "true"^^xsd:boolean . ''' tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] # extractors/readers -- cgit v1.2.3 From 559e643bb1fa39feefd2eb73847ad9420daf1deb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 14 Dec 2022 06:10:25 +0100 Subject: bsie extraction and info apps --- test/tools/test_pipeline.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'test/tools/test_pipeline.py') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index f98b329..0dd8c75 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -95,7 +95,7 @@ class TestPipeline(unittest.TestCase): # build pipeline pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests - content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427' + content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' subject = Node(ns.bsfs.Entity, self.prefix + content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -108,7 +108,7 @@ class TestPipeline(unittest.TestCase): # extract given predicates self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { (subject, p_filename, 'testfile.t'), - (subject, p_filesize, 11), + (subject, p_filesize, 12), }) self.assertSetEqual(set(pipeline(testfile, {p_author})), { (subject, p_author, 'Me, myself, and I'), @@ -117,12 +117,12 @@ class TestPipeline(unittest.TestCase): (subject, p_filename, 'testfile.t'), }) self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { - (subject, p_filesize, 11), + (subject, p_filesize, 12), }) # extract all predicates self.assertSetEqual(set(pipeline(testfile)), { (subject, p_filename, 'testfile.t'), - (subject, p_filesize, 11), + (subject, p_filesize, 12), (subject, p_author, 'Me, myself, and I'), (subject, p_rating, 123), }) @@ -158,6 +158,18 @@ class TestPipeline(unittest.TestCase): p_filename = pipeline.schema.predicate(ns.bse.filename) self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + def test_predicates(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # + self.assertSetEqual(set(pipeline.predicates()), { + pipeline.schema.predicate(ns.bsfs.Predicate), + pipeline.schema.predicate(ns.bse.filename), + pipeline.schema.predicate(ns.bse.filesize), + pipeline.schema.predicate(ns.bse.author), + pipeline.schema.predicate(ns.bse.rating), + }) + ## main ## -- cgit v1.2.3 From 3b7fee369924eb7704709edeb8c17fff9c020dfb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:06:09 +0100 Subject: import fixes --- test/tools/test_pipeline.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'test/tools/test_pipeline.py') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 0dd8c75..92801ed 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -11,9 +11,8 @@ import unittest # bsie imports from bsie.base import errors -from bsie.utils import ns from bsie.utils.bsfs import URI -from bsie.utils.node import Node +from bsie.utils import bsfs, node, ns import bsie.extractor.generic.constant import bsie.extractor.generic.path import bsie.extractor.generic.stat @@ -68,8 +67,8 @@ class TestPipeline(unittest.TestCase): self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) # equivalence respects prefix - self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))) + self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))) # equivalence respects extractors/readers ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) @@ -96,7 +95,7 @@ class TestPipeline(unittest.TestCase): pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) -- cgit v1.2.3 From 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:12:56 +0100 Subject: file node class in default schema --- test/tools/test_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test/tools/test_pipeline.py') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 92801ed..611f8b0 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -30,7 +30,7 @@ class TestPipeline(unittest.TestCase): # constant A csA = ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''' @@ -38,7 +38,7 @@ class TestPipeline(unittest.TestCase): # constant B csB = ''' bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . ''' -- cgit v1.2.3 From 5d9526783ad8432c7d6dfe18c0e9f2b37950b470 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:16:25 +0100 Subject: Pipeline.prefix as Namespace instead of URI --- test/tools/test_pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'test/tools/test_pipeline.py') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 611f8b0..e440ab5 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -11,7 +11,6 @@ import unittest # bsie imports from bsie.base import errors -from bsie.utils.bsfs import URI from bsie.utils import bsfs, node, ns import bsie.extractor.generic.constant import bsie.extractor.generic.path @@ -50,7 +49,7 @@ class TestPipeline(unittest.TestCase): bsie.extractor.generic.constant.Constant(csA, tupA): None, bsie.extractor.generic.constant.Constant(csB, tupB): None, } - self.prefix = URI('http://example.com/local/file#') + self.prefix = bsfs.Namespace('http://example.com/local/') def test_essentials(self): pipeline = Pipeline(self.prefix, self.ext2rdr) @@ -101,7 +100,7 @@ class TestPipeline(unittest.TestCase): p_filesize = pipeline.schema.predicate(ns.bse.filesize) p_author = pipeline.schema.predicate(ns.bse.author) p_rating = pipeline.schema.predicate(ns.bse.rating) - entity = pipeline.schema.node(ns.bsfs.Entity) + entity = pipeline.schema.node(ns.bsfs.File) p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) # extract given predicates -- cgit v1.2.3 From 37510d134458bf954ca2da6d40be0d6c76661e8c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:19:21 +0100 Subject: bsie/pipeline interface revision: * predicates -> principals * schema as property * principals as property * information hiding * full subschema instead of only predicates --- test/tools/test_pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'test/tools/test_pipeline.py') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index e440ab5..91bf736 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -75,7 +75,7 @@ class TestPipeline(unittest.TestCase): # equivalence respects schema p2 = Pipeline(self.prefix, self.ext2rdr) - p2.schema = pipeline.schema.Empty() + p2._schema = pipeline.schema.Empty() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) @@ -160,8 +160,7 @@ class TestPipeline(unittest.TestCase): # build pipeline pipeline = Pipeline(self.prefix, self.ext2rdr) # - self.assertSetEqual(set(pipeline.predicates()), { - pipeline.schema.predicate(ns.bsfs.Predicate), + self.assertSetEqual(set(pipeline.principals), { pipeline.schema.predicate(ns.bse.filename), pipeline.schema.predicate(ns.bse.filesize), pipeline.schema.predicate(ns.bse.author), -- cgit v1.2.3 From 057e09d6537bf5c39815661a75819081e3e5fda7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 13:37:59 +0100 Subject: adaptions to updates in bsfs --- test/tools/test_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/tools/test_pipeline.py') diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index 91bf736..a116a30 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -94,7 +94,7 @@ class TestPipeline(unittest.TestCase): pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash) + subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) -- cgit v1.2.3