From 266c2c9a072bf3289fd7f2d75278b7d59528378c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 24 Dec 2022 10:27:09 +0100 Subject: package restructuring: base * Reader and Extractor to respective reader/extractor modules * ReaderBuilder to reader module * ExtractorBuilder to extractor module * Loading module in utils (safe_load, unpack_name) * Pipeline and PipelineBuilder to lib module * errors to utils * documentation: "standard import" and "external import" --- test/lib/test_pipeline.py | 175 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 test/lib/test_pipeline.py (limited to 'test/lib/test_pipeline.py') diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py new file mode 100644 index 0000000..c6f7aba --- /dev/null +++ b/test/lib/test_pipeline.py @@ -0,0 +1,175 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# standard imports +import logging +import os +import unittest + +# bsie imports +from bsie.utils import bsfs, errors, node, ns +import bsie.extractor.generic.constant +import bsie.extractor.generic.path +import bsie.extractor.generic.stat +import bsie.reader.path +import bsie.reader.stat + +# objects to test +from bsie.lib.pipeline import Pipeline + + +## code ## + +class TestPipeline(unittest.TestCase): + def setUp(self): + # constant A + csA = ''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + ''' + tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + # constant B + csB = ''' + bse:rating rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:File ; + rdfs:range xsd:integer ; + bsfs:unique "true"^^xsd:boolean . + ''' + tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] + # extractors/readers + self.ext2rdr = { + bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), + bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), + bsie.extractor.generic.constant.Constant(csA, tupA): None, + bsie.extractor.generic.constant.Constant(csB, tupB): None, + } + self.prefix = bsfs.Namespace('http://example.com/local/') + + def test_essentials(self): + pipeline = Pipeline(self.prefix, self.ext2rdr) + self.assertEqual(str(pipeline), 'Pipeline') + self.assertEqual(repr(pipeline), 'Pipeline(...)') + + def test_equality(self): + pipeline = Pipeline(self.prefix, self.ext2rdr) + # a pipeline is equivalent to itself + self.assertEqual(pipeline, pipeline) + self.assertEqual(hash(pipeline), hash(pipeline)) + # identical builds are equivalent + self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) + self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) + + # equivalence respects prefix + self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))) + # equivalence respects extractors/readers + ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} + self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) + + # equivalence respects schema + p2 = Pipeline(self.prefix, self.ext2rdr) + p2._schema = pipeline.schema.Empty() + self.assertNotEqual(pipeline, p2) + self.assertNotEqual(hash(pipeline), hash(p2)) + + # not equal to other types + class Foo(): pass + self.assertNotEqual(pipeline, Foo()) + self.assertNotEqual(hash(pipeline), hash(Foo())) + self.assertNotEqual(pipeline, 123) + self.assertNotEqual(hash(pipeline), hash(123)) + self.assertNotEqual(pipeline, None) + self.assertNotEqual(hash(pipeline), hash(None)) + + + def test_call(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # build objects for tests + content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' + subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + p_filesize = pipeline.schema.predicate(ns.bse.filesize) + p_author = pipeline.schema.predicate(ns.bse.author) + p_rating = pipeline.schema.predicate(ns.bse.rating) + entity = pipeline.schema.node(ns.bsfs.File) + p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) + + # extract given predicates + self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { + (subject, p_filename, 'testfile.t'), + (subject, p_filesize, 12), + }) + self.assertSetEqual(set(pipeline(testfile, {p_author})), { + (subject, p_author, 'Me, myself, and I'), + }) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), { + (subject, p_filename, 'testfile.t'), + }) + self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { + (subject, p_filesize, 12), + }) + # extract all predicates + self.assertSetEqual(set(pipeline(testfile)), { + (subject, p_filename, 'testfile.t'), + (subject, p_filesize, 12), + (subject, p_author, 'Me, myself, and I'), + (subject, p_rating, 123), + }) + # invalid predicate + self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set()) + # valid/invalid predicates mixed + self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), { + (subject, p_filename, 'testfile.t'), + }) + # invalid path + self.assertRaises(FileNotFoundError, list, pipeline('inexistent_file')) + # FIXME: unreadable file (e.g. permissions error) + + def test_call_reader_err(self): + class FaultyReader(bsie.reader.path.Path): + def __call__(self, path): + raise errors.ReaderError('reader error') + + pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) + with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + + def test_call_extractor_err(self): + class FaultyExtractor(bsie.extractor.generic.path.Path): + def extract(self, subject, content, predicates): + raise errors.ExtractorError('extractor error') + + pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) + with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + p_filename = pipeline.schema.predicate(ns.bse.filename) + self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + + def test_predicates(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # + self.assertSetEqual(set(pipeline.principals), { + pipeline.schema.predicate(ns.bse.filename), + pipeline.schema.predicate(ns.bse.filesize), + pipeline.schema.predicate(ns.bse.author), + pipeline.schema.predicate(ns.bse.rating), + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From a0d1af36bdc09fe8eebe0c87a3f587395908ae28 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 14:43:08 +0100 Subject: bsfs changes propagated to tests --- test/lib/test_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test/lib/test_pipeline.py') diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index c6f7aba..8fecc74 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -74,7 +74,7 @@ class TestPipeline(unittest.TestCase): # equivalence respects schema p2 = Pipeline(self.prefix, self.ext2rdr) - p2._schema = pipeline.schema.Empty() + p2._schema = bsfs.schema.Schema() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) @@ -100,7 +100,7 @@ class TestPipeline(unittest.TestCase): p_author = pipeline.schema.predicate(ns.bse.author) p_rating = pipeline.schema.predicate(ns.bse.rating) entity = pipeline.schema.node(ns.bsfs.File) - p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) + p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, range=entity) # extract given predicates self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { -- cgit v1.2.3 From 9c26a5ef759b010d8cf4384b0515cc188b885d81 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 17:44:00 +0100 Subject: node naming policy --- test/lib/test_pipeline.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'test/lib/test_pipeline.py') diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index 8fecc74..61fddd7 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -48,32 +48,28 @@ class TestPipeline(unittest.TestCase): bsie.extractor.generic.constant.Constant(csA, tupA): None, bsie.extractor.generic.constant.Constant(csB, tupB): None, } - self.prefix = bsfs.Namespace('http://example.com/local/') def test_essentials(self): - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) self.assertEqual(str(pipeline), 'Pipeline') self.assertEqual(repr(pipeline), 'Pipeline(...)') def test_equality(self): - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # a pipeline is equivalent to itself self.assertEqual(pipeline, pipeline) self.assertEqual(hash(pipeline), hash(pipeline)) # identical builds are equivalent - self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) - self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) + self.assertEqual(pipeline, Pipeline(self.ext2rdr)) + self.assertEqual(hash(pipeline), hash(Pipeline(self.ext2rdr))) - # equivalence respects prefix - self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))) # equivalence respects extractors/readers ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} - self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) - self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) + self.assertNotEqual(pipeline, Pipeline(ext2rdr)) + self.assertNotEqual(hash(pipeline), hash(Pipeline(ext2rdr))) # equivalence respects schema - p2 = Pipeline(self.prefix, self.ext2rdr) + p2 = Pipeline(self.ext2rdr) p2._schema = bsfs.schema.Schema() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) @@ -90,10 +86,10 @@ class TestPipeline(unittest.TestCase): def test_call(self): # build pipeline - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) + subject = node.Node(ns.bsfs.File, ucid=content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) @@ -138,7 +134,7 @@ class TestPipeline(unittest.TestCase): def __call__(self, path): raise errors.ReaderError('reader error') - pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) + pipeline = Pipeline({bsie.extractor.generic.path.Path(): FaultyReader()}) with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -149,7 +145,7 @@ class TestPipeline(unittest.TestCase): def extract(self, subject, content, predicates): raise errors.ExtractorError('extractor error') - pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) + pipeline = Pipeline({FaultyExtractor(): bsie.reader.path.Path()}) with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -157,7 +153,7 @@ class TestPipeline(unittest.TestCase): def test_predicates(self): # build pipeline - pipeline = Pipeline(self.prefix, self.ext2rdr) + pipeline = Pipeline(self.ext2rdr) # self.assertSetEqual(set(pipeline.principals), { pipeline.schema.predicate(ns.bse.filename), -- cgit v1.2.3 From 4b5c4d486bb4f0f4da2e25ad464e8336a781cdcb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 22:31:03 +0100 Subject: removed module header stubs --- test/lib/test_pipeline.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'test/lib/test_pipeline.py') diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index 61fddd7..5125a5c 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -1,9 +1,4 @@ -""" -Part of the bsie test suite. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import logging import os -- cgit v1.2.3 From d2052e77210e0ace2c5f06e48afe2a8acb412965 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 4 Mar 2023 13:41:13 +0100 Subject: namespace refactoring and cleanup --- test/lib/test_pipeline.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'test/lib/test_pipeline.py') diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index 5125a5c..eb088a9 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -23,19 +23,19 @@ class TestPipeline(unittest.TestCase): # constant A csA = ''' bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''' - tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] + tupA = [('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I')] # constant B csB = ''' bse:rating rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:File ; + rdfs:domain bsn:Entity ; rdfs:range xsd:integer ; bsfs:unique "true"^^xsd:boolean . ''' - tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] + tupB = [('https://schema.bsfs.io/ie/Node/Entity#rating', 123)] # extractors/readers self.ext2rdr = { bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), @@ -84,13 +84,13 @@ class TestPipeline(unittest.TestCase): pipeline = Pipeline(self.ext2rdr) # build objects for tests content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, ucid=content_hash) + subject = node.Node(ns.bsn.Entity, ucid=content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) p_author = pipeline.schema.predicate(ns.bse.author) p_rating = pipeline.schema.predicate(ns.bse.rating) - entity = pipeline.schema.node(ns.bsfs.File) + entity = pipeline.schema.node(ns.bsn.Entity) p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, range=entity) # extract given predicates -- cgit v1.2.3