aboutsummaryrefslogtreecommitdiffstats
path: root/test/lib
diff options
context:
space:
mode:
Diffstat (limited to 'test/lib')
-rw-r--r--test/lib/test_bsie.py24
-rw-r--r--test/lib/test_builder.py107
-rw-r--r--test/lib/test_pipeline.py175
3 files changed, 295 insertions, 11 deletions
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 771a0c2..38e6f59 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -4,13 +4,15 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import unittest
# bsie imports
-from bsie.base import extractor
-from bsie.tools import builder
+from bsie.extractor import ExtractorBuilder
+from bsie.extractor.base import SCHEMA_PREAMBLE
+from bsie.lib import PipelineBuilder
+from bsie.reader import ReaderBuilder
from bsie.utils import bsfs, node, ns
# objects to test
@@ -22,9 +24,9 @@ from bsie.lib.bsie import BSIE
class TestBSIE(unittest.TestCase):
def setUp(self):
# reader builder
- rbuild = builder.ReaderBuilder({})
+ rbuild = ReaderBuilder({})
# extractor builder
- ebuild = builder.ExtractorBuilder([
+ ebuild = ExtractorBuilder([
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
@@ -39,7 +41,7 @@ class TestBSIE(unittest.TestCase):
])
# build pipeline
self.prefix = bsfs.Namespace('http://example.com/local/')
- pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
+ pbuild = PipelineBuilder(self.prefix, rbuild, ebuild)
self.pipeline = pbuild.build()
def test_construction(self):
@@ -50,7 +52,7 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
@@ -77,7 +79,7 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer;
@@ -95,7 +97,7 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
@@ -122,7 +124,7 @@ class TestBSIE(unittest.TestCase):
self.assertSetEqual(set(lib.principals), {
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:author rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
@@ -137,7 +139,7 @@ class TestBSIE(unittest.TestCase):
self.assertSetEqual(set(lib.principals), {
ns.bse.filesize,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer;
diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py
new file mode 100644
index 0000000..273d620
--- /dev/null
+++ b/test/lib/test_builder.py
@@ -0,0 +1,107 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import logging
+import unittest
+
+# bsie imports
+from bsie.extractor import ExtractorBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs
+
+# objects to test
+from bsie.lib import PipelineBuilder
+
+
+## code ##
+
+class TestPipelineBuilder(unittest.TestCase):
+ def test_build(self):
+ prefix = bsfs.URI('http://example.com/local/file#')
+ c_schema = '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ '''
+ c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
+ # prepare builders
+ rbuild = ReaderBuilder({})
+ ebuild = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.constant.Constant': dict(
+ schema=c_schema,
+ tuples=c_tuples,
+ )},
+ ])
+ # build pipeline
+ builder = PipelineBuilder(prefix, rbuild, ebuild)
+ pipeline = builder.build()
+ # delayed import
+ import bsie.reader.path
+ import bsie.reader.stat
+ import bsie.extractor.generic.path
+ import bsie.extractor.generic.stat
+ import bsie.extractor.generic.constant
+ # check pipeline
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+
+ # fail to load extractor
+ ebuild_err = ExtractorBuilder([
+ {'bsie.extractor.generic.foo.Foo': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+ # fail to build extractor
+ ebuild_err = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {'foo': 123}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+ # fail to load reader
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ # switch reader of an extractor
+ old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
+ bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
+ # build pipeline with invalid reader reference
+ pipeline = PipelineBuilder(prefix, rbuild, ebuild).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+ # switch back
+ bsie.extractor.generic.path.Path.CONTENT_READER = old_reader
+
+ # fail to build reader
+ rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py
new file mode 100644
index 0000000..8fecc74
--- /dev/null
+++ b/test/lib/test_pipeline.py
@@ -0,0 +1,175 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import logging
+import os
+import unittest
+
+# bsie imports
+from bsie.utils import bsfs, errors, node, ns
+import bsie.extractor.generic.constant
+import bsie.extractor.generic.path
+import bsie.extractor.generic.stat
+import bsie.reader.path
+import bsie.reader.stat
+
+# objects to test
+from bsie.lib.pipeline import Pipeline
+
+
+## code ##
+
+class TestPipeline(unittest.TestCase):
+ def setUp(self):
+ # constant A
+ csA = '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:File ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ '''
+ tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
+ # constant B
+ csB = '''
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:File ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+ '''
+ tupB = [('http://bsfs.ai/schema/Entity#rating', 123)]
+ # extractors/readers
+ self.ext2rdr = {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(csA, tupA): None,
+ bsie.extractor.generic.constant.Constant(csB, tupB): None,
+ }
+ self.prefix = bsfs.Namespace('http://example.com/local/')
+
+ def test_essentials(self):
+ pipeline = Pipeline(self.prefix, self.ext2rdr)
+ self.assertEqual(str(pipeline), 'Pipeline')
+ self.assertEqual(repr(pipeline), 'Pipeline(...)')
+
+ def test_equality(self):
+ pipeline = Pipeline(self.prefix, self.ext2rdr)
+ # a pipeline is equivalent to itself
+ self.assertEqual(pipeline, pipeline)
+ self.assertEqual(hash(pipeline), hash(pipeline))
+ # identical builds are equivalent
+ self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr))
+ self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr)))
+
+ # equivalence respects prefix
+ self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))
+ self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)))
+ # equivalence respects extractors/readers
+ ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0}
+ self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr))
+ self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr)))
+
+ # equivalence respects schema
+ p2 = Pipeline(self.prefix, self.ext2rdr)
+ p2._schema = bsfs.schema.Schema()
+ self.assertNotEqual(pipeline, p2)
+ self.assertNotEqual(hash(pipeline), hash(p2))
+
+ # not equal to other types
+ class Foo(): pass
+ self.assertNotEqual(pipeline, Foo())
+ self.assertNotEqual(hash(pipeline), hash(Foo()))
+ self.assertNotEqual(pipeline, 123)
+ self.assertNotEqual(hash(pipeline), hash(123))
+ self.assertNotEqual(pipeline, None)
+ self.assertNotEqual(hash(pipeline), hash(None))
+
+
+ def test_call(self):
+ # build pipeline
+ pipeline = Pipeline(self.prefix, self.ext2rdr)
+ # build objects for tests
+ content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
+ subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash])
+ testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+ p_filename = pipeline.schema.predicate(ns.bse.filename)
+ p_filesize = pipeline.schema.predicate(ns.bse.filesize)
+ p_author = pipeline.schema.predicate(ns.bse.author)
+ p_rating = pipeline.schema.predicate(ns.bse.rating)
+ entity = pipeline.schema.node(ns.bsfs.File)
+ p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, range=entity)
+
+ # extract given predicates
+ self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), {
+ (subject, p_filename, 'testfile.t'),
+ (subject, p_filesize, 12),
+ })
+ self.assertSetEqual(set(pipeline(testfile, {p_author})), {
+ (subject, p_author, 'Me, myself, and I'),
+ })
+ self.assertSetEqual(set(pipeline(testfile, {p_filename})), {
+ (subject, p_filename, 'testfile.t'),
+ })
+ self.assertSetEqual(set(pipeline(testfile, {p_filesize})), {
+ (subject, p_filesize, 12),
+ })
+ # extract all predicates
+ self.assertSetEqual(set(pipeline(testfile)), {
+ (subject, p_filename, 'testfile.t'),
+ (subject, p_filesize, 12),
+ (subject, p_author, 'Me, myself, and I'),
+ (subject, p_rating, 123),
+ })
+ # invalid predicate
+ self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set())
+ # valid/invalid predicates mixed
+ self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), {
+ (subject, p_filename, 'testfile.t'),
+ })
+ # invalid path
+ self.assertRaises(FileNotFoundError, list, pipeline('inexistent_file'))
+ # FIXME: unreadable file (e.g. permissions error)
+
+ def test_call_reader_err(self):
+ class FaultyReader(bsie.reader.path.Path):
+ def __call__(self, path):
+ raise errors.ReaderError('reader error')
+
+ pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()})
+ with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
+ testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+ p_filename = pipeline.schema.predicate(ns.bse.filename)
+ self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
+
+ def test_call_extractor_err(self):
+ class FaultyExtractor(bsie.extractor.generic.path.Path):
+ def extract(self, subject, content, predicates):
+ raise errors.ExtractorError('extractor error')
+
+ pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()})
+ with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
+ testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+ p_filename = pipeline.schema.predicate(ns.bse.filename)
+ self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
+
+ def test_predicates(self):
+ # build pipeline
+ pipeline = Pipeline(self.prefix, self.ext2rdr)
+ #
+ self.assertSetEqual(set(pipeline.principals), {
+ pipeline.schema.predicate(ns.bse.filename),
+ pipeline.schema.predicate(ns.bse.filesize),
+ pipeline.schema.predicate(ns.bse.author),
+ pipeline.schema.predicate(ns.bse.rating),
+ })
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##