about summary refs log tree commit diff stats
path: root/test/tools
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2023-01-16 21:37:09 +0100
committer Matthias Baumgartner <dev@igsor.net> 2023-01-16 21:37:09 +0100
commit 05a841215c82ef40d4679dfc4d2c26572bd4d349 (patch)
tree 9888ae0bd2345816d1ab479dd34b4c6b902c158a /test/tools
parent 057e09d6537bf5c39815661a75819081e3e5fda7 (diff)
parent 58aaa864f9747d27c065739256d4c6635ca9b751 (diff)
download bsie-05a841215c82ef40d4679dfc4d2c26572bd4d349.tar.gz
bsie-05a841215c82ef40d4679dfc4d2c26572bd4d349.tar.bz2
bsie-05a841215c82ef40d4679dfc4d2c26572bd4d349.zip
Merge branch 'mb/feature' into develop
Diffstat (limited to 'test/tools')
-rw-r--r-- test/tools/__init__.py 0
-rw-r--r-- test/tools/test_builder.py 246
-rw-r--r-- test/tools/test_pipeline.py 176
-rw-r--r-- test/tools/testfile.t 1
4 files changed, 0 insertions, 423 deletions
diff --git a/test/tools/__init__.py b/test/tools/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/test/tools/__init__.py
+++ /dev/null
diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py
deleted file mode 100644
index 62c637c..0000000
--- a/test/tools/test_builder.py
+++ /dev/null
@@ -1,246 +0,0 @@
-"""
-
-Part of the bsie test suite.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import logging
-import unittest
-
-# bsie imports
-from bsie import base
-from bsie.utils import bsfs
-
-# objects to test
-from bsie.tools.builder import ExtractorBuilder
-from bsie.tools.builder import PipelineBuilder
-from bsie.tools.builder import ReaderBuilder
-from bsie.tools.builder import _safe_load
-from bsie.tools.builder import _unpack_name
-
-
-## code ##
-
-class TestUtils(unittest.TestCase):
- def test_safe_load(self):
- # invalid module
- self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
- self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
- # partially valid module
- self.assertRaises(base.errors.LoaderError, _safe_load, 'os.foo', 'foobar')
- # invalid class
- self.assertRaises(base.errors.LoaderError, _safe_load, 'os.path', 'foo')
- # valid module and class
- cls = _safe_load('collections.abc', 'Container')
- import collections.abc
- self.assertEqual(cls, collections.abc.Container)
-
- def test_unpack_name(self):
- self.assertRaises(TypeError, _unpack_name, 123)
- self.assertRaises(TypeError, _unpack_name, None)
- self.assertRaises(ValueError, _unpack_name, '')
- self.assertRaises(ValueError, _unpack_name, 'path')
- self.assertRaises(ValueError, _unpack_name, '.Path')
- self.assertEqual(_unpack_name('path.Path'), ('path', 'Path'))
- self.assertEqual(_unpack_name('path.foo.bar.Path'), ('path.foo.bar', 'Path'))
-
-
-class TestReaderBuilder(unittest.TestCase):
- def test_build(self):
- builder = ReaderBuilder({'bsie.reader.path.Path': {}})
- # build configured reader
- cls = builder.build('bsie.reader.path.Path')
- import bsie.reader.path
- self.assertIsInstance(cls, bsie.reader.path.Path)
- # build unconfigured reader
- cls = builder.build('bsie.reader.stat.Stat')
- import bsie.reader.stat
- self.assertIsInstance(cls, bsie.reader.stat.Stat)
- # re-build previous reader (test cache)
- self.assertEqual(cls, builder.build('bsie.reader.stat.Stat'))
- # test invalid
- self.assertRaises(TypeError, builder.build, 123)
- self.assertRaises(TypeError, builder.build, None)
- self.assertRaises(ValueError, builder.build, '')
- self.assertRaises(ValueError, builder.build, 'Path')
- self.assertRaises(base.errors.BuilderError, builder.build, 'path.Path')
- # invalid config
- builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
- self.assertRaises(base.errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
- builder = ReaderBuilder({'bsie.reader.stat.Stat': 123})
- self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat')
- # no instructions
- builder = ReaderBuilder({})
- cls = builder.build('bsie.reader.stat.Stat')
- self.assertIsInstance(cls, bsie.reader.stat.Stat)
-
-
-
-class TestExtractorBuilder(unittest.TestCase):
- def test_iter(self):
- # no specifications
- self.assertListEqual(list(ExtractorBuilder([])), [])
- # some specifications
- builder = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {}},
- {'bsie.extractor.generic.stat.Stat': {}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- self.assertListEqual(list(builder), [0, 1, 2])
-
- def test_build(self):
- # simple and repeated extractors
- builder = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {}},
- {'bsie.extractor.generic.stat.Stat': {}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- ext = [builder.build(0), builder.build(1), builder.build(2)]
- import bsie.extractor.generic.path
- import bsie.extractor.generic.stat
- self.assertListEqual(ext, [
- bsie.extractor.generic.path.Path(),
- bsie.extractor.generic.stat.Stat(),
- bsie.extractor.generic.path.Path(),
- ])
- # out-of-bounds raises IndexError
- self.assertRaises(IndexError, builder.build, 3)
-
- # building with args
- builder = ExtractorBuilder([
- {'bsie.extractor.generic.constant.Constant': {
- 'schema': '''
- bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:string ;
- bsfs:unique "true"^^xsd:boolean .
- bse:rating rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:integer ;
- bsfs:unique "true"^^xsd:boolean .
- ''',
- 'tuples': [
- ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
- ('http://bsfs.ai/schema/Entity#rating', 123),
- ],
- }}])
- obj = builder.build(0)
- import bsie.extractor.generic.constant
- self.assertEqual(obj, bsie.extractor.generic.constant.Constant('''
- bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:string ;
- bsfs:unique "true"^^xsd:boolean .
- bse:rating rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:integer ;
- bsfs:unique "true"^^xsd:boolean .
- ''', [
- ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
- ('http://bsfs.ai/schema/Entity#rating', 123),
- ]))
-
- # building with invalid args
- self.assertRaises(base.errors.BuilderError, ExtractorBuilder(
- [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0)
- # non-dict build specification
- self.assertRaises(TypeError, ExtractorBuilder(
- [('bsie.extractor.generic.path.Path', {})]).build, 0)
- # multiple keys per build specification
- self.assertRaises(TypeError, ExtractorBuilder(
- [{'bsie.extractor.generic.path.Path': {},
- 'bsie.extractor.generic.stat.Stat': {}}]).build, 0)
- # non-dict value for kwargs
- self.assertRaises(TypeError, ExtractorBuilder(
- [{'bsie.extractor.generic.path.Path': 123}]).build, 0)
-
-
-
-
-class TestPipelineBuilder(unittest.TestCase):
- def test_build(self):
- prefix = bsfs.URI('http://example.com/local/file#')
- c_schema = '''
- bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
- rdfs:range xsd:string ;
- bsfs:unique "true"^^xsd:boolean .
- '''
- c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
- # prepare builders
- rbuild = ReaderBuilder({})
- ebuild = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {}},
- {'bsie.extractor.generic.stat.Stat': {}},
- {'bsie.extractor.generic.constant.Constant': dict(
- schema=c_schema,
- tuples=c_tuples,
- )},
- ])
- # build pipeline
- builder = PipelineBuilder(prefix, rbuild, ebuild)
- pipeline = builder.build()
- # delayed import
- import bsie.reader.path
- import bsie.reader.stat
- import bsie.extractor.generic.path
- import bsie.extractor.generic.stat
- import bsie.extractor.generic.constant
- # check pipeline
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
- bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
- bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
- })
-
- # fail to load extractor
- ebuild_err = ExtractorBuilder([
- {'bsie.extractor.generic.foo.Foo': {}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
-
- # fail to build extractor
- ebuild_err = ExtractorBuilder([
- {'bsie.extractor.generic.path.Path': {'foo': 123}},
- {'bsie.extractor.generic.path.Path': {}},
- ])
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
-
- # fail to load reader
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- # switch reader of an extractor
- old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
- bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
- # build pipeline with invalid reader reference
- pipeline = PipelineBuilder(prefix, rbuild, ebuild).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
- bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
- })
- # switch back
- bsie.extractor.generic.path.Path.CONTENT_READER = old_reader
-
- # fail to build reader
- rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
- with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build()
- self.assertDictEqual(pipeline._ext2rdr, {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
- bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
- })
-
-
-## main ##
-
-if __name__ == '__main__':
- unittest.main()
-
-## EOF ##
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
deleted file mode 100644
index a116a30..0000000
--- a/test/tools/test_pipeline.py
+++ /dev/null
@@ -1,176 +0,0 @@
-"""
-
-Part of the bsie test suite.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
-import logging
-import os
-import unittest
-
-# bsie imports
-from bsie.base import errors
-from bsie.utils import bsfs, node, ns
-import bsie.extractor.generic.constant
-import bsie.extractor.generic.path
-import bsie.extractor.generic.stat
-import bsie.reader.path
-import bsie.reader.stat
-
-# objects to test
-from bsie.tools.pipeline import Pipeline
-
-
-## code ##
-
-class TestPipeline(unittest.TestCase):
- def setUp(self):
- # constant A
- csA = '''
- bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
- rdfs:range xsd:string ;
- bsfs:unique "true"^^xsd:boolean .
- '''
- tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
- # constant B
- csB = '''
- bse:rating rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
- rdfs:range xsd:integer ;
- bsfs:unique "true"^^xsd:boolean .
- '''
- tupB = [('http://bsfs.ai/schema/Entity#rating', 123)]
- # extractors/readers
- self.ext2rdr = {
- bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
- bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
- bsie.extractor.generic.constant.Constant(csA, tupA): None,
- bsie.extractor.generic.constant.Constant(csB, tupB): None,
- }
- self.prefix = bsfs.Namespace('http://example.com/local/')
-
- def test_essentials(self):
- pipeline = Pipeline(self.prefix, self.ext2rdr)
- self.assertEqual(str(pipeline), 'Pipeline')
- self.assertEqual(repr(pipeline), 'Pipeline(...)')
-
- def test_equality(self):
- pipeline = Pipeline(self.prefix, self.ext2rdr)
- # a pipeline is equivalent to itself
- self.assertEqual(pipeline, pipeline)
- self.assertEqual(hash(pipeline), hash(pipeline))
- # identical builds are equivalent
- self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr))
- self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr)))
-
- # equivalence respects prefix
- self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))
- self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)))
- # equivalence respects extractors/readers
- ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0}
- self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr))
- self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr)))
-
- # equivalence respects schema
- p2 = Pipeline(self.prefix, self.ext2rdr)
- p2._schema = pipeline.schema.Empty()
- self.assertNotEqual(pipeline, p2)
- self.assertNotEqual(hash(pipeline), hash(p2))
-
- # not equal to other types
- class Foo(): pass
- self.assertNotEqual(pipeline, Foo())
- self.assertNotEqual(hash(pipeline), hash(Foo()))
- self.assertNotEqual(pipeline, 123)
- self.assertNotEqual(hash(pipeline), hash(123))
- self.assertNotEqual(pipeline, None)
- self.assertNotEqual(hash(pipeline), hash(None))
-
-
- def test_call(self):
- # build pipeline
- pipeline = Pipeline(self.prefix, self.ext2rdr)
- # build objects for tests
- content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
- subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash])
- testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
- p_filename = pipeline.schema.predicate(ns.bse.filename)
- p_filesize = pipeline.schema.predicate(ns.bse.filesize)
- p_author = pipeline.schema.predicate(ns.bse.author)
- p_rating = pipeline.schema.predicate(ns.bse.rating)
- entity = pipeline.schema.node(ns.bsfs.File)
- p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity)
-
- # extract given predicates
- self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), {
- (subject, p_filename, 'testfile.t'),
- (subject, p_filesize, 12),
- })
- self.assertSetEqual(set(pipeline(testfile, {p_author})), {
- (subject, p_author, 'Me, myself, and I'),
- })
- self.assertSetEqual(set(pipeline(testfile, {p_filename})), {
- (subject, p_filename, 'testfile.t'),
- })
- self.assertSetEqual(set(pipeline(testfile, {p_filesize})), {
- (subject, p_filesize, 12),
- })
- # extract all predicates
- self.assertSetEqual(set(pipeline(testfile)), {
- (subject, p_filename, 'testfile.t'),
- (subject, p_filesize, 12),
- (subject, p_author, 'Me, myself, and I'),
- (subject, p_rating, 123),
- })
- # invalid predicate
- self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set())
- # valid/invalid predicates mixed
- self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), {
- (subject, p_filename, 'testfile.t'),
- })
- # invalid path
- self.assertRaises(FileNotFoundError, list, pipeline('inexistent_file'))
- # FIXME: unreadable file (e.g. permissions error)
-
- def test_call_reader_err(self):
- class FaultyReader(bsie.reader.path.Path):
- def __call__(self, path):
- raise errors.ReaderError('reader error')
-
- pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()})
- with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR):
- testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
- p_filename = pipeline.schema.predicate(ns.bse.filename)
- self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
-
- def test_call_extractor_err(self):
- class FaultyExtractor(bsie.extractor.generic.path.Path):
- def extract(self, subject, content, predicates):
- raise errors.ExtractorError('extractor error')
-
- pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()})
- with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR):
- testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
- p_filename = pipeline.schema.predicate(ns.bse.filename)
- self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
-
- def test_predicates(self):
- # build pipeline
- pipeline = Pipeline(self.prefix, self.ext2rdr)
- #
- self.assertSetEqual(set(pipeline.principals), {
- pipeline.schema.predicate(ns.bse.filename),
- pipeline.schema.predicate(ns.bse.filesize),
- pipeline.schema.predicate(ns.bse.author),
- pipeline.schema.predicate(ns.bse.rating),
- })
-
-
-## main ##
-
-if __name__ == '__main__':
- unittest.main()
-
-## EOF ##
diff --git a/test/tools/testfile.t b/test/tools/testfile.t
deleted file mode 100644
index 3b18e51..0000000
--- a/test/tools/testfile.t
+++ /dev/null
@@ -1 +0,0 @@
-hello world