aboutsummaryrefslogtreecommitdiffstats
path: root/test
diff options
context:
space:
mode:
author: Matthias Baumgartner <dev@igsor.net> 2022-11-25 14:59:17 +0100
committer: Matthias Baumgartner <dev@igsor.net> 2022-11-25 14:59:17 +0100
commit: a294bbe0622911bcd6df37c38865a4c0eb290593 (patch)
tree: f038ed8d4f04c63991939e13e61ae170de4e2c57 /test
parent: 9389c741bdbbca9adbff6099d440706cd63deac4 (diff)
parent: 3e6a69ce7f109f0fd4352507ad60d58d4cbd24a7 (diff)
download: bsie-a294bbe0622911bcd6df37c38865a4c0eb290593.tar.gz
download: bsie-a294bbe0622911bcd6df37c38865a4c0eb290593.tar.bz2
download: bsie-a294bbe0622911bcd6df37c38865a4c0eb290593.zip
Merge branch 'mb/tools' into develop
Diffstat (limited to 'test')
-rw-r--r-- test/base/__init__.py 0
-rw-r--r-- test/base/test_extractor.py 70
-rw-r--r-- test/base/test_reader.py 45
-rw-r--r-- test/extractor/generic/test_constant.py 100
-rw-r--r-- test/extractor/generic/test_path.py 53
-rw-r--r-- test/extractor/generic/test_stat.py 48
-rw-r--r-- test/tools/__init__.py 0
-rw-r--r-- test/tools/test_builder.py 247
-rw-r--r-- test/tools/test_pipeline.py 167
-rw-r--r-- test/tools/testfile.t 1
-rw-r--r-- test/utils/__init__.py 0
-rw-r--r-- test/utils/test_node.py 66
12 files changed, 759 insertions, 38 deletions
diff --git a/test/base/__init__.py b/test/base/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/base/__init__.py
diff --git a/test/base/test_extractor.py b/test/base/test_extractor.py
new file mode 100644
index 0000000..7a00079
--- /dev/null
+++ b/test/base/test_extractor.py
@@ -0,0 +1,70 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import unittest
+
+# bsie imports
+from bsie.utils import ns
+from bsie.utils.bsfs import schema as _schema, URI
+
+# objects to test
+from bsie.base import extractor
+
+
+## code ##
+
+class StubExtractor(extractor.Extractor):
+ def __init__(self):
+ super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "INF"^^xsd:number .
+ bse:comment rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "INF"^^xsd:number .
+ '''))
+
+ def extract(self, subject, content, predicates):
+ raise NotImplementedError()
+
+class StubSub(StubExtractor):
+ pass
+
+class TestExtractor(unittest.TestCase):
+ def test_essentials(self):
+ ext = StubExtractor()
+ self.assertEqual(str(ext), 'StubExtractor')
+ self.assertEqual(repr(ext), 'StubExtractor()')
+ self.assertEqual(ext, StubExtractor())
+ self.assertEqual(hash(ext), hash(StubExtractor()))
+
+ sub = StubSub()
+ self.assertEqual(str(sub), 'StubSub')
+ self.assertEqual(repr(sub), 'StubSub()')
+ self.assertEqual(sub, StubSub())
+ self.assertEqual(hash(sub), hash(StubSub()))
+ self.assertNotEqual(ext, sub)
+ self.assertNotEqual(hash(ext), hash(sub))
+
+ def test_predicates(self):
+ schema = _schema.Schema.Empty()
+ entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+ string = schema.literal(ns.bsfs.Literal).get_child(URI('http://www.w3.org/2001/XMLSchema#string'))
+ p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string)
+ p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string)
+ ext = StubExtractor()
+ self.assertSetEqual(set(ext.predicates()), {p_author, p_comment} | set(schema.predicates()))
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/base/test_reader.py b/test/base/test_reader.py
new file mode 100644
index 0000000..802b314
--- /dev/null
+++ b/test/base/test_reader.py
@@ -0,0 +1,45 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import unittest
+
+# objects to test
+from bsie.base import reader
+
+
+## code ##
+
+class StubReader(reader.Reader):
+ def __call__(self, path):
+ raise NotImplementedError()
+
+class StubSub(StubReader):
+ pass
+
+class TestReader(unittest.TestCase):
+ def test_essentials(self):
+ ext = StubReader()
+ self.assertEqual(str(ext), 'StubReader')
+ self.assertEqual(repr(ext), 'StubReader()')
+ self.assertEqual(ext, StubReader())
+ self.assertEqual(hash(ext), hash(StubReader()))
+
+ sub = StubSub()
+ self.assertEqual(str(sub), 'StubSub')
+ self.assertEqual(repr(sub), 'StubSub()')
+ self.assertEqual(sub, StubSub())
+ self.assertEqual(hash(sub), hash(StubSub()))
+ self.assertNotEqual(ext, sub)
+ self.assertNotEqual(hash(ext), hash(sub))
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py
index f3ab0a3..aa33fb4 100644
--- a/test/extractor/generic/test_constant.py
+++ b/test/extractor/generic/test_constant.py
@@ -20,39 +20,101 @@ from bsie.extractor.generic.constant import Constant
class TestConstant(unittest.TestCase):
def test_extract(self):
schema = '''
- bse:author a bsfs:Predicate ;
+ bse:author rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
owl:maxCardinality "1"^^xsd:number .
-
- bse:comment a bsfs:Predicate ;
+ bse:comment rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
owl:maxCardinality "INF"^^xsd:number .
-
'''
tuples = [
(ns.bse.author, 'Me, myself, and I'),
(ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'),
]
- node = Node(ns.bsfs.Entity, '') # Blank node
- predicates = (ns.bse.author, ns.bse.comment)
ext = Constant(schema, tuples)
+ node = Node(ns.bsfs.Entity, '') # Blank node
+ p_author = ext.schema.predicate(ns.bse.author)
+ p_comment = ext.schema.predicate(ns.bse.comment)
+ entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+ string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
# baseline
- self.assertSetEqual(set(ext.extract(node, None, predicates)),
- {(node, pred, value) for pred, value in tuples})
+ self.assertSetEqual(set(ext.extract(node, None, (p_author, p_comment))),
+ {(node, p_author, 'Me, myself, and I'),
+ (node, p_comment, 'the quick brown fox jumps over the lazy dog.')})
# predicates is respected
- self.assertSetEqual(set(ext.extract(node, None, (ns.bse.author, ns.bse.foobar))),
- {(node, ns.bse.author, 'Me, myself, and I')})
- self.assertSetEqual(set(ext.extract(node, None, (ns.bse.comment, ns.bse.foobar))),
- {(node, ns.bse.comment, 'the quick brown fox jumps over the lazy dog.')})
- self.assertSetEqual(set(ext.extract(node, None, (ns.bse.foobar, ns.bse.barfoo))), set())
-
- # FIXME: should change!
- # for now: no schema compliance
- ext = Constant('', tuples)
- self.assertSetEqual(set(ext.extract(node, None, predicates)),
- {(node, pred, value) for pred, value in tuples})
+ p_foobar = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foobar, domain=entity, range=entity)
+ self.assertSetEqual(set(ext.extract(node, None, (p_author, p_foobar))),
+ {(node, p_author, 'Me, myself, and I')})
+ self.assertSetEqual(set(ext.extract(node, None, (p_comment, p_foobar))),
+ {(node, p_comment, 'the quick brown fox jumps over the lazy dog.')})
+ p_barfoo = ext.schema.predicate(ns.bse.author).get_child(ns.bse.comment, domain=entity, range=string)
+ self.assertSetEqual(set(ext.extract(node, None, (p_foobar, p_barfoo))), set())
+
+ def test_construct(self):
+ # schema compliance
+ schema = '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "1"^^xsd:number .
+ bse:comment rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "INF"^^xsd:number .
+ '''
+ # can construct an extractor from a valid schema and matching tuples
+ self.assertIsInstance(Constant(schema, [
+ (ns.bse.author, 'Me, myself, and I'),
+ (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'),
+ ]), Constant)
+ # predicates are validated
+ self.assertRaises(KeyError, Constant, schema, [
+ (ns.bse.author, 'Me, myself, and I'),
+ (ns.bse.foobar, 'foobar!')])
+ # FIXME: values are validated
+ #class Foo(): pass # not string compatible
+ #self.assertRaises(ValueError, Constant, schema, [
+ # (ns.bse.author, Foo())])
+
+ def test_eq(self):
+ schema_a = '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "1"^^xsd:number .
+ '''
+ schema_b = '''
+ bse:comment rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "INF"^^xsd:number .
+ '''
+ tuples_a = [(ns.bse.author, 'Me, myself, and I')]
+ tuples_b = [(ns.bse.comment, 'the quick brown fox jumps over the lazy dog.') ]
+ # distinct instances, same data
+ self.assertEqual(
+ Constant(schema_a, tuples_a),
+ Constant(schema_a, tuples_a))
+ self.assertEqual(
+ hash(Constant(schema_a, tuples_a)),
+ hash(Constant(schema_a, tuples_a)))
+ # different data
+ self.assertNotEqual(
+ Constant(schema_a, tuples_a),
+ Constant(schema_b, tuples_b))
+ self.assertNotEqual(
+ hash(Constant(schema_a, tuples_a)),
+ hash(Constant(schema_b, tuples_b)))
+ # different objects
+ class Foo(): pass
+ self.assertNotEqual(Constant(schema_a, tuples_a), Foo())
+ self.assertNotEqual(hash(Constant(schema_a, tuples_a)), hash(Foo()))
+ self.assertNotEqual(Constant(schema_a, tuples_a), 123)
+ self.assertNotEqual(hash(Constant(schema_a, tuples_a)), hash(123))
+ self.assertNotEqual(Constant(schema_a, tuples_a), None)
+ self.assertNotEqual(hash(Constant(schema_a, tuples_a)), hash(None))
## main ##
diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py
index 8623490..9376c7c 100644
--- a/test/extractor/generic/test_path.py
+++ b/test/extractor/generic/test_path.py
@@ -8,7 +8,9 @@ Author: Matthias Baumgartner, 2022
import unittest
# bsie imports
+from bsie import base
from bsie.utils import ns
+from bsie.utils.bsfs import schema
from bsie.utils.node import Node
# objects to test
@@ -18,23 +20,52 @@ from bsie.extractor.generic.path import Path
## code ##
class TestPath(unittest.TestCase):
+ def test_eq(self):
+ # distinct instances, same data
+ self.assertEqual(Path(), Path())
+ # different classes
+ class Foo(): pass
+ self.assertNotEqual(Path(), Foo())
+ self.assertNotEqual(Path(), 123)
+ self.assertNotEqual(Path(), None)
+
+ def test_schema(self):
+ self.assertEqual(Path().schema,
+ schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + '''
+ bse:filename rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "INF"^^xsd:number .
+ '''))
+
def test_extract(self):
- node = Node(ns.bsfs.Entity, '') # Blank node
ext = Path()
+ node = Node(ns.bsfs.Entity, '') # Blank node
+ content = '/tmp/foo/bar'
+ p_filename = ext.schema.predicate(ns.bse.filename)
+ entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+ string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
# baseline
- self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ))),
- {(node, ns.bse.filename, 'bar')})
+ self.assertSetEqual(set(ext.extract(node, content, (p_filename, ))),
+ {(node, p_filename, 'bar')})
# predicates parameter is respected
- self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ns.bse.foo))),
- {(node, ns.bse.filename, 'bar')})
- self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.foo, ))), set())
+ p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate
+ self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_foo))),
+ {(node, p_filename, 'bar')})
+ self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set())
+ # predicates are validated
+ p_bar = p_foo.get_child(ns.bse.filename) # same URI but different hierarchy
+ self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_bar))),
+ {(node, p_filename, 'bar')})
+ self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set())
# path variations
- self.assertSetEqual(set(ext.extract(node, 'bar', (ns.bse.filename, ))),
- {(node, ns.bse.filename, 'bar')})
- self.assertSetEqual(set(ext.extract(node, '', (ns.bse.filename, ))),
- {(node, ns.bse.filename, '')})
- self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filename, ))), set())
+ self.assertSetEqual(set(ext.extract(node, 'bar', (p_filename, ))),
+ {(node, p_filename, 'bar')})
+ self.assertSetEqual(set(ext.extract(node, '', (p_filename, ))),
+ {(node, p_filename, '')})
+ # errors are suppressed
+ self.assertSetEqual(set(ext.extract(node, None, (p_filename, ))), set())
## main ##
diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py
index f89b053..26dad6a 100644
--- a/test/extractor/generic/test_stat.py
+++ b/test/extractor/generic/test_stat.py
@@ -9,7 +9,9 @@ import os
import unittest
# bsie imports
+from bsie import base
from bsie.utils import ns
+from bsie.utils.bsfs import schema
from bsie.utils.node import Node
# objects to test
@@ -18,21 +20,51 @@ from bsie.extractor.generic.stat import Stat
## code ##
-class TestConstant(unittest.TestCase):
+class TestStat(unittest.TestCase):
+ def test_eq(self):
+ # distinct instances, same data
+ self.assertEqual(Stat(), Stat())
+ # different classes
+ class Foo(): pass
+ self.assertNotEqual(Stat(), Foo())
+ self.assertNotEqual(Stat(), 123)
+ self.assertNotEqual(Stat(), None)
+
+ def test_schema(self):
+ self.assertEqual(Stat().schema,
+ schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + '''
+ bse:filesize rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ owl:maxCardinality "INF"^^xsd:number .
+ '''))
+
def test_extract(self):
+ ext = Stat()
node = Node(ns.bsfs.Entity, '') # Blank node
content = os.stat(__file__)
- ext = Stat()
+ p_filesize = ext.schema.predicate(ns.bse.filesize)
+ entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+ string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
# baseline
- self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ))),
- {(node, ns.bse.filesize, content.st_size)})
+ self.assertSetEqual(set(ext.extract(node, content, (p_filesize, ))),
+ {(node, p_filesize, content.st_size)})
# predicates parameter is respected
- self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ns.bse.foo))),
- {(node, ns.bse.filesize, content.st_size)})
- self.assertSetEqual(set(ext.extract(node, content, (ns.bse.foo, ))), set())
+ p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate
+ self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_foo))),
+ {(node, p_filesize, content.st_size)})
+ self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set())
+ # predicates are validated
+ p_bar = p_foo.get_child(ns.bse.filesizse) # same URI but different hierarchy
+ self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_bar))),
+ {(node, p_filesize, content.st_size)})
+ self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set())
# content variations
- self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filesize, ))), set())
+ self.assertSetEqual(set(ext.extract(node, os.stat_result([12345] * len(content)), (p_filesize, p_bar))),
+ {(node, p_filesize, 12345)})
+ # errors are suppressed
+ self.assertSetEqual(set(ext.extract(node, None, (p_filesize, ))), set())
## main ##
diff --git a/test/tools/__init__.py b/test/tools/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/tools/__init__.py
diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py
new file mode 100644
index 0000000..bef0e9d
--- /dev/null
+++ b/test/tools/test_builder.py
@@ -0,0 +1,247 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import logging
+import unittest
+
+# bsie imports
+from bsie import base
+from bsie.base import errors
+from bsie.utils.bsfs import URI
+
+# objects to test
+from bsie.tools.builder import ExtractorBuilder
+from bsie.tools.builder import PipelineBuilder
+from bsie.tools.builder import ReaderBuilder
+from bsie.tools.builder import _safe_load
+from bsie.tools.builder import _unpack_name
+
+
+## code ##
+
+class TestUtils(unittest.TestCase):
+ def test_safe_load(self):
+ # invalid module
+ self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
+ self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
+ # partially valid module
+ self.assertRaises(errors.LoaderError, _safe_load, 'os.foo', 'foobar')
+ # invalid class
+ self.assertRaises(errors.LoaderError, _safe_load, 'os.path', 'foo')
+ # valid module and class
+ cls = _safe_load('collections.abc', 'Container')
+ import collections.abc
+ self.assertEqual(cls, collections.abc.Container)
+
+ def test_unpack_name(self):
+ self.assertRaises(TypeError, _unpack_name, 123)
+ self.assertRaises(TypeError, _unpack_name, None)
+ self.assertRaises(ValueError, _unpack_name, '')
+ self.assertRaises(ValueError, _unpack_name, 'path')
+ self.assertRaises(ValueError, _unpack_name, '.Path')
+ self.assertEqual(_unpack_name('path.Path'), ('path', 'Path'))
+ self.assertEqual(_unpack_name('path.foo.bar.Path'), ('path.foo.bar', 'Path'))
+
+
+class TestReaderBuilder(unittest.TestCase):
+ def test_build(self):
+ builder = ReaderBuilder({'bsie.reader.path.Path': {}})
+ # build configured reader
+ cls = builder.build('bsie.reader.path.Path')
+ import bsie.reader.path
+ self.assertIsInstance(cls, bsie.reader.path.Path)
+ # build unconfigured reader
+ cls = builder.build('bsie.reader.stat.Stat')
+ import bsie.reader.stat
+ self.assertIsInstance(cls, bsie.reader.stat.Stat)
+ # re-build previous reader (test cache)
+ self.assertEqual(cls, builder.build('bsie.reader.stat.Stat'))
+ # test invalid
+ self.assertRaises(TypeError, builder.build, 123)
+ self.assertRaises(TypeError, builder.build, None)
+ self.assertRaises(ValueError, builder.build, '')
+ self.assertRaises(ValueError, builder.build, 'Path')
+ self.assertRaises(errors.BuilderError, builder.build, 'path.Path')
+ # invalid config
+ builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
+ self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
+ builder = ReaderBuilder({'bsie.reader.stat.Stat': 123})
+ self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat')
+ # no instructions
+ builder = ReaderBuilder({})
+ cls = builder.build('bsie.reader.stat.Stat')
+ self.assertIsInstance(cls, bsie.reader.stat.Stat)
+
+
+
+class TestExtractorBuilder(unittest.TestCase):
+ def test_iter(self):
+ # no specifications
+ self.assertListEqual(list(ExtractorBuilder([])), [])
+ # some specifications
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ self.assertListEqual(list(builder), [0, 1, 2])
+
+ def test_build(self):
+ # simple and repeated extractors
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ ext = [builder.build(0), builder.build(1), builder.build(2)]
+ import bsie.extractor.generic.path
+ import bsie.extractor.generic.stat
+ self.assertListEqual(ext, [
+ bsie.extractor.generic.path.Path(),
+ bsie.extractor.generic.stat.Stat(),
+ bsie.extractor.generic.path.Path(),
+ ])
+ # out-of-bounds raises IndexError
+ self.assertRaises(IndexError, builder.build, 3)
+
+ # building with args
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.constant.Constant': {
+ 'schema': '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "1"^^xsd:number .
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ owl:maxCardinality "1"^^xsd:number .
+ ''',
+ 'tuples': [
+ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
+ ('http://bsfs.ai/schema/Entity#rating', 123),
+ ],
+ }}])
+ obj = builder.build(0)
+ import bsie.extractor.generic.constant
+ self.assertEqual(obj, bsie.extractor.generic.constant.Constant('''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "1"^^xsd:number .
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ owl:maxCardinality "1"^^xsd:number .
+ ''', [
+ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
+ ('http://bsfs.ai/schema/Entity#rating', 123),
+ ]))
+
+ # building with invalid args
+ self.assertRaises(errors.BuilderError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0)
+ # non-dict build specification
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [('bsie.extractor.generic.path.Path', {})]).build, 0)
+ # multiple keys per build specification
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': {},
+ 'bsie.extractor.generic.stat.Stat': {}}]).build, 0)
+ # non-dict value for kwargs
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': 123}]).build, 0)
+
+
+
+
+class TestPipelineBuilder(unittest.TestCase):
+ def test_build(self):
+     """PipelineBuilder.build maps each buildable extractor to its reader.
+
+     Extractors or readers that fail to load or build are expected to be
+     logged at ERROR level on 'bsie.tools.builder' and left out of the
+     resulting pipeline instead of aborting the build.
+     """
+     prefix = URI('http://example.com/local/file#')
+     c_schema = '''
+         bse:author rdfs:subClassOf bsfs:Predicate ;
+             rdfs:domain bsfs:Entity ;
+             rdfs:range xsd:string ;
+             owl:maxCardinality "1"^^xsd:number .
+         '''
+     c_tuples = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
+     # prepare builders
+     rbuild = ReaderBuilder({})
+     ebuild = ExtractorBuilder([
+         {'bsie.extractor.generic.path.Path': {}},
+         {'bsie.extractor.generic.stat.Stat': {}},
+         {'bsie.extractor.generic.constant.Constant': dict(
+             schema=c_schema,
+             tuples=c_tuples,
+         )},
+     ])
+     # build pipeline
+     builder = PipelineBuilder(prefix, rbuild, ebuild)
+     pipeline = builder.build()
+     # delayed import
+     import bsie.reader.path
+     import bsie.reader.stat
+     import bsie.extractor.generic.path
+     import bsie.extractor.generic.stat
+     import bsie.extractor.generic.constant
+     # check pipeline
+     self.assertDictEqual(pipeline._ext2rdr, {
+         bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+         bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+         bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+     })
+
+     # fail to load extractor
+     ebuild_err = ExtractorBuilder([
+         {'bsie.extractor.generic.foo.Foo': {}},
+         {'bsie.extractor.generic.path.Path': {}},
+     ])
+     with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
+         pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+         self.assertDictEqual(pipeline._ext2rdr, {
+             bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+     # fail to build extractor
+     ebuild_err = ExtractorBuilder([
+         {'bsie.extractor.generic.path.Path': {'foo': 123}},
+         {'bsie.extractor.generic.path.Path': {}},
+     ])
+     with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
+         pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+         self.assertDictEqual(pipeline._ext2rdr, {
+             bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+     # fail to load reader
+     # switch reader of an extractor; this patches a *class* attribute, so it
+     # must be restored even if the build or an assertion below fails,
+     # otherwise every later test in the process sees the bogus reader.
+     old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
+     bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
+     try:
+         with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
+             # build pipeline with invalid reader reference
+             pipeline = PipelineBuilder(prefix, rbuild, ebuild).build()
+             self.assertDictEqual(pipeline._ext2rdr, {
+                 bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+                 bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+             })
+     finally:
+         # switch back
+         bsie.extractor.generic.path.Path.CONTENT_READER = old_reader
+
+     # fail to build reader
+     rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
+     with self.assertLogs(logging.getLogger('bsie.tools.builder'), logging.ERROR):
+         pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build()
+         self.assertDictEqual(pipeline._ext2rdr, {
+             bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+             bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+         })
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
new file mode 100644
index 0000000..9888d2e
--- /dev/null
+++ b/test/tools/test_pipeline.py
@@ -0,0 +1,167 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import logging
+import os
+import unittest
+
+# bsie imports
+from bsie.base import errors
+from bsie.utils import ns
+from bsie.utils.bsfs import URI
+from bsie.utils.node import Node
+import bsie.extractor.generic.constant
+import bsie.extractor.generic.path
+import bsie.extractor.generic.stat
+import bsie.reader.path
+import bsie.reader.stat
+
+# objects to test
+from bsie.tools.pipeline import Pipeline
+
+
+## code ##
+
+class TestPipeline(unittest.TestCase):
+ def setUp(self):
+ # constant A
+ csA = '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "1"^^xsd:number .
+ '''
+ tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')]
+ # constant B
+ csB = '''
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ owl:maxCardinality "1"^^xsd:number .
+ '''
+ tupB = [('http://bsfs.ai/schema/Entity#rating', 123)]
+ # extractors/readers
+ self.ext2rdr = {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(csA, tupA): None,
+ bsie.extractor.generic.constant.Constant(csB, tupB): None,
+ }
+ self.prefix = URI('http://example.com/local/file#')
+
+ def test_essentials(self):
+ pipeline = Pipeline(self.prefix, self.ext2rdr)
+ self.assertEqual(str(pipeline), 'Pipeline')
+ self.assertEqual(repr(pipeline), 'Pipeline(...)')
+
+ def test_equality(self):
+ pipeline = Pipeline(self.prefix, self.ext2rdr)
+ # a pipeline is equivalent to itself
+ self.assertEqual(pipeline, pipeline)
+ self.assertEqual(hash(pipeline), hash(pipeline))
+ # identical builds are equivalent
+ self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr))
+ self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr)))
+
+ # equivalence respects prefix
+ self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))
+ self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)))
+ # equivalence respects extractors/readers
+ ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0}
+ self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr))
+ self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr)))
+
+ # equivalence respects schema
+ p2 = Pipeline(self.prefix, self.ext2rdr)
+ p2.schema = pipeline.schema.Empty()
+ self.assertNotEqual(pipeline, p2)
+ self.assertNotEqual(hash(pipeline), hash(p2))
+
+ # not equal to other types
+ class Foo(): pass
+ self.assertNotEqual(pipeline, Foo())
+ self.assertNotEqual(hash(pipeline), hash(Foo()))
+ self.assertNotEqual(pipeline, 123)
+ self.assertNotEqual(hash(pipeline), hash(123))
+ self.assertNotEqual(pipeline, None)
+ self.assertNotEqual(hash(pipeline), hash(None))
+
+
+ def test_call(self):
+ # build pipeline
+ pipeline = Pipeline(self.prefix, self.ext2rdr)
+ # build objects for tests
+ content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427'
+ subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
+ testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+ p_filename = pipeline.schema.predicate(ns.bse.filename)
+ p_filesize = pipeline.schema.predicate(ns.bse.filesize)
+ p_author = pipeline.schema.predicate(ns.bse.author)
+ p_rating = pipeline.schema.predicate(ns.bse.rating)
+ entity = pipeline.schema.node(ns.bsfs.Entity)
+ p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity)
+
+ # extract given predicates
+ self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), {
+ (subject, p_filename, 'testfile.t'),
+ (subject, p_filesize, 11),
+ })
+ self.assertSetEqual(set(pipeline(testfile, {p_author})), {
+ (subject, p_author, 'Me, myself, and I'),
+ })
+ self.assertSetEqual(set(pipeline(testfile, {p_filename})), {
+ (subject, p_filename, 'testfile.t'),
+ })
+ self.assertSetEqual(set(pipeline(testfile, {p_filesize})), {
+ (subject, p_filesize, 11),
+ })
+ # extract all predicates
+ self.assertSetEqual(set(pipeline(testfile)), {
+ (subject, p_filename, 'testfile.t'),
+ (subject, p_filesize, 11),
+ (subject, p_author, 'Me, myself, and I'),
+ (subject, p_rating, 123),
+ })
+ # invalid predicate
+ self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set())
+ # valid/invalid predicates mixed
+ self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), {
+ (subject, p_filename, 'testfile.t'),
+ })
+ # invalid path
+ self.assertRaises(FileNotFoundError, list, pipeline('inexistent_file'))
+ # FIXME: unreadable file (e.g. permissions error)
+
+ def test_call_reader_err(self):
+ class FaultyReader(bsie.reader.path.Path):
+ def __call__(self, path):
+ raise errors.ReaderError('reader error')
+
+ pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()})
+ with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR):
+ testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+ p_filename = pipeline.schema.predicate(ns.bse.filename)
+ self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
+
+ def test_call_extractor_err(self):
+ class FaultyExtractor(bsie.extractor.generic.path.Path):
+ def extract(self, subject, content, predicates):
+ raise errors.ExtractorError('extractor error')
+
+ pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()})
+ with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR):
+ testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+ p_filename = pipeline.schema.predicate(ns.bse.filename)
+ self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/tools/testfile.t b/test/tools/testfile.t
new file mode 100644
index 0000000..58bf1b8
--- /dev/null
+++ b/test/tools/testfile.t
@@ -0,0 +1 @@
+hello worl
diff --git a/test/utils/__init__.py b/test/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/utils/__init__.py
diff --git a/test/utils/test_node.py b/test/utils/test_node.py
new file mode 100644
index 0000000..826f199
--- /dev/null
+++ b/test/utils/test_node.py
@@ -0,0 +1,66 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import unittest
+
+# bsie imports
+from bsie.utils.bsfs import URI
+from bsie.utils import ns
+
+# objects to test
+from bsie.utils.node import Node
+
+
+## code ##
+
+class TestNode(unittest.TestCase):
+ def test_equality(self):
+ uri = URI('http://example.com/me/entity#1234')
+ node = Node(ns.bsfs.Entity, uri)
+ # basic equivalence
+ self.assertEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234')))
+ self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234'))))
+ # equality respects uri
+ self.assertNotEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')))
+ self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321'))))
+ # equality respects node_type
+ self.assertNotEqual(node, Node(ns.bsfs.Foo, uri))
+ self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri)))
+ # not equal to other types
+ self.assertNotEqual(node, 1234)
+ self.assertNotEqual(hash(node), hash(1234))
+ self.assertNotEqual(node, uri)
+ self.assertNotEqual(hash(node), hash(uri))
+ self.assertNotEqual(node, ns.bsfs.Entity)
+ self.assertNotEqual(hash(node), hash(ns.bsfs.Entity))
+ class Foo(): pass
+ self.assertNotEqual(node, Foo())
+ self.assertNotEqual(hash(node), hash(Foo()))
+
+ def test_str(self):
+ uri = URI('http://example.com/me/entity#1234')
+ # basic string conversion
+ node = Node(ns.bsfs.Entity, uri)
+ self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)')
+ self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)')
+ # string conversion respects node_type
+ node = Node(ns.bsfs.Foo, uri)
+ self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)')
+ self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)')
+ # string conversion respects uri
+ node = Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321'))
+ self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)')
+ self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)')
+
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##