aboutsummaryrefslogtreecommitdiffstats
path: root/test/extractor
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2022-12-24 10:27:09 +0100
committerMatthias Baumgartner <dev@igsor.net>2022-12-24 10:27:09 +0100
commit266c2c9a072bf3289fd7f2d75278b7d59528378c (patch)
tree60760e0fec84d5cd7b3f3efef11e3892df5cc85a /test/extractor
parented2074ae88f2db6cb6b38716b43b35e29eb2e16c (diff)
downloadbsie-266c2c9a072bf3289fd7f2d75278b7d59528378c.tar.gz
bsie-266c2c9a072bf3289fd7f2d75278b7d59528378c.tar.bz2
bsie-266c2c9a072bf3289fd7f2d75278b7d59528378c.zip
package restructuring: base
* Reader and Extractor to respective reader/extractor modules * ReaderBuilder to reader module * ExtractorBuilder to extractor module * Loading module in utils (safe_load, unpack_name) * Pipeline and PipelineBuilder to lib module * errors to utils * documentation: "standard import" and "external import"
Diffstat (limited to 'test/extractor')
-rw-r--r--test/extractor/generic/test_path.py6
-rw-r--r--test/extractor/generic/test_stat.py6
-rw-r--r--test/extractor/test_base.py70
-rw-r--r--test/extractor/test_builder.py103
4 files changed, 179 insertions, 6 deletions
diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py
index 820f402..778ac5a 100644
--- a/test/extractor/generic/test_path.py
+++ b/test/extractor/generic/test_path.py
@@ -4,11 +4,11 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import unittest
# bsie imports
-from bsie.base import extractor
+from bsie.extractor import base
from bsie.utils import bsfs, node as _node, ns
# objects to test
@@ -29,7 +29,7 @@ class TestPath(unittest.TestCase):
def test_schema(self):
self.assertEqual(Path().schema,
- bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py
index 3441438..ff74085 100644
--- a/test/extractor/generic/test_stat.py
+++ b/test/extractor/generic/test_stat.py
@@ -4,12 +4,12 @@ Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
-# imports
+# standard imports
import os
import unittest
# bsie imports
-from bsie.base import extractor
+from bsie.extractor import base
from bsie.utils import bsfs, node as _node, ns
# objects to test
@@ -30,7 +30,7 @@ class TestStat(unittest.TestCase):
def test_schema(self):
self.assertEqual(Stat().schema,
- bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:File ;
rdfs:range xsd:integer ;
diff --git a/test/extractor/test_base.py b/test/extractor/test_base.py
new file mode 100644
index 0000000..6a63c59
--- /dev/null
+++ b/test/extractor/test_base.py
@@ -0,0 +1,70 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import bsfs, ns
+
+# objects to test
+from bsie.extractor import base
+
+
+## code ##
+
+class StubExtractor(base.Extractor):
+ def __init__(self):
+ super().__init__(bsfs.schema.Schema.from_string(base.SCHEMA_PREAMBLE + '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "false"^^xsd:boolean .
+ bse:comment rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "false"^^xsd:boolean .
+ '''))
+
+ def extract(self, subject, content, predicates):
+ raise NotImplementedError()
+
+class StubSub(StubExtractor):
+ pass
+
+class TestExtractor(unittest.TestCase):
+ def test_essentials(self):
+ ext = StubExtractor()
+ self.assertEqual(str(ext), 'StubExtractor')
+ self.assertEqual(repr(ext), 'StubExtractor()')
+ self.assertEqual(ext, StubExtractor())
+ self.assertEqual(hash(ext), hash(StubExtractor()))
+
+ sub = StubSub()
+ self.assertEqual(str(sub), 'StubSub')
+ self.assertEqual(repr(sub), 'StubSub()')
+ self.assertEqual(sub, StubSub())
+ self.assertEqual(hash(sub), hash(StubSub()))
+ self.assertNotEqual(ext, sub)
+ self.assertNotEqual(hash(ext), hash(sub))
+
+ def test_principals(self):
+ schema = bsfs.schema.Schema.Empty()
+ entity = schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+ string = schema.literal(ns.bsfs.Literal).get_child(bsfs.URI('http://www.w3.org/2001/XMLSchema#string'))
+ p_author = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.author, domain=entity, range=string)
+ p_comment = schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.comment, domain=entity, range=string)
+ ext = StubExtractor()
+ self.assertSetEqual(set(ext.principals),
+ {p_author, p_comment} | set(schema.predicates()) - {schema.predicate(ns.bsfs.Predicate)})
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/extractor/test_builder.py b/test/extractor/test_builder.py
new file mode 100644
index 0000000..039ea53
--- /dev/null
+++ b/test/extractor/test_builder.py
@@ -0,0 +1,103 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import errors
+
+# objects to test
+from bsie.extractor import ExtractorBuilder
+
+
+## code ##
+
+class TestExtractorBuilder(unittest.TestCase):
+ def test_iter(self):
+ # no specifications
+ self.assertListEqual(list(ExtractorBuilder([])), [])
+ # some specifications
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ self.assertListEqual(list(builder), [0, 1, 2])
+
+ def test_build(self):
+ # simple and repeated extractors
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ ext = [builder.build(0), builder.build(1), builder.build(2)]
+ import bsie.extractor.generic.path
+ import bsie.extractor.generic.stat
+ self.assertListEqual(ext, [
+ bsie.extractor.generic.path.Path(),
+ bsie.extractor.generic.stat.Stat(),
+ bsie.extractor.generic.path.Path(),
+ ])
+ # out-of-bounds raises KeyError
+ self.assertRaises(IndexError, builder.build, 3)
+
+ # building with args
+ builder = ExtractorBuilder([
+ {'bsie.extractor.generic.constant.Constant': {
+ 'schema': '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+ ''',
+ 'tuples': [
+ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
+ ('http://bsfs.ai/schema/Entity#rating', 123),
+ ],
+ }}])
+ obj = builder.build(0)
+ import bsie.extractor.generic.constant
+ self.assertEqual(obj, bsie.extractor.generic.constant.Constant('''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+ ''', [
+ ('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I'),
+ ('http://bsfs.ai/schema/Entity#rating', 123),
+ ]))
+
+ # building with invalid args
+ self.assertRaises(errors.BuilderError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0)
+ # non-dict build specification
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [('bsie.extractor.generic.path.Path', {})]).build, 0)
+ # multiple keys per build specification
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': {},
+ 'bsie.extractor.generic.stat.Stat': {}}]).build, 0)
+ # non-dict value for kwargs
+ self.assertRaises(TypeError, ExtractorBuilder(
+ [{'bsie.extractor.generic.path.Path': 123}]).build, 0)
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##