about | summary | refs | log | tree | commit | diff | stats
path: root/test/lib
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2023-03-05 19:22:58 +0100
committer Matthias Baumgartner <dev@igsor.net> 2023-03-05 19:22:58 +0100
commit a35b33f4f1ddcf6f1bb8ab0f41b87bf2b847f11d (patch)
tree fb220da28bb7248ebf37ce09af5de88f2c1aaad4 /test/lib
parent 7582c280ad5324a2f0427999911c7e7abc14a6ab (diff)
parent af81318ae9311fd0b0e16949cef3cfaf7996970b (diff)
download bsie-release.tar.gz
bsie-release.tar.bz2
bsie-release.zip
Merge branch 'develop' (HEAD, v0.23.03, release, main)
Diffstat (limited to 'test/lib')
-rw-r--r-- test/lib/test_bsie.py 83
-rw-r--r-- test/lib/test_builder.py 101
-rw-r--r-- test/lib/test_naming_policy.py 115
-rw-r--r-- test/lib/test_pipeline.py 166
4 files changed, 422 insertions, 43 deletions
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 771a0c2..0c393cc 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -1,16 +1,13 @@
-"""
-Part of the bsie test suite.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
+# standard imports
import os
import unittest
# bsie imports
-from bsie.base import extractor
-from bsie.tools import builder
+from bsie.extractor import ExtractorBuilder
+from bsie.extractor.base import SCHEMA_PREAMBLE
+from bsie.lib import PipelineBuilder, DefaultNamingPolicy
+from bsie.reader import ReaderBuilder
from bsie.utils import bsfs, node, ns
# objects to test
@@ -22,53 +19,53 @@ from bsie.lib.bsie import BSIE
class TestBSIE(unittest.TestCase):
def setUp(self):
# reader builder
- rbuild = builder.ReaderBuilder({})
+ rbuild = ReaderBuilder({})
# extractor builder
- ebuild = builder.ExtractorBuilder([
+ ebuild = ExtractorBuilder([
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
- tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')],
+ tuples=[('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I')],
schema='''
bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
''',
)},
])
# build pipeline
- self.prefix = bsfs.Namespace('http://example.com/local/')
- pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
+ self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='me')
+ pbuild = PipelineBuilder(rbuild, ebuild)
self.pipeline = pbuild.build()
def test_construction(self):
- # pipeline only
- lib = BSIE(self.pipeline)
+ # only pipeline and naming policy
+ lib = BSIE(self.pipeline, self.naming_policy)
self.assertSetEqual(set(lib.principals), {
ns.bse.filename,
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
bse:filesize rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:integer;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
'''))
# specify collect
- lib = BSIE(self.pipeline, collect={
+ lib = BSIE(self.pipeline, self.naming_policy, collect={
ns.bse.filesize,
ns.bse.author,
ns.bse.inexistent,
@@ -77,44 +74,44 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:integer;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
'''))
# empty collect is disregarded
- lib = BSIE(self.pipeline, collect={})
+ lib = BSIE(self.pipeline, self.naming_policy, collect={})
self.assertSetEqual(set(lib.principals), {
ns.bse.filename,
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
bse:filesize rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:integer;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
'''))
# specify discard
- lib = BSIE(self.pipeline, discard={
+ lib = BSIE(self.pipeline, self.naming_policy, discard={
ns.bse.filesize,
ns.bse.filename,
ns.bse.inexistent,
@@ -122,40 +119,40 @@ class TestBSIE(unittest.TestCase):
self.assertSetEqual(set(lib.principals), {
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
'''))
# specify collect and discard
- lib = BSIE(self.pipeline,
+ lib = BSIE(self.pipeline, self.naming_policy,
collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar},
discard={ns.bse.author, ns.bse.foo, ns.bse.foobar},
)
self.assertSetEqual(set(lib.principals), {
ns.bse.filesize,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:integer;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
'''))
def test_from_file(self):
# setup
- lib = BSIE(self.pipeline)
+ lib = BSIE(self.pipeline, self.naming_policy)
self.assertSetEqual(set(lib.principals), {
ns.bse.filesize,
ns.bse.filename,
ns.bse.author,
})
content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
- subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash])
+ subject = node.Node(ns.bsn.Entity, uri=f'http://example.com/local/me/file#{content_hash}')
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
# from_file extracts all available triples
diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py
new file mode 100644
index 0000000..3ecb3d3
--- /dev/null
+++ b/test/lib/test_builder.py
@@ -0,0 +1,101 @@
+
+# standard imports
+import logging
+import unittest
+
+# bsie imports
+from bsie.extractor import ExtractorBuilder
+from bsie.reader import ReaderBuilder
+from bsie.utils import bsfs
+
+# objects to test
+from bsie.lib import PipelineBuilder
+
+
+## code ##
+
+class TestPipelineBuilder(unittest.TestCase):
+ def test_build(self):
+ c_schema = '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsn:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ '''
+ c_tuples = [('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I')]
+ # prepare builders
+ rbuild = ReaderBuilder({})
+ ebuild = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.constant.Constant': dict(
+ schema=c_schema,
+ tuples=c_tuples,
+ )},
+ ])
+ # build pipeline
+ builder = PipelineBuilder(rbuild, ebuild)
+ pipeline = builder.build()
+ # delayed import
+ import bsie.reader.path
+ import bsie.reader.stat
+ import bsie.extractor.generic.path
+ import bsie.extractor.generic.stat
+ import bsie.extractor.generic.constant
+ # check pipeline
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+
+ # fail to load extractor
+ ebuild_err = ExtractorBuilder([
+ {'bsie.extractor.generic.foo.Foo': {}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(rbuild, ebuild_err).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+ # fail to build extractor
+ ebuild_err = ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {'foo': 123}},
+ {'bsie.extractor.generic.path.Path': {}},
+ ])
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(rbuild, ebuild_err).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
+
+ # fail to load reader
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ # switch reader of an extractor
+ old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
+ bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
+ # build pipeline with invalid reader reference
+ pipeline = PipelineBuilder(rbuild, ebuild).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+ # switch back
+ bsie.extractor.generic.path.Path.CONTENT_READER = old_reader
+
+ # fail to build reader
+ rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
+ with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
+ pipeline = PipelineBuilder(rbuild_err, ebuild).build()
+ self.assertDictEqual(pipeline._ext2rdr, {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
+ })
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py
new file mode 100644
index 0000000..c9b0cd2
--- /dev/null
+++ b/test/lib/test_naming_policy.py
@@ -0,0 +1,115 @@
+
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import ns, errors
+from bsie.utils.bsfs import URI
+from bsie.utils.node import Node
+
+# objects to test
+from bsie.lib.naming_policy import NamingPolicy, NamingPolicyIterator, DefaultNamingPolicy
+
+
+
+## code ##
+
+class TestDefaultNamingPolicy(unittest.TestCase):
+
+ def test_handle_node(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # handle_node doesn't modify existing uris
+ self.assertEqual(policy.handle_node(
+ Node(ns.bsn.Invalid, uri='http://example.com/you/foo#bar')).uri,
+ URI('http://example.com/you/foo#bar'))
+ # processes bsfs:File
+ self.assertEqual(policy.handle_node(
+ Node(ns.bsn.Entity, ucid='abc123cba')).uri,
+ URI('http://example.com/me/file#abc123cba'))
+ # processes bsfs:Preview
+ self.assertEqual(policy.handle_node(
+ Node(ns.bsn.Preview, ucid='abc123cba', size=123)).uri,
+ URI('http://example.com/me/preview#abc123cba_s123'))
+ # raises an exception on unknown types
+ self.assertRaises(errors.ProgrammingError, policy.handle_node,
+ Node(ns.bsn.Invalid, ucid='abc123cba', size=123))
+
+ def test_name_file(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # name_file uses ucid
+ self.assertEqual(policy.name_file(
+ Node(ns.bsn.Entity, ucid='123abc321')).uri,
+ URI('http://example.com/me/file#123abc321'))
+ # name_file falls back to a random guid
+ self.assertTrue(policy.name_file(
+ Node(ns.bsn.Entity)).uri.startswith('http://example.com/me/file#'))
+
+ def test_name_preview(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # name_preview uses ucid
+ self.assertEqual(policy.name_preview(
+ Node(ns.bsn.Preview, ucid='123abc321')).uri,
+ URI('http://example.com/me/preview#123abc321'))
+ self.assertEqual(policy.name_preview(
+ Node(ns.bsn.Preview, ucid='123abc321', size=400)).uri,
+ URI('http://example.com/me/preview#123abc321_s400'))
+ # name_preview uses source
+ self.assertEqual(policy.name_preview(
+ Node(ns.bsn.Preview, source=Node(ns.bsn.Entity, ucid='123file321'))).uri,
+ URI('http://example.com/me/preview#123file321'))
+ self.assertEqual(policy.name_preview(
+ Node(ns.bsn.Preview, source=Node(ns.bsn.Entity, ucid='123file321'), size=300)).uri,
+ URI('http://example.com/me/preview#123file321_s300'))
+ # name_preview falls back to a random guid
+ self.assertTrue(policy.name_preview(
+ Node(ns.bsn.Preview)).uri.startswith('http://example.com/me/preview#'))
+ self.assertTrue(policy.name_preview(
+ Node(ns.bsn.Preview, size=200)).uri.startswith('http://example.com/me/preview#'))
+ self.assertTrue(policy.name_preview(
+ Node(ns.bsn.Preview, size=200)).uri.endswith('_s200'))
+
+
+class TestNamingPolicyIterator(unittest.TestCase):
+
+ def test_call(self): # NOTE: We test NamingPolicy.__call__ here
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # call accepts list
+ triples = [('node', 'pred', 'value'), ('node', 'pred', 'value')]
+ it = policy(triples)
+ self.assertIsInstance(it, NamingPolicyIterator)
+ self.assertEqual(it._iterable, triples)
+ self.assertEqual(it._policy, policy)
+ # call accepts iterator
+ triples = iter([('node', 'pred', 'value'), ('node', 'pred', 'value')])
+ it = policy(triples)
+ self.assertIsInstance(it, NamingPolicyIterator)
+ self.assertEqual(it._iterable, triples)
+ self.assertEqual(it._policy, policy)
+
+ def test_iter(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ triples = [
+ (Node(ns.bsn.Entity, ucid='foo'), 'predA', 'hello'),
+ (Node(ns.bsn.Preview, ucid='bar'), 'predB', 1234),
+ (Node(ns.bsn.Preview, ucid='hello'), 'predC', Node(ns.bsn.Entity, ucid='world'))
+ ]
+ # handles nodes, handles values, ignores predicate
+ self.assertListEqual(list(policy(triples)), [
+ (Node(ns.bsn.Entity, uri='http://example.com/me/file#foo'), 'predA', 'hello'),
+ (Node(ns.bsn.Preview, uri='http://example.com/me/preview#bar'), 'predB', 1234),
+ (Node(ns.bsn.Preview, uri='http://example.com/me/preview#hello'), 'predC',
+ Node(ns.bsn.Entity, uri='http://example.com/me/file#world')),
+ ])
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py
new file mode 100644
index 0000000..eb088a9
--- /dev/null
+++ b/test/lib/test_pipeline.py
@@ -0,0 +1,166 @@
+
+# standard imports
+import logging
+import os
+import unittest
+
+# bsie imports
+from bsie.utils import bsfs, errors, node, ns
+import bsie.extractor.generic.constant
+import bsie.extractor.generic.path
+import bsie.extractor.generic.stat
+import bsie.reader.path
+import bsie.reader.stat
+
+# objects to test
+from bsie.lib.pipeline import Pipeline
+
+
+## code ##
+
+class TestPipeline(unittest.TestCase):
+ def setUp(self):
+ # constant A
+ csA = '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsn:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ '''
+ tupA = [('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I')]
+ # constant B
+ csB = '''
+ bse:rating rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsn:Entity ;
+ rdfs:range xsd:integer ;
+ bsfs:unique "true"^^xsd:boolean .
+ '''
+ tupB = [('https://schema.bsfs.io/ie/Node/Entity#rating', 123)]
+ # extractors/readers
+ self.ext2rdr = {
+ bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
+ bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
+ bsie.extractor.generic.constant.Constant(csA, tupA): None,
+ bsie.extractor.generic.constant.Constant(csB, tupB): None,
+ }
+
+ def test_essentials(self):
+ pipeline = Pipeline(self.ext2rdr)
+ self.assertEqual(str(pipeline), 'Pipeline')
+ self.assertEqual(repr(pipeline), 'Pipeline(...)')
+
+ def test_equality(self):
+ pipeline = Pipeline(self.ext2rdr)
+ # a pipeline is equivalent to itself
+ self.assertEqual(pipeline, pipeline)
+ self.assertEqual(hash(pipeline), hash(pipeline))
+ # identical builds are equivalent
+ self.assertEqual(pipeline, Pipeline(self.ext2rdr))
+ self.assertEqual(hash(pipeline), hash(Pipeline(self.ext2rdr)))
+
+ # equivalence respects extractors/readers
+ ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0}
+ self.assertNotEqual(pipeline, Pipeline(ext2rdr))
+ self.assertNotEqual(hash(pipeline), hash(Pipeline(ext2rdr)))
+
+ # equivalence respects schema
+ p2 = Pipeline(self.ext2rdr)
+ p2._schema = bsfs.schema.Schema()
+ self.assertNotEqual(pipeline, p2)
+ self.assertNotEqual(hash(pipeline), hash(p2))
+
+ # not equal to other types
+ class Foo(): pass
+ self.assertNotEqual(pipeline, Foo())
+ self.assertNotEqual(hash(pipeline), hash(Foo()))
+ self.assertNotEqual(pipeline, 123)
+ self.assertNotEqual(hash(pipeline), hash(123))
+ self.assertNotEqual(pipeline, None)
+ self.assertNotEqual(hash(pipeline), hash(None))
+
+
+ def test_call(self):
+ # build pipeline
+ pipeline = Pipeline(self.ext2rdr)
+ # build objects for tests
+ content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
+ subject = node.Node(ns.bsn.Entity, ucid=content_hash)
+ testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+ p_filename = pipeline.schema.predicate(ns.bse.filename)
+ p_filesize = pipeline.schema.predicate(ns.bse.filesize)
+ p_author = pipeline.schema.predicate(ns.bse.author)
+ p_rating = pipeline.schema.predicate(ns.bse.rating)
+ entity = pipeline.schema.node(ns.bsn.Entity)
+ p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, range=entity)
+
+ # extract given predicates
+ self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), {
+ (subject, p_filename, 'testfile.t'),
+ (subject, p_filesize, 12),
+ })
+ self.assertSetEqual(set(pipeline(testfile, {p_author})), {
+ (subject, p_author, 'Me, myself, and I'),
+ })
+ self.assertSetEqual(set(pipeline(testfile, {p_filename})), {
+ (subject, p_filename, 'testfile.t'),
+ })
+ self.assertSetEqual(set(pipeline(testfile, {p_filesize})), {
+ (subject, p_filesize, 12),
+ })
+ # extract all predicates
+ self.assertSetEqual(set(pipeline(testfile)), {
+ (subject, p_filename, 'testfile.t'),
+ (subject, p_filesize, 12),
+ (subject, p_author, 'Me, myself, and I'),
+ (subject, p_rating, 123),
+ })
+ # invalid predicate
+ self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set())
+ # valid/invalid predicates mixed
+ self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), {
+ (subject, p_filename, 'testfile.t'),
+ })
+ # invalid path
+ self.assertRaises(FileNotFoundError, list, pipeline('inexistent_file'))
+ # FIXME: unreadable file (e.g. permissions error)
+
+ def test_call_reader_err(self):
+ class FaultyReader(bsie.reader.path.Path):
+ def __call__(self, path):
+ raise errors.ReaderError('reader error')
+
+ pipeline = Pipeline({bsie.extractor.generic.path.Path(): FaultyReader()})
+ with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
+ testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+ p_filename = pipeline.schema.predicate(ns.bse.filename)
+ self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
+
+ def test_call_extractor_err(self):
+ class FaultyExtractor(bsie.extractor.generic.path.Path):
+ def extract(self, subject, content, predicates):
+ raise errors.ExtractorError('extractor error')
+
+ pipeline = Pipeline({FaultyExtractor(): bsie.reader.path.Path()})
+ with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
+ testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+ p_filename = pipeline.schema.predicate(ns.bse.filename)
+ self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
+
+ def test_predicates(self):
+ # build pipeline
+ pipeline = Pipeline(self.ext2rdr)
+ #
+ self.assertSetEqual(set(pipeline.principals), {
+ pipeline.schema.predicate(ns.bse.filename),
+ pipeline.schema.predicate(ns.bse.filesize),
+ pipeline.schema.predicate(ns.bse.author),
+ pipeline.schema.predicate(ns.bse.rating),
+ })
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##