about | summary | refs | log | tree | commit | diff | stats
path: root/test/lib
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2023-02-08 19:25:19 +0100
committer Matthias Baumgartner <dev@igsor.net> 2023-02-08 19:25:19 +0100
commit 7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3 (patch)
tree d280d9d1e19e4f7a9d0d4b5405603c729e1fdcce /test/lib
parent 05a841215c82ef40d4679dfc4d2c26572bd4d349 (diff)
parent 0d0144466919cfb168e75c2af26d5cb74e10bfa0 (diff)
download bsie-7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3.tar.gz
bsie-7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3.tar.bz2
bsie-7bf6b33fa6d6b901e4933bfe0b2a9939d7b3f3f3.zip
Merge branch 'previews' into develop
Diffstat (limited to 'test/lib')
-rw-r--r-- test/lib/test_bsie.py 22
-rw-r--r-- test/lib/test_builder.py 11
-rw-r--r-- test/lib/test_naming_policy.py 120
-rw-r--r-- test/lib/test_pipeline.py 28
4 files changed, 148 insertions, 33 deletions
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 38e6f59..ae23c4b 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -11,7 +11,7 @@ import unittest
# bsie imports
from bsie.extractor import ExtractorBuilder
from bsie.extractor.base import SCHEMA_PREAMBLE
-from bsie.lib import PipelineBuilder
+from bsie.lib import PipelineBuilder, DefaultNamingPolicy
from bsie.reader import ReaderBuilder
from bsie.utils import bsfs, node, ns
@@ -40,13 +40,13 @@ class TestBSIE(unittest.TestCase):
)},
])
# build pipeline
- self.prefix = bsfs.Namespace('http://example.com/local/')
- pbuild = PipelineBuilder(self.prefix, rbuild, ebuild)
+ self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='')
+ pbuild = PipelineBuilder(rbuild, ebuild)
self.pipeline = pbuild.build()
def test_construction(self):
- # pipeline only
- lib = BSIE(self.pipeline)
+ # only pipeline and naming policy
+ lib = BSIE(self.pipeline, self.naming_policy)
self.assertSetEqual(set(lib.principals), {
ns.bse.filename,
ns.bse.filesize,
@@ -70,7 +70,7 @@ class TestBSIE(unittest.TestCase):
'''))
# specify collect
- lib = BSIE(self.pipeline, collect={
+ lib = BSIE(self.pipeline, self.naming_policy, collect={
ns.bse.filesize,
ns.bse.author,
ns.bse.inexistent,
@@ -91,7 +91,7 @@ class TestBSIE(unittest.TestCase):
bsfs:unique "true"^^xsd:boolean .
'''))
# empty collect is disregarded
- lib = BSIE(self.pipeline, collect={})
+ lib = BSIE(self.pipeline, self.naming_policy, collect={})
self.assertSetEqual(set(lib.principals), {
ns.bse.filename,
ns.bse.filesize,
@@ -116,7 +116,7 @@ class TestBSIE(unittest.TestCase):
'''))
# specify discard
- lib = BSIE(self.pipeline, discard={
+ lib = BSIE(self.pipeline, self.naming_policy, discard={
ns.bse.filesize,
ns.bse.filename,
ns.bse.inexistent,
@@ -132,7 +132,7 @@ class TestBSIE(unittest.TestCase):
'''))
# specify collect and discard
- lib = BSIE(self.pipeline,
+ lib = BSIE(self.pipeline, self.naming_policy,
collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar},
discard={ns.bse.author, ns.bse.foo, ns.bse.foobar},
)
@@ -150,14 +150,14 @@ class TestBSIE(unittest.TestCase):
def test_from_file(self):
# setup
- lib = BSIE(self.pipeline)
+ lib = BSIE(self.pipeline, self.naming_policy)
self.assertSetEqual(set(lib.principals), {
ns.bse.filesize,
ns.bse.filename,
ns.bse.author,
})
content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
- subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash])
+ subject = node.Node(ns.bsfs.File, uri=f'http://example.com/local/file#{content_hash}')
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
# from_file extracts all available triples
diff --git a/test/lib/test_builder.py b/test/lib/test_builder.py
index 273d620..48e932b 100644
--- a/test/lib/test_builder.py
+++ b/test/lib/test_builder.py
@@ -21,7 +21,6 @@ from bsie.lib import PipelineBuilder
class TestPipelineBuilder(unittest.TestCase):
def test_build(self):
- prefix = bsfs.URI('http://example.com/local/file#')
c_schema = '''
bse:author rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
@@ -40,7 +39,7 @@ class TestPipelineBuilder(unittest.TestCase):
)},
])
# build pipeline
- builder = PipelineBuilder(prefix, rbuild, ebuild)
+ builder = PipelineBuilder(rbuild, ebuild)
pipeline = builder.build()
# delayed import
import bsie.reader.path
@@ -61,7 +60,7 @@ class TestPipelineBuilder(unittest.TestCase):
{'bsie.extractor.generic.path.Path': {}},
])
with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+ pipeline = PipelineBuilder(rbuild, ebuild_err).build()
self.assertDictEqual(pipeline._ext2rdr, {
bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
@@ -71,7 +70,7 @@ class TestPipelineBuilder(unittest.TestCase):
{'bsie.extractor.generic.path.Path': {}},
])
with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild, ebuild_err).build()
+ pipeline = PipelineBuilder(rbuild, ebuild_err).build()
self.assertDictEqual(pipeline._ext2rdr, {
bsie.extractor.generic.path.Path(): bsie.reader.path.Path()})
@@ -81,7 +80,7 @@ class TestPipelineBuilder(unittest.TestCase):
old_reader = bsie.extractor.generic.path.Path.CONTENT_READER
bsie.extractor.generic.path.Path.CONTENT_READER = 'bsie.reader.foo.Foo'
# build pipeline with invalid reader reference
- pipeline = PipelineBuilder(prefix, rbuild, ebuild).build()
+ pipeline = PipelineBuilder(rbuild, ebuild).build()
self.assertDictEqual(pipeline._ext2rdr, {
bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(),
bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
@@ -92,7 +91,7 @@ class TestPipelineBuilder(unittest.TestCase):
# fail to build reader
rbuild_err = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
with self.assertLogs(logging.getLogger('bsie.lib.builder'), logging.ERROR):
- pipeline = PipelineBuilder(prefix, rbuild_err, ebuild).build()
+ pipeline = PipelineBuilder(rbuild_err, ebuild).build()
self.assertDictEqual(pipeline._ext2rdr, {
bsie.extractor.generic.path.Path(): bsie.reader.path.Path(),
bsie.extractor.generic.constant.Constant(c_schema, c_tuples): None,
diff --git a/test/lib/test_naming_policy.py b/test/lib/test_naming_policy.py
new file mode 100644
index 0000000..4861c84
--- /dev/null
+++ b/test/lib/test_naming_policy.py
@@ -0,0 +1,120 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# standard imports
+import unittest
+
+# bsie imports
+from bsie.utils import ns, errors
+from bsie.utils.bsfs import URI
+from bsie.utils.node import Node
+
+# objects to test
+from bsie.lib.naming_policy import NamingPolicy, NamingPolicyIterator, DefaultNamingPolicy
+
+
+
+## code ##
+
+class TestDefaultNamingPolicy(unittest.TestCase):
+
+ def test_handle_node(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # handle_node doesn't modify existing uris
+ self.assertEqual(policy.handle_node(
+ Node(ns.bsfs.Entity, uri='http://example.com/you/foo#bar')).uri,
+ URI('http://example.com/you/foo#bar'))
+ # processes bsfs:File
+ self.assertEqual(policy.handle_node(
+ Node(ns.bsfs.File, ucid='abc123cba')).uri,
+ URI('http://example.com/me/file#abc123cba'))
+ # processes bsfs:Preview
+ self.assertEqual(policy.handle_node(
+ Node(ns.bsfs.Preview, ucid='abc123cba', size=123)).uri,
+ URI('http://example.com/me/preview#abc123cba_s123'))
+ # raises an exception on unknown types
+ self.assertRaises(errors.ProgrammingError, policy.handle_node,
+ Node(ns.bsfs.Entity, ucid='abc123cba', size=123))
+
+ def test_name_file(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # name_file uses ucid
+ self.assertEqual(policy.name_file(
+ Node(ns.bsfs.File, ucid='123abc321')).uri,
+ URI('http://example.com/me/file#123abc321'))
+ # name_file falls back to a random guid
+ self.assertTrue(policy.name_file(
+ Node(ns.bsfs.File)).uri.startswith('http://example.com/me/file#'))
+
+ def test_name_preview(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # name_preview uses ucid
+ self.assertEqual(policy.name_preview(
+ Node(ns.bsfs.Preview, ucid='123abc321')).uri,
+ URI('http://example.com/me/preview#123abc321'))
+ self.assertEqual(policy.name_preview(
+ Node(ns.bsfs.Preview, ucid='123abc321', size=400)).uri,
+ URI('http://example.com/me/preview#123abc321_s400'))
+ # name_preview uses source
+ self.assertEqual(policy.name_preview(
+ Node(ns.bsfs.Preview, source=Node(ns.bsfs.File, ucid='123file321'))).uri,
+ URI('http://example.com/me/preview#123file321'))
+ self.assertEqual(policy.name_preview(
+ Node(ns.bsfs.Preview, source=Node(ns.bsfs.File, ucid='123file321'), size=300)).uri,
+ URI('http://example.com/me/preview#123file321_s300'))
+ # name_preview falls back to a random guid
+ self.assertTrue(policy.name_preview(
+ Node(ns.bsfs.Preview)).uri.startswith('http://example.com/me/preview#'))
+ self.assertTrue(policy.name_preview(
+ Node(ns.bsfs.Preview, size=200)).uri.startswith('http://example.com/me/preview#'))
+ self.assertTrue(policy.name_preview(
+ Node(ns.bsfs.Preview, size=200)).uri.endswith('_s200'))
+
+
+class TestNamingPolicyIterator(unittest.TestCase):
+
+ def test_call(self): # NOTE: We test NamingPolicy.__call__ here
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ # call accepts list
+ triples = [('node', 'pred', 'value'), ('node', 'pred', 'value')]
+ it = policy(triples)
+ self.assertIsInstance(it, NamingPolicyIterator)
+ self.assertEqual(it._iterable, triples)
+ self.assertEqual(it._policy, policy)
+ # call accepts iterator
+ triples = iter([('node', 'pred', 'value'), ('node', 'pred', 'value')])
+ it = policy(triples)
+ self.assertIsInstance(it, NamingPolicyIterator)
+ self.assertEqual(it._iterable, triples)
+ self.assertEqual(it._policy, policy)
+
+ def test_iter(self):
+ # setup
+ policy = DefaultNamingPolicy('http://example.com', 'me')
+ triples = [
+ (Node(ns.bsfs.File, ucid='foo'), 'predA', 'hello'),
+ (Node(ns.bsfs.Preview, ucid='bar'), 'predB', 1234),
+ (Node(ns.bsfs.Preview, ucid='hello'), 'predC', Node(ns.bsfs.File, ucid='world'))
+ ]
+ # handles nodes, handles values, ignores predicate
+ self.assertListEqual(list(policy(triples)), [
+ (Node(ns.bsfs.File, uri='http://example.com/me/file#foo'), 'predA', 'hello'),
+ (Node(ns.bsfs.Preview, uri='http://example.com/me/preview#bar'), 'predB', 1234),
+ (Node(ns.bsfs.Preview, uri='http://example.com/me/preview#hello'), 'predC',
+ Node(ns.bsfs.File, uri='http://example.com/me/file#world')),
+ ])
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py
index 8fecc74..61fddd7 100644
--- a/test/lib/test_pipeline.py
+++ b/test/lib/test_pipeline.py
@@ -48,32 +48,28 @@ class TestPipeline(unittest.TestCase):
bsie.extractor.generic.constant.Constant(csA, tupA): None,
bsie.extractor.generic.constant.Constant(csB, tupB): None,
}
- self.prefix = bsfs.Namespace('http://example.com/local/')
def test_essentials(self):
- pipeline = Pipeline(self.prefix, self.ext2rdr)
+ pipeline = Pipeline(self.ext2rdr)
self.assertEqual(str(pipeline), 'Pipeline')
self.assertEqual(repr(pipeline), 'Pipeline(...)')
def test_equality(self):
- pipeline = Pipeline(self.prefix, self.ext2rdr)
+ pipeline = Pipeline(self.ext2rdr)
# a pipeline is equivalent to itself
self.assertEqual(pipeline, pipeline)
self.assertEqual(hash(pipeline), hash(pipeline))
# identical builds are equivalent
- self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr))
- self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr)))
+ self.assertEqual(pipeline, Pipeline(self.ext2rdr))
+ self.assertEqual(hash(pipeline), hash(Pipeline(self.ext2rdr)))
- # equivalence respects prefix
- self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))
- self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)))
# equivalence respects extractors/readers
ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0}
- self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr))
- self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr)))
+ self.assertNotEqual(pipeline, Pipeline(ext2rdr))
+ self.assertNotEqual(hash(pipeline), hash(Pipeline(ext2rdr)))
# equivalence respects schema
- p2 = Pipeline(self.prefix, self.ext2rdr)
+ p2 = Pipeline(self.ext2rdr)
p2._schema = bsfs.schema.Schema()
self.assertNotEqual(pipeline, p2)
self.assertNotEqual(hash(pipeline), hash(p2))
@@ -90,10 +86,10 @@ class TestPipeline(unittest.TestCase):
def test_call(self):
# build pipeline
- pipeline = Pipeline(self.prefix, self.ext2rdr)
+ pipeline = Pipeline(self.ext2rdr)
# build objects for tests
content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
- subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash])
+ subject = node.Node(ns.bsfs.File, ucid=content_hash)
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
p_filesize = pipeline.schema.predicate(ns.bse.filesize)
@@ -138,7 +134,7 @@ class TestPipeline(unittest.TestCase):
def __call__(self, path):
raise errors.ReaderError('reader error')
- pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()})
+ pipeline = Pipeline({bsie.extractor.generic.path.Path(): FaultyReader()})
with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
@@ -149,7 +145,7 @@ class TestPipeline(unittest.TestCase):
def extract(self, subject, content, predicates):
raise errors.ExtractorError('extractor error')
- pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()})
+ pipeline = Pipeline({FaultyExtractor(): bsie.reader.path.Path()})
with self.assertLogs(logging.getLogger('bsie.lib.pipeline'), logging.ERROR):
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
@@ -157,7 +153,7 @@ class TestPipeline(unittest.TestCase):
def test_predicates(self):
# build pipeline
- pipeline = Pipeline(self.prefix, self.ext2rdr)
+ pipeline = Pipeline(self.ext2rdr)
#
self.assertSetEqual(set(pipeline.principals), {
pipeline.schema.predicate(ns.bse.filename),