diff options
Diffstat (limited to 'test/lib')
-rw-r--r-- | test/lib/__init__.py | 0 | ||||
-rw-r--r-- | test/lib/test_bsie.py | 231 | ||||
-rw-r--r-- | test/lib/testfile.t | 1 |
3 files changed, 232 insertions, 0 deletions
"""Unit tests for :class:`bsie.lib.bsie.BSIE`.

Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
import os
import unittest

# bsie imports
from bsie.tools import builder
from bsie.utils import ns
from bsie.utils.bsfs import URI, schema
from bsie.utils.node import Node

# objects to test
from bsie.lib.bsie import BSIE


## code ##

# Preamble shared by every expected schema: the namespace prefixes and the
# node class that all predicates below use as their domain.
_SCHEMA_PREAMBLE = '''
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
    prefix bsfs: <http://bsfs.ai/schema/>
    prefix bse: <http://bsfs.ai/schema/Entity#>

    bsfs:Entity rdfs:subClassOf bsfs:Node .
    '''

# Declaration of one literal type.
_LITERAL_TEMPLATE = '''
    {range} rdfs:subClassOf bsfs:Literal .
    '''

# Declaration of one predicate in the bse: namespace.
_PREDICATE_TEMPLATE = '''
    bse:{name} rdfs:subClassOf bsfs:Predicate ;
        rdfs:domain bsfs:Entity ;
        rdfs:range {range} ;
        bsfs:unique "{unique}"^^xsd:boolean .
    '''

# Order in which literal declarations are emitted.
_LITERAL_ORDER = ('xsd:string', 'xsd:integer')

# Predicates provided by the pipeline built in TestBSIE.setUp, as
# local name -> (range, value of bsfs:unique).
_PREDICATES = {
    'filename': ('xsd:string', 'false'),
    'filesize': ('xsd:integer', 'false'),
    'author': ('xsd:string', 'true'),
}


def _expected_schema(*names):
    """Return the schema that declares exactly the predicates in *names*.

    Only the literal types used as a range by one of the selected predicates
    are declared; the tests below expect unused literals to be absent.

    *names* are keys of ``_PREDICATES`` (e.g. ``'filesize'``).
    """
    used_ranges = {_PREDICATES[name][0] for name in names}
    parts = [_SCHEMA_PREAMBLE]
    parts.extend(
        _LITERAL_TEMPLATE.format(range=rng)
        for rng in _LITERAL_ORDER
        if rng in used_ranges
    )
    for name in names:
        rng, unique = _PREDICATES[name]
        parts.append(_PREDICATE_TEMPLATE.format(name=name, range=rng, unique=unique))
    return schema.Schema.from_string('\n'.join(parts))


class TestBSIE(unittest.TestCase):
    """Exercise BSIE construction (collect/discard filters) and from_file."""

    def setUp(self):
        """Build a pipeline with the Path, Stat and Constant extractors."""
        # reader builder
        rbuild = builder.ReaderBuilder({})
        # extractor builder
        ebuild = builder.ExtractorBuilder([
            {'bsie.extractor.generic.path.Path': {}},
            {'bsie.extractor.generic.stat.Stat': {}},
            {'bsie.extractor.generic.constant.Constant': dict(
                tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')],
                schema='''
                    bse:author rdfs:subClassOf bsfs:Predicate ;
                        rdfs:domain bsfs:Entity ;
                        rdfs:range xsd:string ;
                        bsfs:unique "true"^^xsd:boolean .
                    ''',
            )},
        ])
        # build pipeline
        self.prefix = URI('http://example.com/local/file#')
        pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
        self.pipeline = pbuild.build()

    def test_construction(self):
        """Check `predicates` and `schema` for each collect/discard combination."""
        all_predicates = {
            ns.bse.filename,
            ns.bse.filesize,
            ns.bse.author,
        }

        # pipeline only: all three predicates are exposed
        lib = BSIE(self.pipeline)
        self.assertSetEqual(lib.predicates, all_predicates)
        self.assertEqual(lib.schema, _expected_schema('filename', 'filesize', 'author'))

        # specify collect: only listed predicates remain, unknown ones are ignored
        lib = BSIE(self.pipeline, collect={
            ns.bse.filesize,
            ns.bse.author,
            ns.bse.inexistent,
        })
        self.assertSetEqual(lib.predicates, {
            ns.bse.filesize,
            ns.bse.author,
        })
        self.assertEqual(lib.schema, _expected_schema('filesize', 'author'))

        # empty collect is disregarded
        # (set() rather than {}: the latter is an empty dict, not an empty set)
        lib = BSIE(self.pipeline, collect=set())
        self.assertSetEqual(lib.predicates, all_predicates)
        self.assertEqual(lib.schema, _expected_schema('filename', 'filesize', 'author'))

        # specify discard: listed predicates are removed, unknown ones are ignored
        lib = BSIE(self.pipeline, discard={
            ns.bse.filesize,
            ns.bse.filename,
            ns.bse.inexistent,
        })
        self.assertSetEqual(lib.predicates, {
            ns.bse.author,
        })
        self.assertEqual(lib.schema, _expected_schema('author'))

        # specify collect and discard: a predicate in both sets is dropped
        lib = BSIE(
            self.pipeline,
            collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar},
            discard={ns.bse.author, ns.bse.foo, ns.bse.foobar},
        )
        self.assertSetEqual(lib.predicates, {
            ns.bse.filesize,
        })
        self.assertEqual(lib.schema, _expected_schema('filesize'))

    def test_from_file(self):
        """Check the triples that from_file yields for the bundled fixture."""
        # setup
        lib = BSIE(self.pipeline)
        self.assertSetEqual(set(lib.predicates), {
            ns.bse.filesize,
            ns.bse.filename,
            ns.bse.author,
        })
        # Fixture: testfile.t sits next to this module and holds the single
        # line "hello world"; the filesize asserted below is 12.
        # NOTE(review): content_hash is presumably a digest of the fixture's
        # content -- confirm against the pipeline's subject naming.
        content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
        subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
        testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')

        # from_file extracts all available triples
        self.assertSetEqual(set(lib.from_file(testfile)), {
            (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'),
            (subject, lib.schema.predicate(ns.bse.filesize), 12),
            (subject, lib.schema.predicate(ns.bse.author), 'Me, myself, and I'),
        })

        # from_file respects predicate argument; unknown predicates are ignored
        self.assertSetEqual(set(lib.from_file(testfile, {ns.bse.filename, ns.bse.invalid})), {
            (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'),
        })


## main ##

if __name__ == '__main__':
    unittest.main()

## EOF ##