From 559e643bb1fa39feefd2eb73847ad9420daf1deb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 14 Dec 2022 06:10:25 +0100 Subject: bsie extraction and info apps --- test/lib/test_bsie.py | 231 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 test/lib/test_bsie.py (limited to 'test/lib/test_bsie.py') diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py new file mode 100644 index 0000000..277ac67 --- /dev/null +++ b/test/lib/test_bsie.py @@ -0,0 +1,231 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import unittest + +# bsie imports +from bsie.tools import builder +from bsie.utils import ns +from bsie.utils.bsfs import URI, schema +from bsie.utils.node import Node + +# objects to test +from bsie.lib.bsie import BSIE + + +## code ## + +class TestBSIE(unittest.TestCase): + def setUp(self): + # reader builder + rbuild = builder.ReaderBuilder({}) + # extractor builder + ebuild = builder.ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], + schema=''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + ''', + )}, + ]) + # build pipeline + self.prefix = URI('http://example.com/local/file#') + pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild) + self.pipeline = pbuild.build() + + def test_construction(self): + # pipeline only + lib = BSIE(self.pipeline) + self.assertSetEqual(lib.predicates, { + ns.bse.filename, + ns.bse.filesize, + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + + # specify collect + lib = BSIE(self.pipeline, collect={ + ns.bse.filesize, + ns.bse.author, + ns.bse.inexistent, + }) + self.assertSetEqual(lib.predicates, { + ns.bse.filesize, + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + # empty collect is disregarded + lib = BSIE(self.pipeline, collect={}) + self.assertSetEqual(lib.predicates, { + ns.bse.filename, + ns.bse.filesize, + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + + # specify discard + lib = BSIE(self.pipeline, discard={ + ns.bse.filesize, + ns.bse.filename, + ns.bse.inexistent, + }) + self.assertSetEqual(lib.predicates, { + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + + # specify collect and discard + lib = BSIE(self.pipeline, + collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar}, + discard={ns.bse.author, ns.bse.foo, ns.bse.foobar}, + ) + self.assertSetEqual(lib.predicates, { + ns.bse.filesize, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + ''')) + + + def test_from_file(self): + # setup + lib = BSIE(self.pipeline) + self.assertSetEqual(set(lib.predicates), { + ns.bse.filesize, + ns.bse.filename, + ns.bse.author, + }) + content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' + subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + + # from_file extracts all available triples + self.assertSetEqual(set(lib.from_file(testfile)), { + (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'), + (subject, lib.schema.predicate(ns.bse.filesize), 12), + (subject, lib.schema.predicate(ns.bse.author), 'Me, myself, and I'), + }) + + # from_file respects predicate argument + self.assertSetEqual(set(lib.from_file(testfile, {ns.bse.filename, ns.bse.invalid})), { + (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'), + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## -- cgit v1.2.3 From 3b7fee369924eb7704709edeb8c17fff9c020dfb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:06:09 +0100 Subject: import fixes --- test/lib/test_bsie.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'test/lib/test_bsie.py') diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 277ac67..5b71752 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -9,10 +9,11 @@ import os import unittest # bsie imports +from bsie.base import extractor from bsie.tools import builder -from bsie.utils import ns from bsie.utils.bsfs import URI, schema from bsie.utils.node import Node +from bsie.utils import bsfs, node, ns # objects to test from bsie.lib.bsie import BSIE @@ -76,7 +77,6 @@ class TestBSIE(unittest.TestCase): rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . - ''')) # specify collect @@ -207,7 +207,7 @@ class TestBSIE(unittest.TestCase): ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') # from_file extracts all available triples -- cgit v1.2.3 From 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:12:56 +0100 Subject: file node class in default schema --- test/lib/test_bsie.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'test/lib/test_bsie.py') diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 5b71752..6720746 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -64,12 +64,12 @@ class TestBSIE(unittest.TestCase): xsd:integer rdfs:subClassOf bsfs:Literal . bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:string ; bsfs:unique "false"^^xsd:boolean . bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer; bsfs:unique "false"^^xsd:boolean . @@ -101,7 +101,7 @@ class TestBSIE(unittest.TestCase): xsd:integer rdfs:subClassOf bsfs:Literal . bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer; bsfs:unique "false"^^xsd:boolean . @@ -130,12 +130,12 @@ class TestBSIE(unittest.TestCase): xsd:integer rdfs:subClassOf bsfs:Literal . bse:filename rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:string ; bsfs:unique "false"^^xsd:boolean . bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer; bsfs:unique "false"^^xsd:boolean . @@ -191,7 +191,7 @@ class TestBSIE(unittest.TestCase): xsd:integer rdfs:subClassOf bsfs:Literal . bse:filesize rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; + rdfs:domain bsfs:File ; rdfs:range xsd:integer; bsfs:unique "false"^^xsd:boolean . -- cgit v1.2.3 From 5d9526783ad8432c7d6dfe18c0e9f2b37950b470 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:16:25 +0100 Subject: Pipeline.prefix as Namespace instead of URI --- test/lib/test_bsie.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/lib/test_bsie.py') diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 6720746..43e7b1d 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -40,7 +40,7 @@ class TestBSIE(unittest.TestCase): )}, ]) # build pipeline - self.prefix = URI('http://example.com/local/file#') + self.prefix = bsfs.Namespace('http://example.com/local/file#') pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild) self.pipeline = pbuild.build() -- cgit v1.2.3 From 37510d134458bf954ca2da6d40be0d6c76661e8c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:19:21 +0100 Subject: bsie/pipeline interface revision: * predicates -> principals * schema as property * principals as property * information hiding * full subschema instead of only predicates --- test/lib/test_bsie.py | 74 ++++++++------------------------------------------- 1 file changed, 11 insertions(+), 63 deletions(-) (limited to 'test/lib/test_bsie.py') diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 43e7b1d..f3f476e 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -11,8 +11,6 @@ import unittest # bsie imports from bsie.base import extractor from bsie.tools import builder -from bsie.utils.bsfs import URI, schema -from bsie.utils.node import Node from bsie.utils import bsfs, node, ns # objects to test @@ -47,22 +45,12 @@ class TestBSIE(unittest.TestCase): def test_construction(self): # pipeline only lib = BSIE(self.pipeline) - self.assertSetEqual(lib.predicates, { + self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - # common definitions - xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . - + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -85,21 +73,11 @@ class TestBSIE(unittest.TestCase): ns.bse.author, ns.bse.inexistent, }) - self.assertSetEqual(lib.predicates, { + self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - # common definitions - xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . - + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; @@ -109,26 +87,15 @@ class TestBSIE(unittest.TestCase): rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . - ''')) # empty collect is disregarded lib = BSIE(self.pipeline, collect={}) - self.assertSetEqual(lib.predicates, { + self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.filesize, ns.bse.author, }) - self.assertEqual(lib.schema, schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - # common definitions - xsd:string rdfs:subClassOf bsfs:Literal . - xsd:integer rdfs:subClassOf bsfs:Literal . - + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:string ; @@ -152,24 +119,14 @@ class TestBSIE(unittest.TestCase): ns.bse.filename, ns.bse.inexistent, }) - self.assertSetEqual(lib.predicates, { + self.assertSetEqual(set(lib.principals), { ns.bse.author, }) - self.assertEqual(lib.schema, schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - # common definitions - xsd:string rdfs:subClassOf bsfs:Literal . - + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . - ''')) # specify collect and discard @@ -177,19 +134,10 @@ class TestBSIE(unittest.TestCase): collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar}, discard={ns.bse.author, ns.bse.foo, ns.bse.foobar}, ) - self.assertSetEqual(lib.predicates, { + self.assertSetEqual(set(lib.principals), { ns.bse.filesize, }) - self.assertEqual(lib.schema, schema.Schema.from_string(''' - prefix rdfs: - prefix xsd: - prefix bsfs: - prefix bse: - # essential nodes - bsfs:Entity rdfs:subClassOf bsfs:Node . - # common definitions - xsd:integer rdfs:subClassOf bsfs:Literal . - + self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:File ; rdfs:range xsd:integer; @@ -201,7 +149,7 @@ class TestBSIE(unittest.TestCase): def test_from_file(self): # setup lib = BSIE(self.pipeline) - self.assertSetEqual(set(lib.predicates), { + self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.filename, ns.bse.author, -- cgit v1.2.3 From 057e09d6537bf5c39815661a75819081e3e5fda7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sun, 18 Dec 2022 13:37:59 +0100 Subject: adaptions to updates in bsfs --- test/lib/test_bsie.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test/lib/test_bsie.py') diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index f3f476e..771a0c2 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -38,7 +38,7 @@ class TestBSIE(unittest.TestCase): )}, ]) # build pipeline - self.prefix = bsfs.Namespace('http://example.com/local/file#') + self.prefix = bsfs.Namespace('http://example.com/local/') pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild) self.pipeline = pbuild.build() @@ -155,7 +155,7 @@ class TestBSIE(unittest.TestCase): ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' - subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash) + subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash]) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') # from_file extracts all available triples -- cgit v1.2.3