about summary refs log tree commit diff stats
path: root/test/lib/test_bsie.py
diff options
context:
space:
mode:
Diffstat (limited to 'test/lib/test_bsie.py')
-rw-r--r-- test/lib/test_bsie.py 83
1 files changed, 40 insertions, 43 deletions
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 771a0c2..0c393cc 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -1,16 +1,13 @@
-"""
-Part of the bsie test suite.
-A copy of the license is provided with the project.
-Author: Matthias Baumgartner, 2022
-"""
-# imports
+# standard imports
import os
import unittest
# bsie imports
-from bsie.base import extractor
-from bsie.tools import builder
+from bsie.extractor import ExtractorBuilder
+from bsie.extractor.base import SCHEMA_PREAMBLE
+from bsie.lib import PipelineBuilder, DefaultNamingPolicy
+from bsie.reader import ReaderBuilder
from bsie.utils import bsfs, node, ns
# objects to test
@@ -22,53 +19,53 @@ from bsie.lib.bsie import BSIE
class TestBSIE(unittest.TestCase):
def setUp(self):
# reader builder
- rbuild = builder.ReaderBuilder({})
+ rbuild = ReaderBuilder({})
# extractor builder
- ebuild = builder.ExtractorBuilder([
+ ebuild = ExtractorBuilder([
{'bsie.extractor.generic.path.Path': {}},
{'bsie.extractor.generic.stat.Stat': {}},
{'bsie.extractor.generic.constant.Constant': dict(
- tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')],
+ tuples=[('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I')],
schema='''
bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
''',
)},
])
# build pipeline
- self.prefix = bsfs.Namespace('http://example.com/local/')
- pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
+ self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='me')
+ pbuild = PipelineBuilder(rbuild, ebuild)
self.pipeline = pbuild.build()
def test_construction(self):
- # pipeline only
- lib = BSIE(self.pipeline)
+ # only pipeline and naming policy
+ lib = BSIE(self.pipeline, self.naming_policy)
self.assertSetEqual(set(lib.principals), {
ns.bse.filename,
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
bse:filesize rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:integer;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
'''))
# specify collect
- lib = BSIE(self.pipeline, collect={
+ lib = BSIE(self.pipeline, self.naming_policy, collect={
ns.bse.filesize,
ns.bse.author,
ns.bse.inexistent,
@@ -77,44 +74,44 @@ class TestBSIE(unittest.TestCase):
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:integer;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
'''))
# empty collect is disregarded
- lib = BSIE(self.pipeline, collect={})
+ lib = BSIE(self.pipeline, self.naming_policy, collect={})
self.assertSetEqual(set(lib.principals), {
ns.bse.filename,
ns.bse.filesize,
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
bse:filesize rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:integer;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
'''))
# specify discard
- lib = BSIE(self.pipeline, discard={
+ lib = BSIE(self.pipeline, self.naming_policy, discard={
ns.bse.filesize,
ns.bse.filename,
ns.bse.inexistent,
@@ -122,40 +119,40 @@ class TestBSIE(unittest.TestCase):
self.assertSetEqual(set(lib.principals), {
ns.bse.author,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:author rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
'''))
# specify collect and discard
- lib = BSIE(self.pipeline,
+ lib = BSIE(self.pipeline, self.naming_policy,
collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar},
discard={ns.bse.author, ns.bse.foo, ns.bse.foobar},
)
self.assertSetEqual(set(lib.principals), {
ns.bse.filesize,
})
- self.assertEqual(lib.schema, bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:File ;
+ rdfs:domain bsn:Entity ;
rdfs:range xsd:integer;
- bsfs:unique "false"^^xsd:boolean .
+ bsfs:unique "true"^^xsd:boolean .
'''))
def test_from_file(self):
# setup
- lib = BSIE(self.pipeline)
+ lib = BSIE(self.pipeline, self.naming_policy)
self.assertSetEqual(set(lib.principals), {
ns.bse.filesize,
ns.bse.filename,
ns.bse.author,
})
content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
- subject = node.Node(ns.bsfs.File, (self.prefix + 'file#')[content_hash])
+ subject = node.Node(ns.bsn.Entity, uri=f'http://example.com/local/me/file#{content_hash}')
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
# from_file extracts all available triples