# standard imports import os import unittest # bsie imports from bsie.extractor import ExtractorBuilder from bsie.extractor.base import SCHEMA_PREAMBLE from bsie.lib import PipelineBuilder, DefaultNamingPolicy from bsie.reader import ReaderBuilder from bsie.utils import bsfs, node, ns # objects to test from bsie.lib.bsie import BSIE ## code ## class TestBSIE(unittest.TestCase): def setUp(self): # reader builder rbuild = ReaderBuilder({}) # extractor builder ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( tuples=[('https://schema.bsfs.io/ie/Node/Entity#author', 'Me, myself, and I')], schema=''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''', )}, ]) # build pipeline self.naming_policy = DefaultNamingPolicy(host='http://example.com/local', user='me') pbuild = PipelineBuilder(rbuild, ebuild) self.pipeline = pbuild.build() def test_construction(self): # only pipeline and naming policy lib = BSIE(self.pipeline, self.naming_policy) self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.dirname, ns.bse.filesize, ns.bse.author, }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:dirname rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:integer; bsfs:unique "true"^^xsd:boolean . bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''')) # specify collect lib = BSIE(self.pipeline, self.naming_policy, collect={ ns.bse.filesize, ns.bse.author, ns.bse.inexistent, }) self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.author, }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:integer; bsfs:unique "true"^^xsd:boolean . bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''')) # empty collect is disregarded lib = BSIE(self.pipeline, self.naming_policy, collect={}) self.assertSetEqual(set(lib.principals), { ns.bse.filename, ns.bse.dirname, ns.bse.filesize, ns.bse.author, }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:dirname rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:integer; bsfs:unique "true"^^xsd:boolean . bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''')) # specify discard lib = BSIE(self.pipeline, self.naming_policy, discard={ ns.bse.filesize, ns.bse.filename, ns.bse.inexistent, }) self.assertSetEqual(set(lib.principals), { ns.bse.author, ns.bse.dirname, }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . bse:dirname rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''')) # specify collect and discard lib = BSIE(self.pipeline, self.naming_policy, collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar}, discard={ns.bse.author, ns.bse.foo, ns.bse.foobar}, ) self.assertSetEqual(set(lib.principals), { ns.bse.filesize, }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:integer; bsfs:unique "true"^^xsd:boolean . ''')) def test_from_file(self): # setup lib = BSIE(self.pipeline, self.naming_policy) self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.filename, ns.bse.dirname, ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' subject = node.Node(ns.bsn.Entity, uri=f'http://example.com/local/me/file#{content_hash}') testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') # from_file extracts all available triples self.assertSetEqual(set(lib.from_file(testfile)), { (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'), (subject, lib.schema.predicate(ns.bse.dirname), os.path.dirname(__file__)), (subject, lib.schema.predicate(ns.bse.filesize), 12), (subject, lib.schema.predicate(ns.bse.author), 'Me, myself, and I'), }) # from_file respects predicate argument self.assertSetEqual(set(lib.from_file(testfile, {ns.bse.filename, ns.bse.invalid})), { (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'), }) ## main ## if __name__ == '__main__': unittest.main() ## EOF ##