"""
Part of the bsie test suite.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
import os
import unittest

# bsie imports
from bsie.base import extractor
from bsie.tools import builder
from bsie.utils.bsfs import URI, schema
from bsie.utils.node import Node
from bsie.utils import bsfs, node, ns

# objects to test
from bsie.lib.bsie import BSIE


## code ##

class TestBSIE(unittest.TestCase):
    """Tests for BSIE: predicate/schema selection and triple extraction.

    NOTE(review): the expected schemas declare their prefixes explicitly.
    The rdfs/xsd IRIs are the W3C namespaces and the bse IRI matches the
    author predicate URI passed to the Constant extractor in setUp; the
    bsfs IRI is assumed from the bse one -- confirm against bsie.utils.ns.
    """

    def setUp(self):
        """Build a pipeline from the Path, Stat, and Constant extractors."""
        # reader builder
        rbuild = builder.ReaderBuilder({})
        # extractor builder
        ebuild = builder.ExtractorBuilder([
            {'bsie.extractor.generic.path.Path': {}},
            {'bsie.extractor.generic.stat.Stat': {}},
            {'bsie.extractor.generic.constant.Constant': dict(
                tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')],
                schema='''
                    bse:author rdfs:subClassOf bsfs:Predicate ;
                        rdfs:domain bsfs:Entity ;
                        rdfs:range xsd:string ;
                        bsfs:unique "true"^^xsd:boolean .
                    ''',
            )},
        ])
        # build pipeline
        self.prefix = URI('http://example.com/local/file#')
        pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
        self.pipeline = pbuild.build()

    def test_construction(self):
        """Check predicates and schema for collect/discard combinations."""
        # pipeline only
        lib = BSIE(self.pipeline)
        self.assertSetEqual(lib.predicates, {
            ns.bse.filename,
            ns.bse.filesize,
            ns.bse.author,
        })
        self.assertEqual(lib.schema, schema.Schema.from_string('''
            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
            prefix bsfs: <http://bsfs.ai/schema/>
            prefix bse: <http://bsfs.ai/schema/Entity#>

            # essential nodes
            bsfs:Entity rdfs:subClassOf bsfs:Node .

            # common definitions
            xsd:string rdfs:subClassOf bsfs:Literal .
            xsd:integer rdfs:subClassOf bsfs:Literal .

            bse:filename rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range xsd:string ;
                bsfs:unique "false"^^xsd:boolean .

            bse:filesize rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range xsd:integer;
                bsfs:unique "false"^^xsd:boolean .

            bse:author rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range xsd:string ;
                bsfs:unique "true"^^xsd:boolean .
            '''))

        # specify collect
        lib = BSIE(self.pipeline, collect={
            ns.bse.filesize,
            ns.bse.author,
            ns.bse.inexistent,
        })
        self.assertSetEqual(lib.predicates, {
            ns.bse.filesize,
            ns.bse.author,
        })
        self.assertEqual(lib.schema, schema.Schema.from_string('''
            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
            prefix bsfs: <http://bsfs.ai/schema/>
            prefix bse: <http://bsfs.ai/schema/Entity#>

            # essential nodes
            bsfs:Entity rdfs:subClassOf bsfs:Node .

            # common definitions
            xsd:string rdfs:subClassOf bsfs:Literal .
            xsd:integer rdfs:subClassOf bsfs:Literal .

            bse:filesize rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range xsd:integer;
                bsfs:unique "false"^^xsd:boolean .

            bse:author rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range xsd:string ;
                bsfs:unique "true"^^xsd:boolean .
            '''))

        # empty collect is disregarded
        lib = BSIE(self.pipeline, collect={})
        self.assertSetEqual(lib.predicates, {
            ns.bse.filename,
            ns.bse.filesize,
            ns.bse.author,
        })
        self.assertEqual(lib.schema, schema.Schema.from_string('''
            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
            prefix bsfs: <http://bsfs.ai/schema/>
            prefix bse: <http://bsfs.ai/schema/Entity#>

            # essential nodes
            bsfs:Entity rdfs:subClassOf bsfs:Node .

            # common definitions
            xsd:string rdfs:subClassOf bsfs:Literal .
            xsd:integer rdfs:subClassOf bsfs:Literal .

            bse:filename rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range xsd:string ;
                bsfs:unique "false"^^xsd:boolean .

            bse:filesize rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range xsd:integer;
                bsfs:unique "false"^^xsd:boolean .

            bse:author rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range xsd:string ;
                bsfs:unique "true"^^xsd:boolean .
            '''))

        # specify discard
        lib = BSIE(self.pipeline, discard={
            ns.bse.filesize,
            ns.bse.filename,
            ns.bse.inexistent,
        })
        self.assertSetEqual(lib.predicates, {
            ns.bse.author,
        })
        self.assertEqual(lib.schema, schema.Schema.from_string('''
            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
            prefix bsfs: <http://bsfs.ai/schema/>
            prefix bse: <http://bsfs.ai/schema/Entity#>

            # essential nodes
            bsfs:Entity rdfs:subClassOf bsfs:Node .

            # common definitions
            xsd:string rdfs:subClassOf bsfs:Literal .

            bse:author rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range xsd:string ;
                bsfs:unique "true"^^xsd:boolean .
            '''))

        # specify collect and discard
        lib = BSIE(self.pipeline,
            collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar},
            discard={ns.bse.author, ns.bse.foo, ns.bse.foobar},
            )
        self.assertSetEqual(lib.predicates, {
            ns.bse.filesize,
        })
        self.assertEqual(lib.schema, schema.Schema.from_string('''
            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
            prefix bsfs: <http://bsfs.ai/schema/>
            prefix bse: <http://bsfs.ai/schema/Entity#>

            # essential nodes
            bsfs:Entity rdfs:subClassOf bsfs:Node .

            # common definitions
            xsd:integer rdfs:subClassOf bsfs:Literal .

            bse:filesize rdfs:subClassOf bsfs:Predicate ;
                rdfs:domain bsfs:Entity ;
                rdfs:range xsd:integer;
                bsfs:unique "false"^^xsd:boolean .
            '''))

    def test_from_file(self):
        """Check the triples that from_file yields for testfile.t."""
        # setup
        lib = BSIE(self.pipeline)
        self.assertSetEqual(set(lib.predicates), {
            ns.bse.filesize,
            ns.bse.filename,
            ns.bse.author,
        })
        content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
        subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash)
        # the fixture is looked up next to this test module
        testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')

        # from_file extracts all available triples
        self.assertSetEqual(set(lib.from_file(testfile)), {
            (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'),
            (subject, lib.schema.predicate(ns.bse.filesize), 12),
            (subject, lib.schema.predicate(ns.bse.author), 'Me, myself, and I'),
        })

        # from_file respects predicate argument
        self.assertSetEqual(set(lib.from_file(testfile, {ns.bse.filename, ns.bse.invalid})), {
            (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'),
        })


## main ##

if __name__ == '__main__':
    unittest.main()

## EOF ##