""" Part of the bsie test suite. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports import logging import os import unittest # bsie imports from bsie.base import errors from bsie.utils import ns from bsie.utils.bsfs import URI from bsie.utils.node import Node import bsie.extractor.generic.constant import bsie.extractor.generic.path import bsie.extractor.generic.stat import bsie.reader.path import bsie.reader.stat # objects to test from bsie.tools.pipeline import Pipeline ## code ## class TestPipeline(unittest.TestCase): def setUp(self): # constant A csA = ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; owl:maxCardinality "1"^^xsd:number . ''' tupA = [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')] # constant B csB = ''' bse:rating rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:integer ; owl:maxCardinality "1"^^xsd:number . ''' tupB = [('http://bsfs.ai/schema/Entity#rating', 123)] # extractors/readers self.ext2rdr = { bsie.extractor.generic.path.Path(): bsie.reader.path.Path(), bsie.extractor.generic.stat.Stat(): bsie.reader.stat.Stat(), bsie.extractor.generic.constant.Constant(csA, tupA): None, bsie.extractor.generic.constant.Constant(csB, tupB): None, } self.prefix = URI('http://example.com/local/file#') def test_essentials(self): pipeline = Pipeline(self.prefix, self.ext2rdr) self.assertEqual(str(pipeline), 'Pipeline') self.assertEqual(repr(pipeline), 'Pipeline(...)') def test_equality(self): pipeline = Pipeline(self.prefix, self.ext2rdr) # a pipeline is equivalent to itself self.assertEqual(pipeline, pipeline) self.assertEqual(hash(pipeline), hash(pipeline)) # identical builds are equivalent self.assertEqual(pipeline, Pipeline(self.prefix, self.ext2rdr)) self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr))) # equivalence respects prefix self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)) self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))) # equivalence respects extractors/readers ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0} self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr)) self.assertNotEqual(hash(pipeline), hash(Pipeline(self.prefix, ext2rdr))) # equivalence respects schema p2 = Pipeline(self.prefix, self.ext2rdr) p2.schema = pipeline.schema.Empty() self.assertNotEqual(pipeline, p2) self.assertNotEqual(hash(pipeline), hash(p2)) # not equal to other types class Foo(): pass self.assertNotEqual(pipeline, Foo()) self.assertNotEqual(hash(pipeline), hash(Foo())) self.assertNotEqual(pipeline, 123) self.assertNotEqual(hash(pipeline), hash(123)) self.assertNotEqual(pipeline, None) self.assertNotEqual(hash(pipeline), hash(None)) def test_call(self): # build pipeline pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427' subject = Node(ns.bsfs.Entity, self.prefix + content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) p_filesize = pipeline.schema.predicate(ns.bse.filesize) p_author = pipeline.schema.predicate(ns.bse.author) p_rating = pipeline.schema.predicate(ns.bse.rating) entity = pipeline.schema.node(ns.bsfs.Entity) p_invalid = pipeline.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, range=entity) # extract given predicates self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { (subject, p_filename, 'testfile.t'), (subject, p_filesize, 11), }) self.assertSetEqual(set(pipeline(testfile, {p_author})), { (subject, p_author, 'Me, myself, and I'), }) self.assertSetEqual(set(pipeline(testfile, {p_filename})), { (subject, p_filename, 'testfile.t'), }) self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { (subject, p_filesize, 11), }) # extract all predicates self.assertSetEqual(set(pipeline(testfile)), { (subject, p_filename, 'testfile.t'), (subject, p_filesize, 11), (subject, p_author, 'Me, myself, and I'), (subject, p_rating, 123), }) # invalid predicate self.assertSetEqual(set(pipeline(testfile, {p_invalid})), set()) # valid/invalid predicates mixed self.assertSetEqual(set(pipeline(testfile, {p_filename, p_invalid})), { (subject, p_filename, 'testfile.t'), }) # invalid path self.assertRaises(FileNotFoundError, list, pipeline('inexistent_file')) # FIXME: unreadable file (e.g. permissions error) def test_call_reader_err(self): class FaultyReader(bsie.reader.path.Path): def __call__(self, path): raise errors.ReaderError('reader error') pipeline = Pipeline(self.prefix, {bsie.extractor.generic.path.Path(): FaultyReader()}) with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) def test_call_extractor_err(self): class FaultyExtractor(bsie.extractor.generic.path.Path): def extract(self, subject, content, predicates): raise errors.ExtractorError('extractor error') pipeline = Pipeline(self.prefix, {FaultyExtractor(): bsie.reader.path.Path()}) with self.assertLogs(logging.getLogger('bsie.tools.pipeline'), logging.ERROR): testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) ## main ## if __name__ == '__main__': unittest.main() ## EOF ##