From 559e643bb1fa39feefd2eb73847ad9420daf1deb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 14 Dec 2022 06:10:25 +0100 Subject: bsie extraction and info apps --- bsie.app | 49 ++++++ bsie/__init__.py | 6 + bsie/apps/__init__.py | 20 +++ bsie/apps/index.py | 131 ++++++++++++++++ bsie/apps/info.py | 74 +++++++++ bsie/base/errors.py | 6 + bsie/lib/__init__.py | 13 ++ bsie/lib/bsie.py | 80 ++++++++++ bsie/tools/pipeline.py | 4 + bsie/utils/namespaces.py | 2 +- test/apps/__init__.py | 0 test/apps/test_index.py | 159 ++++++++++++++++++++ test/apps/test_info.py | 42 ++++++ test/apps/testdir/alpha/alpha_first | 16 ++ test/apps/testdir/alpha/alpha_second | 12 ++ test/apps/testdir/alpha/omega/omega_first | 14 ++ test/apps/testdir/alpha/omega/omega_second | 10 ++ test/apps/testdir/foo/bar/bar_first | 20 +++ test/apps/testdir/foo/bar/bar_second | 14 ++ test/apps/testdir/foo/foo_first | 11 ++ test/apps/testdir/foo/foo_second | 12 ++ test/apps/testdir/td_first | 18 +++ test/apps/testdir/td_second | 14 ++ test/apps/testfile | 16 ++ test/lib/__init__.py | 0 test/lib/test_bsie.py | 231 +++++++++++++++++++++++++++++ test/lib/testfile.t | 1 + test/tools/test_pipeline.py | 20 ++- test/tools/testfile.t | 2 +- 29 files changed, 991 insertions(+), 6 deletions(-) create mode 100755 bsie.app create mode 100644 bsie/apps/__init__.py create mode 100644 bsie/apps/index.py create mode 100644 bsie/apps/info.py create mode 100644 bsie/lib/__init__.py create mode 100644 bsie/lib/bsie.py create mode 100644 test/apps/__init__.py create mode 100644 test/apps/test_index.py create mode 100644 test/apps/test_info.py create mode 100644 test/apps/testdir/alpha/alpha_first create mode 100644 test/apps/testdir/alpha/alpha_second create mode 100644 test/apps/testdir/alpha/omega/omega_first create mode 100644 test/apps/testdir/alpha/omega/omega_second create mode 100644 test/apps/testdir/foo/bar/bar_first create mode 100644 test/apps/testdir/foo/bar/bar_second create mode 100644 
test/apps/testdir/foo/foo_first create mode 100644 test/apps/testdir/foo/foo_second create mode 100644 test/apps/testdir/td_first create mode 100644 test/apps/testdir/td_second create mode 100644 test/apps/testfile create mode 100644 test/lib/__init__.py create mode 100644 test/lib/test_bsie.py create mode 100644 test/lib/testfile.t diff --git a/bsie.app b/bsie.app new file mode 100755 index 0000000..ba9cee7 --- /dev/null +++ b/bsie.app @@ -0,0 +1,49 @@ +"""BSIE tools. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import typing + +# module imports +import bsie +import bsie.apps + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + +# config +apps = { + 'index' : bsie.apps.index, + 'info' : bsie.apps.info, + } + + +## code ## + +def main(argv): + """Black Star File System maintenance tools.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='bsie') + parser.add_argument('--version', action='version', + version='%(prog)s version {}.{}.{}'.format(*bsie.version_info)) + parser.add_argument('app', choices=apps.keys(), + help='Select the application to run.') + parser.add_argument('rest', nargs=argparse.REMAINDER) + # parse the given argv; parse_args() without arguments would silently fall back to sys.argv + args = parser.parse_args(argv) + # run application + apps[args.app](args.rest) + + +## main ## + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) + +## EOF ## diff --git a/bsie/__init__.py b/bsie/__init__.py index 2f2477a..2b874bd 100644 --- a/bsie/__init__.py +++ b/bsie/__init__.py @@ -5,8 +5,14 @@ A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022 """ # imports +import collections import typing +# constants +version_info = collections.namedtuple('version_info', + ('major', 'minor', 'micro')) \ + (0, 0, 1) + # exports __all__: typing.Sequence[str] = [] diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py new file mode 100644 index 0000000..a548c3c --- /dev/null +++ b/bsie/apps/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .index import main as index +from .info import main as info + +# exports +__all__: typing.Sequence[str] = ( + 'index', + 'info', + ) + +## EOF ## diff --git a/bsie/apps/index.py b/bsie/apps/index.py new file mode 100644 index 0000000..821aa4c --- /dev/null +++ b/bsie/apps/index.py @@ -0,0 +1,131 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import os +import typing + +# bsfs imports +import bsfs + +# bsie imports +from bsie.base import errors +from bsie.lib.bsie import BSIE +from bsie.tools import builder +from bsie.utils.bsfs import URI + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + + +## code ## + +def main(argv): + """Index files or directories into BSFS.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='index') + parser.add_argument('--user', type=URI, default=URI('http://example.com/me'), + help='') + parser.add_argument('--collect', action='append', default=[], + help='') + parser.add_argument('--discard', action='append', default=[], + help='') + parser.add_argument('-r', '--recursive', action='store_true', default=False, + help='') + parser.add_argument('--follow', action='store_true', default=False, + help='') + parser.add_argument('--print', action='store_true', default=False, + help='') + parser.add_argument('input_file', 
nargs=argparse.REMAINDER, + help='') + args = parser.parse_args(argv) + + # FIXME: Read reader/extractor configs from a config file + # reader builder + rbuild = builder.ReaderBuilder({}) + # extractor builder + ebuild = builder.ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], + schema=''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + ''', + )}, + ]) + # pipeline builder + prefix = URI(args.user + ('file#' if args.user.endswith('/') else '/file#')) + pbuild = builder.PipelineBuilder( + prefix, + rbuild, + ebuild, + ) + + # build pipeline + pipeline = pbuild.build() + # build BSIE frontend + bsie = BSIE(pipeline, args.collect, args.discard) + + + def walk(handle): + """Walk through given input files.""" + # FIXME: collect all triples by node, set all predicates at once + # FIXME: simplify code (below but maybe also above) + # FIXME: How to handle dependencies between data? + # E.g. do I still want to link to a tag despite not being permitted to set its label? + # FIXME: node renaming? + + # index input paths + for path in args.input_file: + if os.path.isdir(path) and args.recursive: + for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow): + for filename in filenames: + for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)): + handle(node, pred, value) + elif os.path.isfile(path): + for node, pred, value in bsie.from_file(path): + handle(node, pred, value) + else: + raise errors.UnreachableError() + + + if args.print: + walk(print) + return None + + else: + # initialize bsfs + # NOTE: With persistent storages, the schema migration will be a separate operation. + # Here, we'd simply examine the schema and potentially discard more predicates.
+ store = bsfs.Open({ + 'Graph': { + 'user': args.user, + 'backend': { + 'SparqlStore': {}}, + }}) + store.migrate(bsie.schema) + # process files + def handle(node, pred, value): + store.node(node.node_type, node.uri).set(pred.uri, value) + walk(handle) + # return store + return store + + + +## main ## + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) + +## EOF ## diff --git a/bsie/apps/info.py b/bsie/apps/info.py new file mode 100644 index 0000000..8cc6dca --- /dev/null +++ b/bsie/apps/info.py @@ -0,0 +1,74 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import sys +import typing + +# bsie imports +from bsie.base import errors +from bsie.tools import builder +from bsie.utils.bsfs import URI + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + + +## code ## + +def main(argv): + """Show information from BSIE.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='info') + parser.add_argument('what', choices=('predicates', ), + help='Select what information to show.') + args = parser.parse_args(argv) + + # FIXME: Read reader/extractor configs from a config file + # reader builder + rbuild = builder.ReaderBuilder({}) + # extractor builder + ebuild = builder.ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], + schema=''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ ''', + )}, + ]) + # pipeline builder + pbuild = builder.PipelineBuilder( + URI('http://example.com/me/file#'), # not actually used + rbuild, + ebuild, + ) + + # build pipeline + pipeline = pbuild.build() + + # show info + if args.what == 'predicates': + # show predicates + for pred in pipeline.schema.predicates(): + print(pred.uri) + else: + # args.what is already checked by argparse + raise errors.UnreachableError() + + +## main ## + +if __name__ == '__main__': + main(sys.argv[1:]) + +## EOF ## diff --git a/bsie/base/errors.py b/bsie/base/errors.py index 760351f..dc3c30e 100644 --- a/bsie/base/errors.py +++ b/bsie/base/errors.py @@ -33,4 +33,10 @@ class ExtractorError(_BSIEError): class ReaderError(_BSIEError): """The Reader failed to read the given file.""" +class ProgrammingError(_BSIEError): + """An assertion-like error that indicates a code-base issue.""" + +class UnreachableError(ProgrammingError): + """Bravo, you've reached a point in code that should logically not be reachable.""" + ## EOF ## diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py new file mode 100644 index 0000000..f6c9018 --- /dev/null +++ b/bsie/lib/__init__.py @@ -0,0 +1,13 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + +## EOF ## diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py new file mode 100644 index 0000000..aeccc8c --- /dev/null +++ b/bsie/lib/bsie.py @@ -0,0 +1,80 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# bsie imports +from bsie.tools.pipeline import Pipeline +from bsie.utils import node, ns +from bsie.utils.bsfs import URI, schema as schema_ + +# exports +__all__: typing.Sequence[str] = ( + 'BSIE', + ) + + +## code ## + +class BSIE(): + """Extract triples from files. 
+ + Controls which predicates to extract (*collect*) and + which to not extract (*discard*). Note that this only affects + principal predicates, not auxiliary predicates like, e.g., tag labels. + + """ + + # predicates to extract. + predicates: typing.Set[URI] + + # local schema. + schema: schema_.Schema + + def __init__( + self, + # pipeline builder. + pipeline: Pipeline, + # predicates to extract at most. None implies all available w.r.t. extractors. + collect: typing.Optional[typing.Iterable[URI]] = None, + # predicates to discard. + discard: typing.Optional[typing.Iterable[URI]] = None, + ): + # store pipeline + self.pipeline = pipeline + # start off with available predicates + self.predicates = {pred.uri for pred in self.pipeline.predicates()} + # limit predicates to specified ones by argument. + if collect is not None: + collect = set(collect) + if len(collect) > 0: + self.predicates &= collect + # discard predicates. + if discard is not None: + self.predicates -= set(discard) + # discard ns.bsfs.Predicate + self.predicates.discard(ns.bsfs.Predicate) + # compile a schema that only contains the requested predicates (and implied types) + self.schema = schema_.Schema({ + self.pipeline.schema.predicate(pred) for pred in self.predicates}) + + def from_file( + self, + path: URI, + predicates: typing.Optional[typing.Iterable[URI]] = None, + ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + """Produce triples for a given *path*. Limit to *predicates* if given.""" + # get requested predicates. + predicates = set(predicates) if predicates is not None else self.predicates + # filter through requested predicates.
+ predicates &= self.predicates + # predicate lookup + predicates = {self.schema.predicate(pred) for pred in predicates} + # invoke pipeline + yield from self.pipeline(path, predicates) + +## EOF ## diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py index 8e1c992..da422c0 100644 --- a/bsie/tools/pipeline.py +++ b/bsie/tools/pipeline.py @@ -70,6 +70,10 @@ class Pipeline(): and self._prefix == other._prefix \ and self._ext2rdr == other._ext2rdr + def predicates(self) -> typing.Iterator[_schema.Predicate]: + """Return the predicates that are extracted from a file.""" + return iter({pred for ext in self._ext2rdr for pred in ext.predicates()}) + def __call__( self, path: URI, diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py index 13be96b..2fcb2dc 100644 --- a/bsie/utils/namespaces.py +++ b/bsie/utils/namespaces.py @@ -13,7 +13,7 @@ from . import bsfs as _bsfs # constants bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#') bsfs = _bsfs.Namespace('http://bsfs.ai/schema/') -bsm = _bsfs.Namespace('http://bsfs.ai/schema/meta#') +bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta#') xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema#') # export diff --git a/test/apps/__init__.py b/test/apps/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/apps/test_index.py b/test/apps/test_index.py new file mode 100644 index 0000000..6d47df8 --- /dev/null +++ b/test/apps/test_index.py @@ -0,0 +1,159 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import io +import os +import rdflib +import sys +import unittest + +# bsie imports +from bsie.utils import ns + +# objects to test +from bsie.apps.index import main + + +## code ## + +class TestIndex(unittest.TestCase): + def test_main(self): + bsfs = main([ + '-r', + '--user', 'http://example.com/me', + os.path.join(os.path.dirname(__file__), 'testdir'), + os.path.join(os.path.dirname(__file__), 'testfile'), + ]) + + prefix = 'http://example.com/me/file#' + self.assertTrue(set(bsfs._Graph__backend.graph).issuperset({ + (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('696', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('503', datatype=rdflib.XSD.integer)), + 
(rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('911', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('testfile', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('885', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 
'80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('956', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('omega_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('648', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('alpha_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('754', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), 
rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('585', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('bar_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('636', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('foo_first', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('546', datatype=rdflib.XSD.integer)), + (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)), + (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', 
datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filename), rdflib.Literal('td_second', datatype=rdflib.XSD.string)), + (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bse.filesize), rdflib.Literal('703', datatype=rdflib.XSD.integer)), + })) + + # NOTE: we don't check ns.bsm.t_created since it depends on the execution time. Triples would look like this: + # (rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), 
rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # (rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)), + # instead, we simply check if there's such a predicate for each file + self.assertSetEqual({sub for sub, _ in bsfs._Graph__backend.graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))}, { + rdflib.URIRef(prefix + '2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647'), + rdflib.URIRef(prefix + '441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece'), + rdflib.URIRef(prefix + '69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871'), + rdflib.URIRef(prefix + '78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926'), + rdflib.URIRef(prefix + '80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3'), + rdflib.URIRef(prefix + '976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795'), + rdflib.URIRef(prefix + '997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3'), + rdflib.URIRef(prefix + 'a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d'), + rdflib.URIRef(prefix + 'b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70'), + rdflib.URIRef(prefix + 'd43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d'), + rdflib.URIRef(prefix + 'd803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1'), + }) + + def test_print(self): + stdout, sys.stdout = sys.stdout, io.StringIO() + bsfs = 
main([ + '--print', + '-r', + '--user', 'http://example.com/me', + os.path.join(os.path.dirname(__file__), 'testdir'), + os.path.join(os.path.dirname(__file__), 'testfile'), + ]) + outbuf, sys.stdout = sys.stdout, stdout + self.assertSetEqual(set(outbuf.getvalue().split('\n')) - {''}, { + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filename}) alpha_second', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647) Predicate({ns.bse.filesize}) 696', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filename}) omega_second', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece) Predicate({ns.bse.filesize}) 503', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filename}) td_first', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871) Predicate({ns.bse.filesize}) 911', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) 
Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filename}) testfile', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926) Predicate({ns.bse.filesize}) 885', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filename}) bar_first', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3) Predicate({ns.bse.filesize}) 956', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filename}) omega_first', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795) Predicate({ns.bse.filesize}) 648', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filename}) alpha_first', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3) Predicate({ns.bse.filesize}) 754', + f'Node(http://bsfs.ai/schema/Entity, 
http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filename}) foo_second', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d) Predicate({ns.bse.filesize}) 585', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filename}) bar_second', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70) Predicate({ns.bse.filesize}) 636', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filename}) foo_first', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d) Predicate({ns.bse.filesize}) 546', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.author}) Me, myself, and I', + f'Node(http://bsfs.ai/schema/Entity, http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filename}) td_second', + f'Node(http://bsfs.ai/schema/Entity, 
http://example.com/me/file#d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1) Predicate({ns.bse.filesize}) 703', + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/apps/test_info.py b/test/apps/test_info.py new file mode 100644 index 0000000..60a540e --- /dev/null +++ b/test/apps/test_info.py @@ -0,0 +1,42 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import io +import os +import sys +import unittest + +# objects to test +from bsie.apps.info import main + + +## code ## + +class TestIndex(unittest.TestCase): + def test_predicates(self): + stdout, sys.stdout = sys.stdout, io.StringIO() + # show predicates infos + main(['predicates']) + outbuf, sys.stdout = sys.stdout, stdout + # verify output + self.assertSetEqual({pred for pred in outbuf.getvalue().split('\n') if pred != ''}, { + 'http://bsfs.ai/schema/Entity#author', + 'http://bsfs.ai/schema/Predicate', + 'http://bsfs.ai/schema/Entity#filename', + 'http://bsfs.ai/schema/Entity#filesize', + }) + + def test_invalid(self): + self.assertRaises(SystemExit, main, ['foobar']) + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/apps/testdir/alpha/alpha_first b/test/apps/testdir/alpha/alpha_first new file mode 100644 index 0000000..f96fdee --- /dev/null +++ b/test/apps/testdir/alpha/alpha_first @@ -0,0 +1,16 @@ +Turpis tincidunt id aliquet risus feugiat in ante metus. +Vel turpis nunc eget lorem dolor. +Lorem mollis aliquam ut porttitor leo a diam sollicitudin. +Sit amet mattis vulputate enim nulla aliquet porttitor lacus luctus. +Vitae et leo duis ut diam. +Integer eget aliquet nibh praesent tristique magna sit. +Volutpat sed cras ornare arcu dui. +Consectetur adipiscing elit duis tristique sollicitudin nibh. +Interdum varius sit amet mattis vulputate. +A arcu cursus vitae congue. 
+Risus nec feugiat in fermentum posuere urna nec tincidunt praesent. +Sit amet dictum sit amet justo donec enim diam. +Maecenas accumsan lacus vel facilisis. +Erat velit scelerisque in dictum non consectetur a. +Tempor orci dapibus ultrices in iaculis nunc. +Nisi lacus sed viverra tellus. diff --git a/test/apps/testdir/alpha/alpha_second b/test/apps/testdir/alpha/alpha_second new file mode 100644 index 0000000..ae83ce8 --- /dev/null +++ b/test/apps/testdir/alpha/alpha_second @@ -0,0 +1,12 @@ +Et sollicitudin ac orci phasellus egestas tellus rutrum tellus. +Orci dapibus ultrices in iaculis nunc sed augue. +Tincidunt vitae semper quis lectus nulla at. +Maecenas ultricies mi eget mauris pharetra et. +Porttitor massa id neque aliquam vestibulum morbi blandit. +Et magnis dis parturient montes nascetur ridiculus mus mauris. +Ac orci phasellus egestas tellus rutrum tellus pellentesque. +Donec ac odio tempor orci dapibus. +Quis imperdiet massa tincidunt nunc pulvinar sapien et ligula. +Potenti nullam ac tortor vitae purus faucibus ornare suspendisse sed. +Orci porta non pulvinar neque laoreet suspendisse interdum consectetur. +Mauris pellentesque pulvinar pellentesque habitant morbi tristique. diff --git a/test/apps/testdir/alpha/omega/omega_first b/test/apps/testdir/alpha/omega/omega_first new file mode 100644 index 0000000..e594737 --- /dev/null +++ b/test/apps/testdir/alpha/omega/omega_first @@ -0,0 +1,14 @@ +Neque gravida in fermentum et sollicitudin. +Sodales ut eu sem integer vitae justo eget magna fermentum. +Amet nulla facilisi morbi tempus iaculis. +Proin sagittis nisl rhoncus mattis rhoncus urna neque. +Aliquam sem fringilla ut morbi tincidunt augue interdum velit euismod. +Sagittis eu volutpat odio facilisis. +Aliquet porttitor lacus luctus accumsan tortor posuere ac ut. +Sed arcu non odio euismod lacinia. +Faucibus et molestie ac feugiat. +Urna neque viverra justo nec ultrices dui sapien eget. +Amet commodo nulla facilisi nullam. 
+Pretium lectus quam id leo in vitae. +A cras semper auctor neque. +Sed arcu non odio euismod lacinia at quis risus sed. diff --git a/test/apps/testdir/alpha/omega/omega_second b/test/apps/testdir/alpha/omega/omega_second new file mode 100644 index 0000000..0c9857d --- /dev/null +++ b/test/apps/testdir/alpha/omega/omega_second @@ -0,0 +1,10 @@ +Commodo sed egestas egestas fringilla phasellus. +Ac tortor dignissim convallis aenean et tortor at risus. +Lorem dolor sed viverra ipsum nunc aliquet bibendum enim. +Quis lectus nulla at volutpat diam ut. +Tincidunt id aliquet risus feugiat in ante metus. +Tincidunt arcu non sodales neque. +Amet est placerat in egestas erat imperdiet sed euismod. +Duis tristique sollicitudin nibh sit amet. +Sed arcu non odio euismod lacinia at. +Ullamcorper morbi tincidunt ornare massa eget egestas purus viverra accumsan. diff --git a/test/apps/testdir/foo/bar/bar_first b/test/apps/testdir/foo/bar/bar_first new file mode 100644 index 0000000..e9edb3f --- /dev/null +++ b/test/apps/testdir/foo/bar/bar_first @@ -0,0 +1,20 @@ +Elementum eu facilisis sed odio morbi quis commodo. +Enim nunc faucibus a pellentesque sit amet porttitor. +Etiam non quam lacus suspendisse faucibus interdum. +Viverra aliquet eget sit amet tellus. +Arcu vitae elementum curabitur vitae. +Feugiat vivamus at augue eget arcu dictum. +Commodo quis imperdiet massa tincidunt nunc. +Urna duis convallis convallis tellus id interdum. +Commodo sed egestas egestas fringilla phasellus. +Sodales neque sodales ut etiam sit amet nisl. +Sem integer vitae justo eget magna fermentum iaculis. +Id diam maecenas ultricies mi. +Aliquet nibh praesent tristique magna sit amet purus gravida. +Ut enim blandit volutpat maecenas volutpat. +Ipsum a arcu cursus vitae congue mauris. +Donec ultrices tincidunt arcu non. +Nulla posuere sollicitudin aliquam ultrices sagittis orci a scelerisque purus. +Egestas maecenas pharetra convallis posuere. +Feugiat in fermentum posuere urna nec. 
+Nulla malesuada pellentesque elit eget gravida cum sociis. diff --git a/test/apps/testdir/foo/bar/bar_second b/test/apps/testdir/foo/bar/bar_second new file mode 100644 index 0000000..fb95896 --- /dev/null +++ b/test/apps/testdir/foo/bar/bar_second @@ -0,0 +1,14 @@ +Augue ut lectus arcu bibendum at varius vel pharetra vel. +Mattis aliquam faucibus purus in. +In tellus integer feugiat scelerisque. +Eget velit aliquet sagittis id consectetur purus ut faucibus pulvinar. +Augue mauris augue neque gravida. +Pulvinar neque laoreet suspendisse interdum consectetur libero id faucibus. +Tellus elementum sagittis vitae et leo duis. +Eget est lorem ipsum dolor sit amet consectetur. +Volutpat sed cras ornare arcu. +Faucibus a pellentesque sit amet. +Turpis egestas maecenas pharetra convallis. +Faucibus interdum posuere lorem ipsum dolor sit amet. +Id semper risus in hendrerit. +Amet volutpat consequat mauris nunc. diff --git a/test/apps/testdir/foo/foo_first b/test/apps/testdir/foo/foo_first new file mode 100644 index 0000000..ed1e052 --- /dev/null +++ b/test/apps/testdir/foo/foo_first @@ -0,0 +1,11 @@ +Venenatis tellus in metus vulputate eu scelerisque felis imperdiet proin. +Orci phasellus egestas tellus rutrum. +Feugiat vivamus at augue eget arcu dictum varius. +Justo eget magna fermentum iaculis eu non. +A erat nam at lectus urna duis. +Quam quisque id diam vel quam elementum pulvinar etiam. +Amet commodo nulla facilisi nullam vehicula ipsum a. +Sapien faucibus et molestie ac feugiat. +Aliquam vestibulum morbi blandit cursus risus at ultrices. +Purus faucibus ornare suspendisse sed nisi. +In massa tempor nec feugiat nisl pretium fusce id velit. diff --git a/test/apps/testdir/foo/foo_second b/test/apps/testdir/foo/foo_second new file mode 100644 index 0000000..95e46ae --- /dev/null +++ b/test/apps/testdir/foo/foo_second @@ -0,0 +1,12 @@ +Sit amet consectetur adipiscing elit ut aliquam purus. +Vulputate dignissim suspendisse in est ante in nibh. 
+Eu feugiat pretium nibh ipsum consequat nisl vel pretium. +Egestas purus viverra accumsan in nisl. +Ac odio tempor orci dapibus ultrices. +At imperdiet dui accumsan sit amet. +Elementum integer enim neque volutpat ac tincidunt vitae semper. +Mi in nulla posuere sollicitudin aliquam ultrices sagittis. +Aliquam sem et tortor consequat. +Tristique senectus et netus et malesuada fames ac turpis. +Quis hendrerit dolor magna eget est lorem ipsum. +Ut consequat semper viverra nam libero. diff --git a/test/apps/testdir/td_first b/test/apps/testdir/td_first new file mode 100644 index 0000000..21eab9c --- /dev/null +++ b/test/apps/testdir/td_first @@ -0,0 +1,18 @@ +Urna duis convallis convallis tellus id interdum velit. +Risus in hendrerit gravida rutrum. +Odio pellentesque diam volutpat commodo sed. +Duis convallis convallis tellus id interdum velit laoreet id donec. +Duis at tellus at urna. +Egestas maecenas pharetra convallis posuere morbi leo urna molestie at. +Et leo duis ut diam quam nulla porttitor massa id. +Nunc eget lorem dolor sed viverra ipsum nunc aliquet bibendum. +Sodales ut etiam sit amet nisl purus in. +Ac felis donec et odio pellentesque diam volutpat commodo. +Nunc mi ipsum faucibus vitae aliquet. +Volutpat ac tincidunt vitae semper quis lectus nulla at volutpat. +Mollis aliquam ut porttitor leo. +Vestibulum rhoncus est pellentesque elit ullamcorper dignissim cras. +Pulvinar proin gravida hendrerit lectus a. +Ultrices dui sapien eget mi proin. +Dui vivamus arcu felis bibendum ut. +Aliquam eleifend mi in nulla posuere sollicitudin aliquam ultrices sagittis. diff --git a/test/apps/testdir/td_second b/test/apps/testdir/td_second new file mode 100644 index 0000000..496ff0e --- /dev/null +++ b/test/apps/testdir/td_second @@ -0,0 +1,14 @@ +Egestas purus viverra accumsan in. +Auctor urna nunc id cursus metus aliquam eleifend. +Morbi tincidunt augue interdum velit. +In egestas erat imperdiet sed euismod nisi porta lorem mollis. 
+Sed augue lacus viverra vitae congue eu consequat. +Ut pharetra sit amet aliquam id. +Aenean euismod elementum nisi quis eleifend. +Hac habitasse platea dictumst vestibulum rhoncus est pellentesque elit ullamcorper. +Eget nunc lobortis mattis aliquam faucibus purus. +Sit amet luctus venenatis lectus magna fringilla. +Placerat orci nulla pellentesque dignissim enim sit amet venenatis. +Montes nascetur ridiculus mus mauris. +Morbi enim nunc faucibus a pellentesque sit amet. +Et netus et malesuada fames ac turpis egestas. diff --git a/test/apps/testfile b/test/apps/testfile new file mode 100644 index 0000000..b56928e --- /dev/null +++ b/test/apps/testfile @@ -0,0 +1,16 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +Fames ac turpis egestas maecenas pharetra convallis posuere morbi. +Etiam erat velit scelerisque in dictum non consectetur a erat. +Dolor purus non enim praesent elementum facilisis. +Nulla porttitor massa id neque aliquam vestibulum morbi blandit cursus. +Adipiscing vitae proin sagittis nisl rhoncus mattis rhoncus urna neque. +Aenean pharetra magna ac placerat. +Pulvinar proin gravida hendrerit lectus a. +Iaculis nunc sed augue lacus viverra vitae. +Ac tortor vitae purus faucibus ornare suspendisse sed. +Purus in mollis nunc sed id semper. +Non consectetur a erat nam at lectus urna. +In ante metus dictum at tempor commodo ullamcorper. +Auctor augue mauris augue neque gravida in fermentum. +Nunc scelerisque viverra mauris in. +Morbi leo urna molestie at elementum. diff --git a/test/lib/__init__.py b/test/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py new file mode 100644 index 0000000..277ac67 --- /dev/null +++ b/test/lib/test_bsie.py @@ -0,0 +1,231 @@ +""" + +Part of the bsie test suite. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import os +import unittest + +# bsie imports +from bsie.tools import builder +from bsie.utils import ns +from bsie.utils.bsfs import URI, schema +from bsie.utils.node import Node + +# objects to test +from bsie.lib.bsie import BSIE + + +## code ## + +class TestBSIE(unittest.TestCase): + def setUp(self): + # reader builder + rbuild = builder.ReaderBuilder({}) + # extractor builder + ebuild = builder.ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], + schema=''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + ''', + )}, + ]) + # build pipeline + self.prefix = URI('http://example.com/local/file#') + pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild) + self.pipeline = pbuild.build() + + def test_construction(self): + # pipeline only + lib = BSIE(self.pipeline) + self.assertSetEqual(lib.predicates, { + ns.bse.filename, + ns.bse.filesize, + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ + ''')) + + # specify collect + lib = BSIE(self.pipeline, collect={ + ns.bse.filesize, + ns.bse.author, + ns.bse.inexistent, + }) + self.assertSetEqual(lib.predicates, { + ns.bse.filesize, + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + # empty collect is disregarded + lib = BSIE(self.pipeline, collect={}) + self.assertSetEqual(lib.predicates, { + ns.bse.filename, + ns.bse.filesize, + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "false"^^xsd:boolean . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . 
+ + ''')) + + # specify discard + lib = BSIE(self.pipeline, discard={ + ns.bse.filesize, + ns.bse.filename, + ns.bse.inexistent, + }) + self.assertSetEqual(lib.predicates, { + ns.bse.author, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + ''')) + + # specify collect and discard + lib = BSIE(self.pipeline, + collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar}, + discard={ns.bse.author, ns.bse.foo, ns.bse.foobar}, + ) + self.assertSetEqual(lib.predicates, { + ns.bse.filesize, + }) + self.assertEqual(lib.schema, schema.Schema.from_string(''' + prefix rdfs: + prefix xsd: + prefix bsfs: + prefix bse: + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + # common definitions + xsd:integer rdfs:subClassOf bsfs:Literal . + + bse:filesize rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:integer; + bsfs:unique "false"^^xsd:boolean . 
+ + ''')) + + + def test_from_file(self): + # setup + lib = BSIE(self.pipeline) + self.assertSetEqual(set(lib.predicates), { + ns.bse.filesize, + ns.bse.filename, + ns.bse.author, + }) + content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' + subject = Node(ns.bsfs.Entity, self.prefix + content_hash) + testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') + + # from_file extracts all available triples + self.assertSetEqual(set(lib.from_file(testfile)), { + (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'), + (subject, lib.schema.predicate(ns.bse.filesize), 12), + (subject, lib.schema.predicate(ns.bse.author), 'Me, myself, and I'), + }) + + # from_file respects predicate argument + self.assertSetEqual(set(lib.from_file(testfile, {ns.bse.filename, ns.bse.invalid})), { + (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'), + }) + + +## main ## + +if __name__ == '__main__': + unittest.main() + +## EOF ## diff --git a/test/lib/testfile.t b/test/lib/testfile.t new file mode 100644 index 0000000..3b18e51 --- /dev/null +++ b/test/lib/testfile.t @@ -0,0 +1 @@ +hello world diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py index f98b329..0dd8c75 100644 --- a/test/tools/test_pipeline.py +++ b/test/tools/test_pipeline.py @@ -95,7 +95,7 @@ class TestPipeline(unittest.TestCase): # build pipeline pipeline = Pipeline(self.prefix, self.ext2rdr) # build objects for tests - content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427' + content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' subject = Node(ns.bsfs.Entity, self.prefix + content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) @@ -108,7 +108,7 @@ class TestPipeline(unittest.TestCase): # extract given predicates self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), { (subject, p_filename, 
'testfile.t'), - (subject, p_filesize, 11), + (subject, p_filesize, 12), }) self.assertSetEqual(set(pipeline(testfile, {p_author})), { (subject, p_author, 'Me, myself, and I'), @@ -117,12 +117,12 @@ class TestPipeline(unittest.TestCase): (subject, p_filename, 'testfile.t'), }) self.assertSetEqual(set(pipeline(testfile, {p_filesize})), { - (subject, p_filesize, 11), + (subject, p_filesize, 12), }) # extract all predicates self.assertSetEqual(set(pipeline(testfile)), { (subject, p_filename, 'testfile.t'), - (subject, p_filesize, 11), + (subject, p_filesize, 12), (subject, p_author, 'Me, myself, and I'), (subject, p_rating, 123), }) @@ -158,6 +158,18 @@ class TestPipeline(unittest.TestCase): p_filename = pipeline.schema.predicate(ns.bse.filename) self.assertSetEqual(set(pipeline(testfile, {p_filename})), set()) + def test_predicates(self): + # build pipeline + pipeline = Pipeline(self.prefix, self.ext2rdr) + # + self.assertSetEqual(set(pipeline.predicates()), { + pipeline.schema.predicate(ns.bsfs.Predicate), + pipeline.schema.predicate(ns.bse.filename), + pipeline.schema.predicate(ns.bse.filesize), + pipeline.schema.predicate(ns.bse.author), + pipeline.schema.predicate(ns.bse.rating), + }) + ## main ## diff --git a/test/tools/testfile.t b/test/tools/testfile.t index 58bf1b8..3b18e51 100644 --- a/test/tools/testfile.t +++ b/test/tools/testfile.t @@ -1 +1 @@ -hello worl +hello world -- cgit v1.2.3