about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rwxr-xr-x bsie.app 49
-rw-r--r-- bsie/__init__.py 6
-rw-r--r-- bsie/apps/__init__.py 20
-rw-r--r-- bsie/apps/index.py 131
-rw-r--r-- bsie/apps/info.py 74
-rw-r--r-- bsie/base/errors.py 6
-rw-r--r-- bsie/lib/__init__.py 13
-rw-r--r-- bsie/lib/bsie.py 80
-rw-r--r-- bsie/tools/pipeline.py 4
-rw-r--r-- bsie/utils/namespaces.py 2
-rw-r--r-- test/apps/__init__.py 0
-rw-r--r-- test/apps/test_index.py 159
-rw-r--r-- test/apps/test_info.py 42
-rw-r--r-- test/apps/testdir/alpha/alpha_first 16
-rw-r--r-- test/apps/testdir/alpha/alpha_second 12
-rw-r--r-- test/apps/testdir/alpha/omega/omega_first 14
-rw-r--r-- test/apps/testdir/alpha/omega/omega_second 10
-rw-r--r-- test/apps/testdir/foo/bar/bar_first 20
-rw-r--r-- test/apps/testdir/foo/bar/bar_second 14
-rw-r--r-- test/apps/testdir/foo/foo_first 11
-rw-r--r-- test/apps/testdir/foo/foo_second 12
-rw-r--r-- test/apps/testdir/td_first 18
-rw-r--r-- test/apps/testdir/td_second 14
-rw-r--r-- test/apps/testfile 16
-rw-r--r-- test/lib/__init__.py 0
-rw-r--r-- test/lib/test_bsie.py 231
-rw-r--r-- test/lib/testfile.t 1
-rw-r--r-- test/tools/test_pipeline.py 20
-rw-r--r-- test/tools/testfile.t 2
29 files changed, 991 insertions, 6 deletions
diff --git a/bsie.app b/bsie.app
new file mode 100755
index 0000000..ba9cee7
--- /dev/null
+++ b/bsie.app
@@ -0,0 +1,49 @@
+"""BSIE tools.
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import argparse
+import typing
+
+# module imports
+import bsie
+import bsie.apps
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'main',
+ )
+
+# config
+apps = {
+ 'index' : bsie.apps.index,
+ 'info' : bsie.apps.info,
+ }
+
+
+## code ##
+
+def main(argv):
+    """Black Star File System maintenance tools."""
+    # NOTE: The docstring doubles as the argparse description, hence it is kept short.
+    parser = argparse.ArgumentParser(description=main.__doc__, prog='bsie')
+    parser.add_argument('--version', action='version',
+        version='%(prog)s version {}.{}.{}'.format(*bsie.version_info))
+    parser.add_argument('app', choices=apps.keys(),
+        help='Select the application to run.')
+    # everything after the application name is forwarded verbatim to the application
+    parser.add_argument('rest', nargs=argparse.REMAINDER)
+    # parse the arguments passed by the caller (not the process-wide sys.argv)
+    args = parser.parse_args(argv)
+    # run application and hand its result back to the caller
+    return apps[args.app](args.rest)
+
+
+## main ##
+
+if __name__ == '__main__':
+ import sys
+ main(sys.argv[1:])
+
+## EOF ##
diff --git a/bsie/__init__.py b/bsie/__init__.py
index 2f2477a..2b874bd 100644
--- a/bsie/__init__.py
+++ b/bsie/__init__.py
@@ -5,8 +5,14 @@ A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
+import collections
import typing
+# constants
+# Package version; the fields are (major, minor, micro).
+version_info = collections.namedtuple('version_info',
+    ('major', 'minor', 'micro')) \
+    (0, 0, 1)
+
# exports
__all__: typing.Sequence[str] = []
diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py
new file mode 100644
index 0000000..a548c3c
--- /dev/null
+++ b/bsie/apps/__init__.py
@@ -0,0 +1,20 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# inner-module imports
+from .index import main as index
+from .info import main as info
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'index',
+ 'info',
+ )
+
+## EOF ##
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
new file mode 100644
index 0000000..821aa4c
--- /dev/null
+++ b/bsie/apps/index.py
@@ -0,0 +1,131 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import argparse
+import os
+import typing
+
+# bsfs imports
+import bsfs
+
+# bsie imports
+from bsie.base import errors
+from bsie.lib.bsie import BSIE
+from bsie.tools import builder
+from bsie.utils.bsfs import URI
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'main',
+ )
+
+
+## code ##
+
+def main(argv):
+    """Index files or directories into BSFS."""
+    # NOTE: The docstring doubles as the argparse description, hence it is kept short.
+    # Returns the BSFS storage that was populated, or None if --print was given.
+    parser = argparse.ArgumentParser(description=main.__doc__, prog='index')
+    parser.add_argument('--user', type=URI, default=URI('http://example.com/me'),
+        help='URI of the user. Also serves as base of the URIs of indexed files.')
+    parser.add_argument('--collect', action='append', default=[],
+        help='Predicate to extract (repeatable). All available predicates if omitted.')
+    parser.add_argument('--discard', action='append', default=[],
+        help='Predicate to exclude from extraction (repeatable).')
+    parser.add_argument('-r', '--recursive', action='store_true', default=False,
+        help='Descend into directories and index the files therein.')
+    parser.add_argument('--follow', action='store_true', default=False,
+        help='Follow symbolic links to directories when descending.')
+    parser.add_argument('--print', action='store_true', default=False,
+        help='Print the extracted triples instead of writing them to a storage.')
+    parser.add_argument('input_file', nargs=argparse.REMAINDER,
+        help='Files (or directories, with --recursive) to index.')
+    args = parser.parse_args(argv)
+
+    # Reject unusable inputs up front. Otherwise they would only surface while
+    # walking (as an UnreachableError), possibly after some files were processed.
+    for path in args.input_file:
+        if os.path.isfile(path):
+            continue
+        if os.path.isdir(path):
+            if not args.recursive:
+                parser.error(f'{path} is a directory (use --recursive to index it)')
+        else:
+            parser.error(f'{path} is not a file or directory')
+
+    # FIXME: Read reader/extractor configs from a config file
+    # reader builder
+    rbuild = builder.ReaderBuilder({})
+    # extractor builder
+    ebuild = builder.ExtractorBuilder([
+        {'bsie.extractor.generic.path.Path': {}},
+        {'bsie.extractor.generic.stat.Stat': {}},
+        {'bsie.extractor.generic.constant.Constant': dict(
+            tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')],
+            schema='''
+                bse:author rdfs:subClassOf bsfs:Predicate ;
+                    rdfs:domain bsfs:Entity ;
+                    rdfs:range xsd:string ;
+                    bsfs:unique "true"^^xsd:boolean .
+                ''',
+        )},
+    ])
+    # pipeline builder
+    # the prefix for file URIs is derived from the user URI: <user>/file#
+    prefix = URI(args.user + ('file#' if args.user.endswith('/') else '/file#'))
+    pbuild = builder.PipelineBuilder(
+        prefix,
+        rbuild,
+        ebuild,
+        )
+
+    # build pipeline
+    pipeline = pbuild.build()
+    # build BSIE frontend
+    bsie = BSIE(pipeline, args.collect, args.discard)
+
+
+    def walk(handle):
+        """Walk through given input files and pass every extracted triple to *handle*."""
+        # FIXME: collect all triples by node, set all predicates at once
+        # FIXME: simplify code (below but maybe also above)
+        # FIXME: How to handle dependencies between data?
+        #        E.g. do I still want to link to a tag despite not being permitted to set its label?
+        # FIXME: node renaming?
+
+        # index input paths
+        for path in args.input_file:
+            if os.path.isdir(path) and args.recursive:
+                for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow):
+                    for filename in filenames:
+                        for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)):
+                            handle(node, pred, value)
+            elif os.path.isfile(path):
+                for node, pred, value in bsie.from_file(path):
+                    handle(node, pred, value)
+            else:
+                # inputs were validated above; only reachable if a path changed in the meantime
+                raise errors.UnreachableError()
+
+
+    if args.print:
+        # dry run: show the triples, don't create a storage
+        walk(print)
+        return None
+
+    # initialize bsfs
+    # NOTE: With persistent storages, the schema migration will be a separate operation.
+    #       Here, we'd simply examine the schema and potentially discard more predicates.
+    store = bsfs.Open({
+        'Graph': {
+            'user': args.user,
+            'backend': {
+                'SparqlStore': {}},
+        }})
+    store.migrate(bsie.schema)
+    # process files
+    def handle(node, pred, value):
+        store.node(node.node_type, node.uri).set(pred.uri, value)
+    walk(handle)
+    # return store
+    return store
+
+
+
+## main ##
+
+if __name__ == '__main__':
+ import sys
+ main(sys.argv[1:])
+
+## EOF ##
diff --git a/bsie/apps/info.py b/bsie/apps/info.py
new file mode 100644
index 0000000..8cc6dca
--- /dev/null
+++ b/bsie/apps/info.py
@@ -0,0 +1,74 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import argparse
+import sys
+import typing
+
+# bsie imports
+from bsie.base import errors
+from bsie.tools import builder
+from bsie.utils.bsfs import URI
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'main',
+ )
+
+
+## code ##
+
+def main(argv):
+    """Show information from BSIE."""
+    # NOTE: The docstring doubles as the argparse description.
+    cli = argparse.ArgumentParser(description=main.__doc__, prog='info')
+    cli.add_argument('what', choices=('predicates', ),
+        help='Select what information to show.')
+    options = cli.parse_args(argv)
+
+    # FIXME: Read reader/extractor configs from a config file
+    # no reader configuration
+    readers = builder.ReaderBuilder({})
+    # path, stat, and a constant author predicate
+    author = {
+        'tuples': [('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')],
+        'schema': '''
+                bse:author rdfs:subClassOf bsfs:Predicate ;
+                    rdfs:domain bsfs:Entity ;
+                    rdfs:range xsd:string ;
+                    bsfs:unique "true"^^xsd:boolean .
+                ''',
+        }
+    extractors = builder.ExtractorBuilder([
+        {'bsie.extractor.generic.path.Path': {}},
+        {'bsie.extractor.generic.stat.Stat': {}},
+        {'bsie.extractor.generic.constant.Constant': author},
+    ])
+    # assemble the pipeline; the prefix is not actually used
+    pipeline = builder.PipelineBuilder(
+        URI('http://example.com/me/file#'),
+        readers,
+        extractors,
+        ).build()
+
+    # args.what is already checked by argparse
+    if options.what != 'predicates':
+        raise errors.UnreachableError()
+
+    # show predicates, one URI per line
+    for pred in pipeline.schema.predicates():
+        print(pred.uri)
+
+
+## main ##
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
+
+## EOF ##
diff --git a/bsie/base/errors.py b/bsie/base/errors.py
index 760351f..dc3c30e 100644
--- a/bsie/base/errors.py
+++ b/bsie/base/errors.py
@@ -33,4 +33,10 @@ class ExtractorError(_BSIEError):
class ReaderError(_BSIEError):
"""The Reader failed to read the given file."""
+class ProgrammingError(_BSIEError):
+    """An assertion-like error that indicates a code-base issue.
+
+    Signals a defect in the code itself rather than a problem with the input.
+    """
+
+class UnreachableError(ProgrammingError):
+    """Bravo, you've reached a point in code that should logically not be reachable.
+
+    Raised in branches that preceding checks are expected to rule out,
+    e.g. a command line choice that argparse has already validated.
+    """
+
## EOF ##
diff --git a/bsie/lib/__init__.py b/bsie/lib/__init__.py
new file mode 100644
index 0000000..f6c9018
--- /dev/null
+++ b/bsie/lib/__init__.py
@@ -0,0 +1,13 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# exports
+__all__: typing.Sequence[str] = []
+
+## EOF ##
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
new file mode 100644
index 0000000..aeccc8c
--- /dev/null
+++ b/bsie/lib/bsie.py
@@ -0,0 +1,80 @@
+"""
+
+Part of the bsie module.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import typing
+
+# bsie imports
+from bsie.tools.pipeline import Pipeline
+from bsie.utils import node, ns
+from bsie.utils.bsfs import URI, schema as schema_
+
+# exports
+__all__: typing.Sequence[str] = (
+ 'BSIE',
+ )
+
+
+## code ##
+
+class BSIE():
+    """Extract triples from files.
+
+    The extracted predicates are controlled by *collect* (extract at most
+    these) and *discard* (never extract these). Note that this only affects
+    principal predicates, not auxiliary predicates like, e.g., tag labels.
+
+    """
+
+    # URIs of the predicates to extract.
+    predicates: typing.Set[URI]
+
+    # local schema, limited to the predicates to extract.
+    schema: schema_.Schema
+
+    def __init__(
+            self,
+            # pipeline that produces the triples.
+            pipeline: Pipeline,
+            # predicates to extract at most. None (or empty) implies all available w.r.t. extractors.
+            collect: typing.Optional[typing.Iterable[URI]] = None,
+            # predicates to discard.
+            discard: typing.Optional[typing.Iterable[URI]] = None,
+            ):
+        self.pipeline = pipeline
+        # every predicate the pipeline offers is a candidate
+        selected = {pred.uri for pred in pipeline.predicates()}
+        # an empty *collect* places no restriction, same as None
+        whitelist = set(collect) if collect is not None else set()
+        if whitelist:
+            selected &= whitelist
+        # remove the explicitly unwanted predicates
+        if discard is not None:
+            selected -= set(discard)
+        # ns.bsfs.Predicate itself is never extracted
+        selected.discard(ns.bsfs.Predicate)
+        self.predicates = selected
+        # compile a schema that only contains the requested predicates (and implied types)
+        self.schema = schema_.Schema({
+            pipeline.schema.predicate(uri) for uri in selected})
+
+    def from_file(
+            self,
+            path: URI,
+            predicates: typing.Optional[typing.Iterable[URI]] = None,
+            ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+        """Produce triples for a given *path*. Limit to *predicates* if given."""
+        if predicates is None:
+            # no restriction: use everything enabled on this instance
+            requested = self.predicates
+        else:
+            # only predicates enabled on this instance can be requested
+            requested = set(predicates) & self.predicates
+        # translate the URIs into predicates of the local schema
+        lookup = self.schema.predicate
+        # invoke pipeline
+        yield from self.pipeline(path, {lookup(uri) for uri in requested})
+
+## EOF ##
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
index 8e1c992..da422c0 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/tools/pipeline.py
@@ -70,6 +70,10 @@ class Pipeline():
and self._prefix == other._prefix \
and self._ext2rdr == other._ext2rdr
+    def predicates(self) -> typing.Iterator[_schema.Predicate]:
+        """Return the predicates that are extracted from a file, each one once."""
+        unique = set()
+        for ext in self._ext2rdr:
+            unique.update(ext.predicates())
+        return iter(unique)
+
def __call__(
self,
path: URI,
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index 13be96b..2fcb2dc 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -13,7 +13,7 @@ from . import bsfs as _bsfs
# constants
bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#')
bsfs = _bsfs.Namespace('http://bsfs.ai/schema/')
-bsm = _bsfs.Namespace('http://bsfs.ai/schema/meta#')
+bsm = _bsfs.Namespace('http://bsfs.ai/schema/Meta#')
xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema#')
# export
diff --git a/test/apps/__init__.py b/test/apps/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/apps/__init__.py
diff --git a/test/apps/test_index.py b/test/apps/test_index.py
new file mode 100644
index 0000000..6d47df8
--- /dev/null
+++ b/test/apps/test_index.py
@@ -0,0 +1,159 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import io
+import os
+import rdflib
+import sys
+import unittest
+
+# bsie imports
+from bsie.utils import ns
+
+# objects to test
+from bsie.apps.index import main
+
+
+## code ##
+
+class TestIndex(unittest.TestCase):
+    """Run the index application on the test files and check the produced triples."""
+
+    # URI prefix of the indexed files, given the user http://example.com/me
+    _prefix = 'http://example.com/me/file#'
+
+    # (node identifier, file name, file size) of each file in testdir, plus testfile
+    _files = (
+        ('2f4109b40107cc50e0884755a1a961ed126887e49b8dbaf0e146b2e226aa6647', 'alpha_second', '696'),
+        ('441f3d10c8ff489fe8e33e639606512f6c463151cc429de7e554b9af670c2ece', 'omega_second', '503'),
+        ('69b98ecf7aff3e95b09688ba93331678eb8397817111f674c9558e6dd8f5e871', 'td_first', '911'),
+        ('78f7eb7f0d8221cdb2cb26c978fa42a11f75eb87becc768f4474134cb1e06926', 'testfile', '885'),
+        ('80818b8ec2ee1919116dba9c8a7e0a4608313cf3b463cd88e9ed77a700dd92d3', 'bar_first', '956'),
+        ('976d2ea0e58488678cc7e435fbfadabfb6eb6cf50ad51862f38f73729ed11795', 'omega_first', '648'),
+        ('997e2fbb7494a3818ec782d2bc87bf1cffafba6b9c0f658e4a6c18a723e944d3', 'alpha_first', '754'),
+        ('a8af899ecdab60dfaea8ec7f934053624c80a1054539e163f2c7eaa986c2777d', 'foo_second', '585'),
+        ('b8fd7fba818254166a6043195004138ebda6923e012442f819a2c49671136c70', 'bar_second', '636'),
+        ('d43758ace82154a1cc10ca0dfef63cb20dd831f9c87edd6dc06539eefe67371d', 'foo_first', '546'),
+        ('d803187cbf3676ae9d38126270a6152c60431589aa3bb3824baf8954e9c097f1', 'td_second', '703'),
+        )
+
+    def _argv(self, *options):
+        """Return the command line shared by all tests, preceded by *options*."""
+        here = os.path.dirname(__file__)
+        return [
+            *options,
+            '-r',
+            '--user', 'http://example.com/me',
+            os.path.join(here, 'testdir'),
+            os.path.join(here, 'testfile'),
+            ]
+
+    def test_main(self):
+        bsfs = main(self._argv())
+        graph = bsfs._Graph__backend.graph
+
+        # type, author, filename, and filesize are expected for every file
+        expected = set()
+        for ident, name, size in self._files:
+            subject = rdflib.URIRef(self._prefix + ident)
+            expected |= {
+                (subject, rdflib.RDF.type, rdflib.URIRef(ns.bsfs.Entity)),
+                (subject, rdflib.URIRef(ns.bse.author), rdflib.Literal('Me, myself, and I', datatype=rdflib.XSD.string)),
+                (subject, rdflib.URIRef(ns.bse.filename), rdflib.Literal(name, datatype=rdflib.XSD.string)),
+                (subject, rdflib.URIRef(ns.bse.filesize), rdflib.Literal(size, datatype=rdflib.XSD.integer)),
+                }
+        self.assertTrue(set(graph).issuperset(expected))
+
+        # NOTE: we don't check the value of ns.bsm.t_created since it depends on the execution time.
+        # Triples would look like this:
+        # (rdflib.URIRef(prefix + '<identifier>'), rdflib.URIRef(ns.bsm.t_created), rdflib.Literal('1670..........', datatype=rdflib.XSD.integer)),
+        # instead, we simply check if there's such a predicate for each file
+        self.assertSetEqual(
+            {sub for sub, _ in graph.subject_objects(rdflib.URIRef(ns.bsm.t_created))},
+            {rdflib.URIRef(self._prefix + ident) for ident, _, _ in self._files})
+
+    def test_print(self):
+        stdout, sys.stdout = sys.stdout, io.StringIO()
+        try:
+            main(self._argv('--print'))
+        finally:
+            # always restore stdout, otherwise a failure would swallow all subsequent output
+            outbuf, sys.stdout = sys.stdout, stdout
+
+        # one line per triple: author, filename, and filesize of every file
+        expected = set()
+        for ident, name, size in self._files:
+            node = f'Node(http://bsfs.ai/schema/Entity, {self._prefix}{ident})'
+            expected |= {
+                f'{node} Predicate({ns.bse.author}) Me, myself, and I',
+                f'{node} Predicate({ns.bse.filename}) {name}',
+                f'{node} Predicate({ns.bse.filesize}) {size}',
+                }
+        self.assertSetEqual(set(outbuf.getvalue().split('\n')) - {''}, expected)
+
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/apps/test_info.py b/test/apps/test_info.py
new file mode 100644
index 0000000..60a540e
--- /dev/null
+++ b/test/apps/test_info.py
@@ -0,0 +1,42 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import argparse
+import io
+import os
+import sys
+import unittest
+
+# objects to test
+from bsie.apps.info import main
+
+
+## code ##
+
+class TestIndex(unittest.TestCase):
+    """Check the output and argument handling of the info application."""
+
+    def test_predicates(self):
+        stdout, sys.stdout = sys.stdout, io.StringIO()
+        try:
+            # show predicates infos
+            main(['predicates'])
+        finally:
+            # always restore stdout, otherwise a failure would swallow all subsequent output
+            outbuf, sys.stdout = sys.stdout, stdout
+        # verify output: one predicate URI per line, empty lines ignored
+        self.assertSetEqual({pred for pred in outbuf.getvalue().split('\n') if pred != ''}, {
+            'http://bsfs.ai/schema/Entity#author',
+            'http://bsfs.ai/schema/Predicate',
+            'http://bsfs.ai/schema/Entity#filename',
+            'http://bsfs.ai/schema/Entity#filesize',
+            })
+
+    def test_invalid(self):
+        # a value outside of the permitted choices makes the application exit
+        self.assertRaises(SystemExit, main, ['foobar'])
+
+## main ##
+
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/apps/testdir/alpha/alpha_first b/test/apps/testdir/alpha/alpha_first
new file mode 100644
index 0000000..f96fdee
--- /dev/null
+++ b/test/apps/testdir/alpha/alpha_first
@@ -0,0 +1,16 @@
+Turpis tincidunt id aliquet risus feugiat in ante metus.
+Vel turpis nunc eget lorem dolor.
+Lorem mollis aliquam ut porttitor leo a diam sollicitudin.
+Sit amet mattis vulputate enim nulla aliquet porttitor lacus luctus.
+Vitae et leo duis ut diam.
+Integer eget aliquet nibh praesent tristique magna sit.
+Volutpat sed cras ornare arcu dui.
+Consectetur adipiscing elit duis tristique sollicitudin nibh.
+Interdum varius sit amet mattis vulputate.
+A arcu cursus vitae congue.
+Risus nec feugiat in fermentum posuere urna nec tincidunt praesent.
+Sit amet dictum sit amet justo donec enim diam.
+Maecenas accumsan lacus vel facilisis.
+Erat velit scelerisque in dictum non consectetur a.
+Tempor orci dapibus ultrices in iaculis nunc.
+Nisi lacus sed viverra tellus.
diff --git a/test/apps/testdir/alpha/alpha_second b/test/apps/testdir/alpha/alpha_second
new file mode 100644
index 0000000..ae83ce8
--- /dev/null
+++ b/test/apps/testdir/alpha/alpha_second
@@ -0,0 +1,12 @@
+Et sollicitudin ac orci phasellus egestas tellus rutrum tellus.
+Orci dapibus ultrices in iaculis nunc sed augue.
+Tincidunt vitae semper quis lectus nulla at.
+Maecenas ultricies mi eget mauris pharetra et.
+Porttitor massa id neque aliquam vestibulum morbi blandit.
+Et magnis dis parturient montes nascetur ridiculus mus mauris.
+Ac orci phasellus egestas tellus rutrum tellus pellentesque.
+Donec ac odio tempor orci dapibus.
+Quis imperdiet massa tincidunt nunc pulvinar sapien et ligula.
+Potenti nullam ac tortor vitae purus faucibus ornare suspendisse sed.
+Orci porta non pulvinar neque laoreet suspendisse interdum consectetur.
+Mauris pellentesque pulvinar pellentesque habitant morbi tristique.
diff --git a/test/apps/testdir/alpha/omega/omega_first b/test/apps/testdir/alpha/omega/omega_first
new file mode 100644
index 0000000..e594737
--- /dev/null
+++ b/test/apps/testdir/alpha/omega/omega_first
@@ -0,0 +1,14 @@
+Neque gravida in fermentum et sollicitudin.
+Sodales ut eu sem integer vitae justo eget magna fermentum.
+Amet nulla facilisi morbi tempus iaculis.
+Proin sagittis nisl rhoncus mattis rhoncus urna neque.
+Aliquam sem fringilla ut morbi tincidunt augue interdum velit euismod.
+Sagittis eu volutpat odio facilisis.
+Aliquet porttitor lacus luctus accumsan tortor posuere ac ut.
+Sed arcu non odio euismod lacinia.
+Faucibus et molestie ac feugiat.
+Urna neque viverra justo nec ultrices dui sapien eget.
+Amet commodo nulla facilisi nullam.
+Pretium lectus quam id leo in vitae.
+A cras semper auctor neque.
+Sed arcu non odio euismod lacinia at quis risus sed.
diff --git a/test/apps/testdir/alpha/omega/omega_second b/test/apps/testdir/alpha/omega/omega_second
new file mode 100644
index 0000000..0c9857d
--- /dev/null
+++ b/test/apps/testdir/alpha/omega/omega_second
@@ -0,0 +1,10 @@
+Commodo sed egestas egestas fringilla phasellus.
+Ac tortor dignissim convallis aenean et tortor at risus.
+Lorem dolor sed viverra ipsum nunc aliquet bibendum enim.
+Quis lectus nulla at volutpat diam ut.
+Tincidunt id aliquet risus feugiat in ante metus.
+Tincidunt arcu non sodales neque.
+Amet est placerat in egestas erat imperdiet sed euismod.
+Duis tristique sollicitudin nibh sit amet.
+Sed arcu non odio euismod lacinia at.
+Ullamcorper morbi tincidunt ornare massa eget egestas purus viverra accumsan.
diff --git a/test/apps/testdir/foo/bar/bar_first b/test/apps/testdir/foo/bar/bar_first
new file mode 100644
index 0000000..e9edb3f
--- /dev/null
+++ b/test/apps/testdir/foo/bar/bar_first
@@ -0,0 +1,20 @@
+Elementum eu facilisis sed odio morbi quis commodo.
+Enim nunc faucibus a pellentesque sit amet porttitor.
+Etiam non quam lacus suspendisse faucibus interdum.
+Viverra aliquet eget sit amet tellus.
+Arcu vitae elementum curabitur vitae.
+Feugiat vivamus at augue eget arcu dictum.
+Commodo quis imperdiet massa tincidunt nunc.
+Urna duis convallis convallis tellus id interdum.
+Commodo sed egestas egestas fringilla phasellus.
+Sodales neque sodales ut etiam sit amet nisl.
+Sem integer vitae justo eget magna fermentum iaculis.
+Id diam maecenas ultricies mi.
+Aliquet nibh praesent tristique magna sit amet purus gravida.
+Ut enim blandit volutpat maecenas volutpat.
+Ipsum a arcu cursus vitae congue mauris.
+Donec ultrices tincidunt arcu non.
+Nulla posuere sollicitudin aliquam ultrices sagittis orci a scelerisque purus.
+Egestas maecenas pharetra convallis posuere.
+Feugiat in fermentum posuere urna nec.
+Nulla malesuada pellentesque elit eget gravida cum sociis.
diff --git a/test/apps/testdir/foo/bar/bar_second b/test/apps/testdir/foo/bar/bar_second
new file mode 100644
index 0000000..fb95896
--- /dev/null
+++ b/test/apps/testdir/foo/bar/bar_second
@@ -0,0 +1,14 @@
+Augue ut lectus arcu bibendum at varius vel pharetra vel.
+Mattis aliquam faucibus purus in.
+In tellus integer feugiat scelerisque.
+Eget velit aliquet sagittis id consectetur purus ut faucibus pulvinar.
+Augue mauris augue neque gravida.
+Pulvinar neque laoreet suspendisse interdum consectetur libero id faucibus.
+Tellus elementum sagittis vitae et leo duis.
+Eget est lorem ipsum dolor sit amet consectetur.
+Volutpat sed cras ornare arcu.
+Faucibus a pellentesque sit amet.
+Turpis egestas maecenas pharetra convallis.
+Faucibus interdum posuere lorem ipsum dolor sit amet.
+Id semper risus in hendrerit.
+Amet volutpat consequat mauris nunc.
diff --git a/test/apps/testdir/foo/foo_first b/test/apps/testdir/foo/foo_first
new file mode 100644
index 0000000..ed1e052
--- /dev/null
+++ b/test/apps/testdir/foo/foo_first
@@ -0,0 +1,11 @@
+Venenatis tellus in metus vulputate eu scelerisque felis imperdiet proin.
+Orci phasellus egestas tellus rutrum.
+Feugiat vivamus at augue eget arcu dictum varius.
+Justo eget magna fermentum iaculis eu non.
+A erat nam at lectus urna duis.
+Quam quisque id diam vel quam elementum pulvinar etiam.
+Amet commodo nulla facilisi nullam vehicula ipsum a.
+Sapien faucibus et molestie ac feugiat.
+Aliquam vestibulum morbi blandit cursus risus at ultrices.
+Purus faucibus ornare suspendisse sed nisi.
+In massa tempor nec feugiat nisl pretium fusce id velit.
diff --git a/test/apps/testdir/foo/foo_second b/test/apps/testdir/foo/foo_second
new file mode 100644
index 0000000..95e46ae
--- /dev/null
+++ b/test/apps/testdir/foo/foo_second
@@ -0,0 +1,12 @@
+Sit amet consectetur adipiscing elit ut aliquam purus.
+Vulputate dignissim suspendisse in est ante in nibh.
+Eu feugiat pretium nibh ipsum consequat nisl vel pretium.
+Egestas purus viverra accumsan in nisl.
+Ac odio tempor orci dapibus ultrices.
+At imperdiet dui accumsan sit amet.
+Elementum integer enim neque volutpat ac tincidunt vitae semper.
+Mi in nulla posuere sollicitudin aliquam ultrices sagittis.
+Aliquam sem et tortor consequat.
+Tristique senectus et netus et malesuada fames ac turpis.
+Quis hendrerit dolor magna eget est lorem ipsum.
+Ut consequat semper viverra nam libero.
diff --git a/test/apps/testdir/td_first b/test/apps/testdir/td_first
new file mode 100644
index 0000000..21eab9c
--- /dev/null
+++ b/test/apps/testdir/td_first
@@ -0,0 +1,18 @@
+Urna duis convallis convallis tellus id interdum velit.
+Risus in hendrerit gravida rutrum.
+Odio pellentesque diam volutpat commodo sed.
+Duis convallis convallis tellus id interdum velit laoreet id donec.
+Duis at tellus at urna.
+Egestas maecenas pharetra convallis posuere morbi leo urna molestie at.
+Et leo duis ut diam quam nulla porttitor massa id.
+Nunc eget lorem dolor sed viverra ipsum nunc aliquet bibendum.
+Sodales ut etiam sit amet nisl purus in.
+Ac felis donec et odio pellentesque diam volutpat commodo.
+Nunc mi ipsum faucibus vitae aliquet.
+Volutpat ac tincidunt vitae semper quis lectus nulla at volutpat.
+Mollis aliquam ut porttitor leo.
+Vestibulum rhoncus est pellentesque elit ullamcorper dignissim cras.
+Pulvinar proin gravida hendrerit lectus a.
+Ultrices dui sapien eget mi proin.
+Dui vivamus arcu felis bibendum ut.
+Aliquam eleifend mi in nulla posuere sollicitudin aliquam ultrices sagittis.
diff --git a/test/apps/testdir/td_second b/test/apps/testdir/td_second
new file mode 100644
index 0000000..496ff0e
--- /dev/null
+++ b/test/apps/testdir/td_second
@@ -0,0 +1,14 @@
+Egestas purus viverra accumsan in.
+Auctor urna nunc id cursus metus aliquam eleifend.
+Morbi tincidunt augue interdum velit.
+In egestas erat imperdiet sed euismod nisi porta lorem mollis.
+Sed augue lacus viverra vitae congue eu consequat.
+Ut pharetra sit amet aliquam id.
+Aenean euismod elementum nisi quis eleifend.
+Hac habitasse platea dictumst vestibulum rhoncus est pellentesque elit ullamcorper.
+Eget nunc lobortis mattis aliquam faucibus purus.
+Sit amet luctus venenatis lectus magna fringilla.
+Placerat orci nulla pellentesque dignissim enim sit amet venenatis.
+Montes nascetur ridiculus mus mauris.
+Morbi enim nunc faucibus a pellentesque sit amet.
+Et netus et malesuada fames ac turpis egestas.
diff --git a/test/apps/testfile b/test/apps/testfile
new file mode 100644
index 0000000..b56928e
--- /dev/null
+++ b/test/apps/testfile
@@ -0,0 +1,16 @@
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+Fames ac turpis egestas maecenas pharetra convallis posuere morbi.
+Etiam erat velit scelerisque in dictum non consectetur a erat.
+Dolor purus non enim praesent elementum facilisis.
+Nulla porttitor massa id neque aliquam vestibulum morbi blandit cursus.
+Adipiscing vitae proin sagittis nisl rhoncus mattis rhoncus urna neque.
+Aenean pharetra magna ac placerat.
+Pulvinar proin gravida hendrerit lectus a.
+Iaculis nunc sed augue lacus viverra vitae.
+Ac tortor vitae purus faucibus ornare suspendisse sed.
+Purus in mollis nunc sed id semper.
+Non consectetur a erat nam at lectus urna.
+In ante metus dictum at tempor commodo ullamcorper.
+Auctor augue mauris augue neque gravida in fermentum.
+Nunc scelerisque viverra mauris in.
+Morbi leo urna molestie at elementum.
diff --git a/test/lib/__init__.py b/test/lib/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/lib/__init__.py
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
new file mode 100644
index 0000000..277ac67
--- /dev/null
+++ b/test/lib/test_bsie.py
@@ -0,0 +1,231 @@
+"""
+
+Part of the bsie test suite.
+A copy of the license is provided with the project.
+Author: Matthias Baumgartner, 2022
+"""
+# imports
+import os
+import unittest
+
+# bsie imports
+from bsie.tools import builder
+from bsie.utils import ns
+from bsie.utils.bsfs import URI, schema
+from bsie.utils.node import Node
+
+# objects to test
+from bsie.lib.bsie import BSIE
+
+
+## code ##
+
+class TestBSIE(unittest.TestCase):
+ """Tests for BSIE: predicate selection via collect/discard, the schema, and from_file."""
+ def setUp(self):
+ """Build the pipeline used by all tests from the Path, Stat and Constant extractors."""
+ # reader builder (constructed with an empty configuration)
+ rbuild = builder.ReaderBuilder({})
+ # extractor builder; the Constant extractor contributes a fixed bse:author value
+ ebuild = builder.ExtractorBuilder([
+ {'bsie.extractor.generic.path.Path': {}},
+ {'bsie.extractor.generic.stat.Stat': {}},
+ {'bsie.extractor.generic.constant.Constant': dict(
+ tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')],
+ schema='''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ ''',
+ )},
+ ])
+ # build pipeline; self.prefix is reused by the tests to build expected node URIs
+ self.prefix = URI('http://example.com/local/file#')
+ pbuild = builder.PipelineBuilder(self.prefix, rbuild, ebuild)
+ self.pipeline = pbuild.build()
+
+ def test_construction(self):
+ """Check lib.predicates and lib.schema for each combination of collect and discard."""
+ # pipeline only: filename, filesize and author are all exposed
+ lib = BSIE(self.pipeline)
+ self.assertSetEqual(lib.predicates, {
+ ns.bse.filename,
+ ns.bse.filesize,
+ ns.bse.author,
+ })
+ self.assertEqual(lib.schema, schema.Schema.from_string('''
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+ prefix bsfs: <http://bsfs.ai/schema/>
+ prefix bse: <http://bsfs.ai/schema/Entity#>
+ # essential nodes
+ bsfs:Entity rdfs:subClassOf bsfs:Node .
+ # common definitions
+ xsd:string rdfs:subClassOf bsfs:Literal .
+ xsd:integer rdfs:subClassOf bsfs:Literal .
+
+ bse:filename rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "false"^^xsd:boolean .
+
+ bse:filesize rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer;
+ bsfs:unique "false"^^xsd:boolean .
+
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ '''))
+
+ # specify collect: only the listed predicates remain; ns.bse.inexistent is
+ # requested but is expected not to show up in lib.predicates
+ lib = BSIE(self.pipeline, collect={
+ ns.bse.filesize,
+ ns.bse.author,
+ ns.bse.inexistent,
+ })
+ self.assertSetEqual(lib.predicates, {
+ ns.bse.filesize,
+ ns.bse.author,
+ })
+ self.assertEqual(lib.schema, schema.Schema.from_string('''
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+ prefix bsfs: <http://bsfs.ai/schema/>
+ prefix bse: <http://bsfs.ai/schema/Entity#>
+ # essential nodes
+ bsfs:Entity rdfs:subClassOf bsfs:Node .
+ # common definitions
+ xsd:string rdfs:subClassOf bsfs:Literal .
+ xsd:integer rdfs:subClassOf bsfs:Literal .
+
+ bse:filesize rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer;
+ bsfs:unique "false"^^xsd:boolean .
+
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ '''))
+ # empty collect is disregarded
+ # NOTE: `{}` is an empty dict literal, not an empty set
+ lib = BSIE(self.pipeline, collect={})
+ self.assertSetEqual(lib.predicates, {
+ ns.bse.filename,
+ ns.bse.filesize,
+ ns.bse.author,
+ })
+ self.assertEqual(lib.schema, schema.Schema.from_string('''
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+ prefix bsfs: <http://bsfs.ai/schema/>
+ prefix bse: <http://bsfs.ai/schema/Entity#>
+ # essential nodes
+ bsfs:Entity rdfs:subClassOf bsfs:Node .
+ # common definitions
+ xsd:string rdfs:subClassOf bsfs:Literal .
+ xsd:integer rdfs:subClassOf bsfs:Literal .
+
+ bse:filename rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "false"^^xsd:boolean .
+
+ bse:filesize rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer;
+ bsfs:unique "false"^^xsd:boolean .
+
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ '''))
+
+ # specify discard: listed predicates are removed, only author remains;
+ # the expected schema below no longer contains xsd:integer
+ lib = BSIE(self.pipeline, discard={
+ ns.bse.filesize,
+ ns.bse.filename,
+ ns.bse.inexistent,
+ })
+ self.assertSetEqual(lib.predicates, {
+ ns.bse.author,
+ })
+ self.assertEqual(lib.schema, schema.Schema.from_string('''
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+ prefix bsfs: <http://bsfs.ai/schema/>
+ prefix bse: <http://bsfs.ai/schema/Entity#>
+ # essential nodes
+ bsfs:Entity rdfs:subClassOf bsfs:Node .
+ # common definitions
+ xsd:string rdfs:subClassOf bsfs:Literal .
+
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+
+ '''))
+
+ # specify collect and discard: author is in both and ends up removed,
+ # so only filesize is expected; foo, bar and foobar never show up
+ lib = BSIE(self.pipeline,
+ collect={ns.bse.filesize, ns.bse.author, ns.bse.foo, ns.bse.bar},
+ discard={ns.bse.author, ns.bse.foo, ns.bse.foobar},
+ )
+ self.assertSetEqual(lib.predicates, {
+ ns.bse.filesize,
+ })
+ self.assertEqual(lib.schema, schema.Schema.from_string('''
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+ prefix bsfs: <http://bsfs.ai/schema/>
+ prefix bse: <http://bsfs.ai/schema/Entity#>
+ # essential nodes
+ bsfs:Entity rdfs:subClassOf bsfs:Node .
+ # common definitions
+ xsd:integer rdfs:subClassOf bsfs:Literal .
+
+ bse:filesize rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer;
+ bsfs:unique "false"^^xsd:boolean .
+
+ '''))
+
+
+ def test_from_file(self):
+ """Check the triples that lib.from_file yields for testfile.t, with and without a predicate filter."""
+ # setup
+ lib = BSIE(self.pipeline)
+ self.assertSetEqual(set(lib.predicates), {
+ ns.bse.filesize,
+ ns.bse.filename,
+ ns.bse.author,
+ })
+ # NOTE(review): presumably the SHA-256 digest of testfile.t's content
+ # ('hello world' plus newline) -- confirm against the pipeline
+ content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
+ subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
+ testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
+
+ # from_file extracts all available triples
+ # (filesize 12 presumably counts 'hello world' plus the trailing newline)
+ self.assertSetEqual(set(lib.from_file(testfile)), {
+ (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'),
+ (subject, lib.schema.predicate(ns.bse.filesize), 12),
+ (subject, lib.schema.predicate(ns.bse.author), 'Me, myself, and I'),
+ })
+
+ # from_file respects predicate argument; ns.bse.invalid yields no triple
+ self.assertSetEqual(set(lib.from_file(testfile, {ns.bse.filename, ns.bse.invalid})), {
+ (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'),
+ })
+
+
+## main ##
+
+# run the tests of this module when the file is executed as a script
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##
diff --git a/test/lib/testfile.t b/test/lib/testfile.t
new file mode 100644
index 0000000..3b18e51
--- /dev/null
+++ b/test/lib/testfile.t
@@ -0,0 +1 @@
+hello world
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
index f98b329..0dd8c75 100644
--- a/test/tools/test_pipeline.py
+++ b/test/tools/test_pipeline.py
@@ -95,7 +95,7 @@ class TestPipeline(unittest.TestCase):
# build pipeline
pipeline = Pipeline(self.prefix, self.ext2rdr)
# build objects for tests
- content_hash = 'e3bb4ab54e4a50d75626a1f76814f152f4edc60a82ad724aa2aa922ca5534427'
+ content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
@@ -108,7 +108,7 @@ class TestPipeline(unittest.TestCase):
# extract given predicates
self.assertSetEqual(set(pipeline(testfile, {p_filename, p_filesize})), {
(subject, p_filename, 'testfile.t'),
- (subject, p_filesize, 11),
+ (subject, p_filesize, 12),
})
self.assertSetEqual(set(pipeline(testfile, {p_author})), {
(subject, p_author, 'Me, myself, and I'),
@@ -117,12 +117,12 @@ class TestPipeline(unittest.TestCase):
(subject, p_filename, 'testfile.t'),
})
self.assertSetEqual(set(pipeline(testfile, {p_filesize})), {
- (subject, p_filesize, 11),
+ (subject, p_filesize, 12),
})
# extract all predicates
self.assertSetEqual(set(pipeline(testfile)), {
(subject, p_filename, 'testfile.t'),
- (subject, p_filesize, 11),
+ (subject, p_filesize, 12),
(subject, p_author, 'Me, myself, and I'),
(subject, p_rating, 123),
})
@@ -158,6 +158,18 @@ class TestPipeline(unittest.TestCase):
p_filename = pipeline.schema.predicate(ns.bse.filename)
self.assertSetEqual(set(pipeline(testfile, {p_filename})), set())
+ def test_predicates(self):
+     """predicates() yields bsfs:Predicate plus bse:filename, filesize, author and rating."""
+     # build pipeline
+     pipeline = Pipeline(self.prefix, self.ext2rdr)
+     # collect what the pipeline reports before building the expectation
+     reported = set(pipeline.predicates())
+     expected_uris = (
+         ns.bsfs.Predicate,
+         ns.bse.filename,
+         ns.bse.filesize,
+         ns.bse.author,
+         ns.bse.rating,
+         )
+     expected = {pipeline.schema.predicate(uri) for uri in expected_uris}
+     self.assertSetEqual(reported, expected)
+
## main ##
diff --git a/test/tools/testfile.t b/test/tools/testfile.t
index 58bf1b8..3b18e51 100644
--- a/test/tools/testfile.t
+++ b/test/tools/testfile.t
@@ -1 +1 @@
-hello worl
+hello world