diff options
author | Matthias Baumgartner <dev@igsor.net> | 2022-12-14 06:10:25 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2022-12-14 06:10:25 +0100 |
commit | 559e643bb1fa39feefd2eb73847ad9420daf1deb (patch) | |
tree | 13243e2e25edd90d16789e6f6e4e473748f347a4 /bsie/apps | |
parent | edc747252a04675c46059215751719b6666a77f9 (diff) | |
download | bsie-559e643bb1fa39feefd2eb73847ad9420daf1deb.tar.gz bsie-559e643bb1fa39feefd2eb73847ad9420daf1deb.tar.bz2 bsie-559e643bb1fa39feefd2eb73847ad9420daf1deb.zip |
bsie extraction and info apps
Diffstat (limited to 'bsie/apps')
-rw-r--r-- | bsie/apps/__init__.py | 20 | ||||
-rw-r--r-- | bsie/apps/index.py | 131 | ||||
-rw-r--r-- | bsie/apps/info.py | 74 |
3 files changed, 225 insertions, 0 deletions
diff --git a/bsie/apps/__init__.py b/bsie/apps/__init__.py new file mode 100644 index 0000000..a548c3c --- /dev/null +++ b/bsie/apps/__init__.py @@ -0,0 +1,20 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from .index import main as index +from .info import main as info + +# exports +__all__: typing.Sequence[str] = ( + 'index', + 'info', + ) + +## EOF ## diff --git a/bsie/apps/index.py b/bsie/apps/index.py new file mode 100644 index 0000000..821aa4c --- /dev/null +++ b/bsie/apps/index.py @@ -0,0 +1,131 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import os +import typing + +# bsfs imports +import bsfs + +# bsie imports +from bsie.base import errors +from bsie.lib.bsie import BSIE +from bsie.tools import builder +from bsie.utils.bsfs import URI + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + + +## code ## + +def main(argv): + """Index files or directories into BSFS.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='index') + parser.add_argument('--user', type=URI, default=URI('http://example.com/me'), + help='') + parser.add_argument('--collect', action='append', default=[], + help='') + parser.add_argument('--discard', action='append', default=[], + help='') + parser.add_argument('-r', '--recursive', action='store_true', default=False, + help='') + parser.add_argument('--follow', action='store_true', default=False, + help='') + parser.add_argument('--print', action='store_true', default=False, + help='') + parser.add_argument('input_file', nargs=argparse.REMAINDER, + help='') + args = parser.parse_args(argv) + + # FIXME: Read reader/extractor configs from a config file + # reader builder + rbuild = builder.ReaderBuilder({}) + # extractor builder + ebuild = builder.ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], + schema=''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + ''', + )}, + ]) + # pipeline builder + prefix = URI(args.user + ('file#' if args.user.endswith('/') else '/file#')) + pbuild = builder.PipelineBuilder( + prefix, + rbuild, + ebuild, + ) + + # build pipeline + pipeline = pbuild.build() + # build BSIE frontend + bsie = BSIE(pipeline, args.collect, args.discard) + + + def walk(handle): + """Walk through given input files.""" + # FIXME: collect all triples by node, set all predicates at once + # FIXME: simplify code (below but maybe also above) + # FIXME: How to handle dependencies between data? + # E.g. do I still want to link to a tag despite not being permitted to set its label? + # FIXME: node renaming? + + # index input paths + for path in args.input_file: + if os.path.isdir(path) and args.recursive: + for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow): + for filename in filenames: + for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)): + handle(node, pred, value) + elif os.path.isfile(path): + for node, pred, value in bsie.from_file(path): + handle(node, pred, value) + else: + raise errors.UnreachableError() + + + if args.print: + walk(print) + return None + + else: + # initialize bsfs + # NOTE: With presistent storages, the schema migration will be a seaparte operation. + # Here, we'd simply examine the schema and potentially discard more predicates. + store = bsfs.Open({ + 'Graph': { + 'user': args.user, + 'backend': { + 'SparqlStore': {}}, + }}) + store.migrate(bsie.schema) + # process files + def handle(node, pred, value): + store.node(node.node_type, node.uri).set(pred.uri, value) + walk(handle) + # return store + return store + + + +## main ## + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) + +## EOF ## diff --git a/bsie/apps/info.py b/bsie/apps/info.py new file mode 100644 index 0000000..8cc6dca --- /dev/null +++ b/bsie/apps/info.py @@ -0,0 +1,74 @@ +""" + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import argparse +import sys +import typing + +# bsie imports +from bsie.base import errors +from bsie.tools import builder +from bsie.utils.bsfs import URI + +# exports +__all__: typing.Sequence[str] = ( + 'main', + ) + + +## code ## + +def main(argv): + """Show information from BSIE.""" + parser = argparse.ArgumentParser(description=main.__doc__, prog='info') + parser.add_argument('what', choices=('predicates', ), + help='Select what information to show.') + args = parser.parse_args(argv) + + # FIXME: Read reader/extractor configs from a config file + # reader builder + rbuild = builder.ReaderBuilder({}) + # extractor builder + ebuild = builder.ExtractorBuilder([ + {'bsie.extractor.generic.path.Path': {}}, + {'bsie.extractor.generic.stat.Stat': {}}, + {'bsie.extractor.generic.constant.Constant': dict( + tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], + schema=''' + bse:author rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsfs:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + ''', + )}, + ]) + # pipeline builder + pbuild = builder.PipelineBuilder( + URI('http://example.com/me/file#'), # not actually used + rbuild, + ebuild, + ) + + # build pipeline + pipeline = pbuild.build() + + # show info + if args.what == 'predicates': + # show predicates + for pred in pipeline.schema.predicates(): + print(pred.uri) + else: + # args.what is already checked by argparse + raise errors.UnreachableError() + + +## main ## + +if __name__ == '__main__': + main(sys.argv[1:]) + +## EOF ## |