""" Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ # imports import argparse import os import typing # bsie imports from bsie.base import errors from bsie.lib import BSIE from bsie.tools import builder from bsie.utils import bsfs # exports __all__: typing.Sequence[str] = ( 'main', ) ## code ## def main(argv): """Index files or directories into BSFS.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='index') parser.add_argument('--user', type=bsfs.URI, default=bsfs.URI('http://example.com/me'), help='') parser.add_argument('--collect', action='append', default=[], help='') parser.add_argument('--discard', action='append', default=[], help='') parser.add_argument('-r', '--recursive', action='store_true', default=False, help='') parser.add_argument('--follow', action='store_true', default=False, help='') parser.add_argument('--print', action='store_true', default=False, help='') parser.add_argument('input_file', nargs=argparse.REMAINDER, help='') args = parser.parse_args(argv) # FIXME: Read reader/extractor configs from a config file # reader builder rbuild = builder.ReaderBuilder({}) # extractor builder ebuild = builder.ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], schema=''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsfs:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . ''', )}, ]) # pipeline builder pbuild = builder.PipelineBuilder( bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')), rbuild, ebuild, ) # build pipeline pipeline = pbuild.build() # build BSIE frontend bsie = BSIE(pipeline, args.collect, args.discard) def walk(handle): """Walk through given input files.""" # FIXME: collect all triples by node, set all predicates at once # FIXME: simplify code (below but maybe also above) # FIXME: How to handle dependencies between data? # E.g. do I still want to link to a tag despite not being permitted to set its label? # FIXME: node renaming? # index input paths for path in args.input_file: if os.path.isdir(path) and args.recursive: for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow): for filename in filenames: for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)): handle(node, pred, value) elif os.path.isfile(path): for node, pred, value in bsie.from_file(path): handle(node, pred, value) else: raise errors.UnreachableError() if args.print: walk(print) return None else: # initialize bsfs # NOTE: With presistent storages, the schema migration will be a seaparte operation. # Here, we'd simply examine the schema and potentially discard more predicates. store = bsfs.Open({ 'Graph': { 'user': args.user, 'backend': { 'SparqlStore': {}}, }}) store.migrate(bsie.schema) # process files def handle(node, pred, value): store.node(node.node_type, node.uri).set(pred.uri, value) walk(handle) # return store return store ## main ## if __name__ == '__main__': import sys main(sys.argv[1:]) ## EOF ##