# standard imports
import argparse
import os
import typing

# bsie imports
from bsie.lib import BSIE, DefaultNamingPolicy
from bsie.utils import bsfs, errors, node as node_, list_files

# inner-module imports
from . import _loader

# exports
__all__: typing.Sequence[str] = (
    'main',
    )


## code ##

def main(argv):
    """Index files or directories into BSFS."""
    # NOTE: The docstring above is also used verbatim as the command line
    # description (see `description=main.__doc__` below).
    #
    # Returns None when --print is given (triples are written to stdout),
    # otherwise the store that received the triples.
    cli = argparse.ArgumentParser(description=main.__doc__, prog='index')
    cli.add_argument('--config', type=str, default=_loader.DEFAULT_CONFIG_FILE,
        help='Path to the config file.')
    # --host and --user parametrize the naming policy; --user is also passed
    # on to the store initialization.
    cli.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'),
        help='')
    cli.add_argument('--user', type=str, default='me',
        help='')
    # --collect and --discard may be repeated; the accumulated lists are
    # handed to the BSIE frontend unchanged.
    cli.add_argument('--collect', action='append', default=[],
        help='')
    cli.add_argument('--discard', action='append', default=[],
        help='')
    # --recursive and --follow are forwarded to list_files.
    cli.add_argument('-r', '--recursive', action='store_true', default=False,
        help='')
    cli.add_argument('--follow', action='store_true', default=False,
        help='')
    # --print writes the triples to stdout instead of storing them.
    cli.add_argument('--print', action='store_true', default=False,
        help='')
    # Everything after the options is taken as input.
    cli.add_argument('input_file', nargs=argparse.REMAINDER,
        help='')
    opts = cli.parse_args(argv)

    # BSIE frontend: extraction pipeline from the config file, combined with
    # the naming policy and the collect/discard selections.
    extractor = BSIE(
        _loader.load_pipeline(opts.config),
        DefaultNamingPolicy(
            host=opts.host,
            user=opts.user,
            ),
        opts.collect,
        opts.discard,
        )

    def triples():
        """Lazily yield (node, predicate, value) for every input file."""
        # FIXME: collect all triples by node, set all predicates at once
        # FIXME: simplify code (below but maybe also above)
        # FIXME: How to handle dependencies between data?
        #        E.g. do I still want to link to a tag despite not being
        #        permitted to set its label?
        for path in list_files(opts.input_file, opts.recursive, opts.follow):
            for subject, predicate, obj in extractor.from_file(path):
                yield subject, predicate, obj

    # print mode: dump the triples and stop, no store is created
    if opts.print:
        for subject, predicate, obj in triples():
            print(subject, predicate, obj)
        return None

    # storage mode: set up bsfs
    # NOTE: With persistent storages, the schema migration will be a separate
    # operation. Here, we'd simply examine the schema and potentially discard
    # more predicates.
    store = bsfs.Open(bsfs.init_sparql_store(opts.user))
    store.migrate(extractor.schema)

    # write every triple into the store
    for subject, predicate, obj in triples():
        # values that are nodes themselves must be re-bound to this store
        if isinstance(obj, node_.Node):
            obj = store.node(obj.node_type, obj.uri)
        store.node(subject.node_type, subject.uri).set(predicate.uri, obj)

    # hand the populated store back to the caller
    return store


## main ##

if __name__ == '__main__':
    import sys
    main(sys.argv[1:])

## EOF ##