diff options
Diffstat (limited to 'bsie/apps/index.py')
-rw-r--r-- | bsie/apps/index.py | 60 |
1 files changed, 42 insertions, 18 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 268a520..05218f8 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -3,6 +3,9 @@ import argparse import typing +# external imports +from tqdm import tqdm + # bsie imports from bsie.lib import BSIE from bsie.matcher import nodes, DefaultMatcher @@ -39,6 +42,8 @@ def main(argv): help='') parser.add_argument('--print', action='store_true', default=False, help='') + parser.add_argument('--output', type=str, default=None, + help='') parser.add_argument('input_file', nargs=argparse.REMAINDER, help='') args = parser.parse_args(argv) @@ -53,33 +58,52 @@ def main(argv): # build BSIE frontend bsie = BSIE(pipeline, matcher, args.collect, args.discard) - def walk(handle): + def walk(handle, status): """Walk through given input files.""" # FIXME: collect all triples by node, set all predicates at once - # FIXME: simplify code (below but maybe also above) # FIXME: How to handle dependencies between data? # E.g. do I still want to link to a tag despite not being permitted to set its label? - for path in list_files(args.input_file, args.recursive, args.follow): + for path in status(list_files(args.input_file, args.recursive, args.follow)): for node, pred, value in bsie.from_file(path): handle(node, pred, value) if args.print: - walk(print) - return None - - # initialize bsfs - # NOTE: With presistent storages, the schema migration will be a seaparte operation. - # Here, we'd simply examine the schema and potentially discard more predicates. - store = bsfs.Open(bsfs.init_sparql_store(args.user)) - store.migrate(bsie.schema) - # process files - def handle(node, pred, value): - if isinstance(value, node_.Node): - value = store.node(value.node_type, value.uri) - store.node(node.node_type, node.uri).set(pred.uri, value) - walk(handle) + def handle(node, pred, value): + if isinstance(value, nodes.Node): + value = value.uri + print(node.uri, pred.uri, value) + status = lambda x: x + ret = None + + elif args.output: + ofile = open(args.output, 'at', encoding='UTF-8') + def handle(node, pred, value): + if isinstance(value, nodes.Node): + value = value.uri + try: + ofile.write(f'{node.uri},{pred.uri},{value}\n') + except Exception as err: + print(err) + status = tqdm + ret = None + + else: + # initialize bsfs + # NOTE: With presistent storages, the schema migration will be a seaparte operation. + # Here, we'd simply examine the schema and potentially discard more predicates. + store = bsfs.Open(bsfs.init_sparql_store(args.user)) + store.migrate(bsie.schema) + # process files + def handle(node, pred, value): + if isinstance(value, nodes.Node): + value = store.node(value.node_type, value.uri) + store.node(node.node_type, node.uri).set(pred.uri, value) + status=tqdm + ret = store + + walk(handle, status=status) # return store - return store + return ret ## main ## |