aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--bsie/apps/index.py60
1 files changed, 42 insertions, 18 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py
index 268a520..05218f8 100644
--- a/bsie/apps/index.py
+++ b/bsie/apps/index.py
@@ -3,6 +3,9 @@
import argparse
import typing
+# external imports
+from tqdm import tqdm
+
# bsie imports
from bsie.lib import BSIE
from bsie.matcher import nodes, DefaultMatcher
@@ -39,6 +42,8 @@ def main(argv):
help='')
parser.add_argument('--print', action='store_true', default=False,
help='')
+ parser.add_argument('--output', type=str, default=None,
+ help='')
parser.add_argument('input_file', nargs=argparse.REMAINDER,
help='')
args = parser.parse_args(argv)
@@ -53,33 +58,52 @@ def main(argv):
# build BSIE frontend
bsie = BSIE(pipeline, matcher, args.collect, args.discard)
- def walk(handle):
+ def walk(handle, status):
"""Walk through given input files."""
# FIXME: collect all triples by node, set all predicates at once
- # FIXME: simplify code (below but maybe also above)
# FIXME: How to handle dependencies between data?
# E.g. do I still want to link to a tag despite not being permitted to set its label?
- for path in list_files(args.input_file, args.recursive, args.follow):
+ for path in status(list_files(args.input_file, args.recursive, args.follow)):
for node, pred, value in bsie.from_file(path):
handle(node, pred, value)
if args.print:
- walk(print)
- return None
-
- # initialize bsfs
- # NOTE: With presistent storages, the schema migration will be a seaparte operation.
- # Here, we'd simply examine the schema and potentially discard more predicates.
- store = bsfs.Open(bsfs.init_sparql_store(args.user))
- store.migrate(bsie.schema)
- # process files
- def handle(node, pred, value):
- if isinstance(value, node_.Node):
- value = store.node(value.node_type, value.uri)
- store.node(node.node_type, node.uri).set(pred.uri, value)
- walk(handle)
+ def handle(node, pred, value):
+ if isinstance(value, nodes.Node):
+ value = value.uri
+ print(node.uri, pred.uri, value)
+ status = lambda x: x
+ ret = None
+
+ elif args.output:
+ ofile = open(args.output, 'at', encoding='UTF-8')
+ def handle(node, pred, value):
+ if isinstance(value, nodes.Node):
+ value = value.uri
+ try:
+ ofile.write(f'{node.uri},{pred.uri},{value}\n')
+ except Exception as err:
+ print(err)
+ status = tqdm
+ ret = None
+
+ else:
+ # initialize bsfs
+ # NOTE: With persistent storages, the schema migration will be a separate operation.
+ # Here, we'd simply examine the schema and potentially discard more predicates.
+ store = bsfs.Open(bsfs.init_sparql_store(args.user))
+ store.migrate(bsie.schema)
+ # process files
+ def handle(node, pred, value):
+ if isinstance(value, nodes.Node):
+ value = store.node(value.node_type, value.uri)
+ store.node(node.node_type, node.uri).set(pred.uri, value)
+ status=tqdm
+ ret = store
+
+ walk(handle, status=status)
# return store
- return store
+ return ret
## main ##