diff options
Diffstat (limited to 'bsie/apps/index.py')
-rw-r--r-- | bsie/apps/index.py | 65 |
1 files changed, 24 insertions, 41 deletions
diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 1dbfdd8..d64e8c2 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -1,19 +1,15 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" -# imports +# standard imports import argparse import os import typing # bsie imports -from bsie.base import errors -from bsie.lib import BSIE -from bsie.tools import builder -from bsie.utils import bsfs +from bsie.lib import BSIE, DefaultNamingPolicy +from bsie.utils import bsfs, errors, node as node_ + +# inner-module imports +from . import _loader # exports __all__: typing.Sequence[str] = ( @@ -26,7 +22,12 @@ __all__: typing.Sequence[str] = ( def main(argv): """Index files or directories into BSFS.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='index') - parser.add_argument('--user', type=bsfs.URI, default=bsfs.URI('http://example.com/me'), + parser.add_argument('--config', type=str, + default=os.path.join(os.path.dirname(__file__), _loader.DEFAULT_CONFIG_FILE), + help='Path to the config file.') + parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'), + help='') + parser.add_argument('--user', type=str, default='me', help='') parser.add_argument('--collect', action='append', default=[], help='') @@ -42,35 +43,15 @@ def main(argv): help='') args = parser.parse_args(argv) - # FIXME: Read reader/extractor configs from a config file - # reader builder - rbuild = builder.ReaderBuilder({}) - # extractor builder - ebuild = builder.ExtractorBuilder([ - {'bsie.extractor.generic.path.Path': {}}, - {'bsie.extractor.generic.stat.Stat': {}}, - {'bsie.extractor.generic.constant.Constant': dict( - tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], - schema=''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . - ''', - )}, - ]) - # pipeline builder - pbuild = builder.PipelineBuilder( - bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')), - rbuild, - ebuild, - ) - # build pipeline - pipeline = pbuild.build() + pipeline = _loader.load_pipeline(args.config) + # build the naming policy + naming_policy = DefaultNamingPolicy( + host=args.host, + user=args.user, + ) # build BSIE frontend - bsie = BSIE(pipeline, args.collect, args.discard) - + bsie = BSIE(pipeline, naming_policy, args.collect, args.discard) def walk(handle): """Walk through given input files.""" @@ -78,11 +59,12 @@ def main(argv): # FIXME: simplify code (below but maybe also above) # FIXME: How to handle dependencies between data? # E.g. do I still want to link to a tag despite not being permitted to set its label? - # FIXME: node renaming? # index input paths for path in args.input_file: - if os.path.isdir(path) and args.recursive: + if not os.path.exists(path): + pass # FIXME: notify the user + elif os.path.isdir(path) and args.recursive: for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow): for filename in filenames: for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)): @@ -105,13 +87,14 @@ def main(argv): store.migrate(bsie.schema) # process files def handle(node, pred, value): + if isinstance(value, node_.Node): + value = store.node(value.node_type, value.uri) store.node(node.node_type, node.uri).set(pred.uri, value) walk(handle) # return store return store - ## main ## if __name__ == '__main__': |