From 266c2c9a072bf3289fd7f2d75278b7d59528378c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 24 Dec 2022 10:27:09 +0100 Subject: package restructuring: base * Reader and Extractor to respective reader/extractor modules * ReaderBuilder to reader module * ExtractorBuilder to extractor module * Loading module in utils (safe_load, unpack_name) * Pipeline and PipelineBuilder to lib module * errors to utils * documentation: "standard import" and "external import" --- bsie/apps/index.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'bsie/apps/index.py') diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 1dbfdd8..0c6296f 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -4,16 +4,16 @@ Part of the bsie module. A copy of the license is provided with the project. Author: Matthias Baumgartner, 2022 """ -# imports +# standard imports import argparse import os import typing # bsie imports -from bsie.base import errors -from bsie.lib import BSIE -from bsie.tools import builder -from bsie.utils import bsfs +from bsie.extractor import ExtractorBuilder +from bsie.lib import BSIE, PipelineBuilder +from bsie.reader import ReaderBuilder +from bsie.utils import bsfs, errors # exports __all__: typing.Sequence[str] = ( @@ -44,9 +44,9 @@ def main(argv): # FIXME: Read reader/extractor configs from a config file # reader builder - rbuild = builder.ReaderBuilder({}) + rbuild = ReaderBuilder({}) # extractor builder - ebuild = builder.ExtractorBuilder([ + ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -60,7 +60,7 @@ def main(argv): )}, ]) # pipeline builder - pbuild = builder.PipelineBuilder( + pbuild = PipelineBuilder( bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')), rbuild, ebuild, -- cgit v1.2.3 From bffe6bb52d00e60665b4e8e2144ab91e4465173e Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 20:54:25 +0100 Subject: minor bugfixes --- bsie/apps/index.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'bsie/apps/index.py') diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 0c6296f..7cf94d3 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -82,7 +82,9 @@ def main(argv): # index input paths for path in args.input_file: - if os.path.isdir(path) and args.recursive: + if not os.path.exists(path): + pass # FIXME: notify the user + elif os.path.isdir(path) and args.recursive: for dirpath, _, filenames in os.walk(path, topdown=True, followlinks=args.follow): for filename in filenames: for node, pred, value in bsie.from_file(os.path.join(dirpath, filename)): -- cgit v1.2.3 From 02bbad817077e9a23f7b24b82845fcde24de63a9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 20:55:13 +0100 Subject: image feature integration test --- bsie/apps/index.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'bsie/apps/index.py') diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 7cf94d3..25e006f 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -44,7 +44,10 @@ def main(argv): # FIXME: Read reader/extractor configs from a config file # reader builder - rbuild = ReaderBuilder({}) + rbuild = ReaderBuilder({ + 'bsie.reader.image.Image': { + 'cfg': {}}, # FIXME: cfg should be optional! + }) # extractor builder ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, @@ -58,6 +61,11 @@ def main(argv): bsfs:unique "true"^^xsd:boolean . ''', )}, + {'bsie.extractor.image.colors_spatial.ColorsSpatial': { + 'width': 2, + 'height': 2, + 'exp': 2, + }}, ]) # pipeline builder pbuild = PipelineBuilder( -- cgit v1.2.3 From 8439089807bbad92e95ad9062dc74c3d71f5d7eb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 16 Jan 2023 21:35:19 +0100 Subject: ReaderBuilder optional config --- bsie/apps/index.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'bsie/apps/index.py') diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 25e006f..21c2318 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -44,10 +44,7 @@ def main(argv): # FIXME: Read reader/extractor configs from a config file # reader builder - rbuild = ReaderBuilder({ - 'bsie.reader.image.Image': { - 'cfg': {}}, # FIXME: cfg should be optional! - }) + rbuild = ReaderBuilder() # extractor builder ebuild = ExtractorBuilder([ {'bsie.extractor.generic.path.Path': {}}, -- cgit v1.2.3 From 9c26a5ef759b010d8cf4384b0515cc188b885d81 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 17:44:00 +0100 Subject: node naming policy --- bsie/apps/index.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'bsie/apps/index.py') diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 21c2318..a870364 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -11,7 +11,7 @@ import typing # bsie imports from bsie.extractor import ExtractorBuilder -from bsie.lib import BSIE, PipelineBuilder +from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy from bsie.reader import ReaderBuilder from bsie.utils import bsfs, errors @@ -26,7 +26,9 @@ __all__: typing.Sequence[str] = ( def main(argv): """Index files or directories into BSFS.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='index') - parser.add_argument('--user', type=bsfs.URI, default=bsfs.URI('http://example.com/me'), + parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'), + help='') + parser.add_argument('--user', type=str, default='me', help='') parser.add_argument('--collect', action='append', default=[], help='') @@ -66,16 +68,19 @@ def main(argv): ]) # pipeline builder pbuild = PipelineBuilder( - bsfs.Namespace(args.user + ('/' if not args.user.endswith('/') else '')), rbuild, ebuild, ) # build pipeline pipeline = pbuild.build() + # build the naming policy + naming_policy = DefaultNamingPolicy( + host=args.host, + user=args.user, + ) # build BSIE frontend - bsie = BSIE(pipeline, args.collect, args.discard) - + bsie = BSIE(pipeline, naming_policy, args.collect, args.discard) def walk(handle): """Walk through given input files.""" @@ -83,7 +88,6 @@ def main(argv): # FIXME: simplify code (below but maybe also above) # FIXME: How to handle dependencies between data? # E.g. do I still want to link to a tag despite not being permitted to set its label? - # FIXME: node renaming? # index input paths for path in args.input_file: -- cgit v1.2.3 From a281d6b3a75a7d4a97e673c285ee430a327482ed Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 8 Feb 2023 19:23:46 +0100 Subject: preview extractor --- bsie/apps/index.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'bsie/apps/index.py') diff --git a/bsie/apps/index.py b/bsie/apps/index.py index a870364..8798c49 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -13,7 +13,7 @@ import typing from bsie.extractor import ExtractorBuilder from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy from bsie.reader import ReaderBuilder -from bsie.utils import bsfs, errors +from bsie.utils import bsfs, errors, node as node_ # exports __all__: typing.Sequence[str] = ( @@ -49,6 +49,9 @@ def main(argv): rbuild = ReaderBuilder() # extractor builder ebuild = ExtractorBuilder([ + {'bsie.extractor.preview.Preview': { + 'max_sides': [50], + }}, {'bsie.extractor.generic.path.Path': {}}, {'bsie.extractor.generic.stat.Stat': {}}, {'bsie.extractor.generic.constant.Constant': dict( @@ -116,6 +119,8 @@ def main(argv): store.migrate(bsie.schema) # process files def handle(node, pred, value): + if isinstance(value, node_.Node): + value = store.node(value.node_type, value.uri) store.node(node.node_type, node.uri).set(pred.uri, value) walk(handle) # return store -- cgit v1.2.3 From 464cc6cb54f55f6255bf0a485533c181d6018303 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 17:07:06 +0100 Subject: load config from file --- bsie/apps/index.py | 44 ++++++++------------------------------------ 1 file changed, 8 insertions(+), 36 deletions(-) (limited to 'bsie/apps/index.py') diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 8798c49..2d147c9 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -10,11 +10,12 @@ import os import typing # bsie imports -from bsie.extractor import ExtractorBuilder -from bsie.lib import BSIE, PipelineBuilder, DefaultNamingPolicy -from bsie.reader import ReaderBuilder +from bsie.lib import BSIE, DefaultNamingPolicy from bsie.utils import bsfs, errors, node as node_ +# inner-module imports +from . import _loader + # exports __all__: typing.Sequence[str] = ( 'main', @@ -26,6 +27,9 @@ __all__: typing.Sequence[str] = ( def main(argv): """Index files or directories into BSFS.""" parser = argparse.ArgumentParser(description=main.__doc__, prog='index') + parser.add_argument('--config', type=str, + default=os.path.join(os.path.dirname(__file__), _loader.DEFAULT_CONFIG_FILE), + help='Path to the config file.') parser.add_argument('--host', type=bsfs.URI, default=bsfs.URI('http://example.com'), help='') parser.add_argument('--user', type=str, default='me', @@ -44,39 +48,8 @@ def main(argv): help='') args = parser.parse_args(argv) - # FIXME: Read reader/extractor configs from a config file - # reader builder - rbuild = ReaderBuilder() - # extractor builder - ebuild = ExtractorBuilder([ - {'bsie.extractor.preview.Preview': { - 'max_sides': [50], - }}, - {'bsie.extractor.generic.path.Path': {}}, - {'bsie.extractor.generic.stat.Stat': {}}, - {'bsie.extractor.generic.constant.Constant': dict( - tuples=[('http://bsfs.ai/schema/Entity#author', 'Me, myself, and I')], - schema=''' - bse:author rdfs:subClassOf bsfs:Predicate ; - rdfs:domain bsfs:Entity ; - rdfs:range xsd:string ; - bsfs:unique "true"^^xsd:boolean . - ''', - )}, - {'bsie.extractor.image.colors_spatial.ColorsSpatial': { - 'width': 2, - 'height': 2, - 'exp': 2, - }}, - ]) - # pipeline builder - pbuild = PipelineBuilder( - rbuild, - ebuild, - ) - # build pipeline - pipeline = pbuild.build() + pipeline = _loader.load_pipeline(args.config) # build the naming policy naming_policy = DefaultNamingPolicy( host=args.host, @@ -127,7 +100,6 @@ def main(argv): return store - ## main ## if __name__ == '__main__': -- cgit v1.2.3 From 4b5c4d486bb4f0f4da2e25ad464e8336a781cdcb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 1 Mar 2023 22:31:03 +0100 Subject: removed module header stubs --- bsie/apps/index.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'bsie/apps/index.py') diff --git a/bsie/apps/index.py b/bsie/apps/index.py index 2d147c9..d64e8c2 100644 --- a/bsie/apps/index.py +++ b/bsie/apps/index.py @@ -1,9 +1,4 @@ -""" -Part of the bsie module. -A copy of the license is provided with the project. -Author: Matthias Baumgartner, 2022 -""" # standard imports import argparse import os -- cgit v1.2.3