diff options
-rw-r--r-- | bsie/extractor/generic/path.py | 19 | ||||
-rw-r--r-- | test/apps/test_info.py | 4 | ||||
-rw-r--r-- | test/apps/test_loader.py | 1 | ||||
-rw-r--r-- | test/apps/test_main.py | 1 | ||||
-rw-r--r-- | test/extractor/generic/test_path.py | 22 | ||||
-rw-r--r-- | test/lib/test_bsie.py | 27 | ||||
-rw-r--r-- | test/lib/test_pipeline.py | 3 |
7 files changed, 69 insertions, 8 deletions
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py index 00c1121..30d75cf 100644 --- a/bsie/extractor/generic/path.py +++ b/bsie/extractor/generic/path.py @@ -25,6 +25,13 @@ class Path(base.Extractor): def __init__(self): super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' + bse:dirname rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:string ; + rdfs:label "File path"^^xsd:string ; + schema:description "File path in some filesystem."^^xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:filename rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; @@ -66,4 +73,16 @@ class Path(base.Extractor): # errors) return None + def __dirname(self, path: str) -> typing.Optional[str]: + try: + return os.path.dirname(os.path.abspath(os.path.normpath(path))) + except Exception: # pylint: disable=broad-except # we explicitly want to catch everything + # some error, skip + # FIXME: some kind of error reporting (e.g. logging)? + # Options: (a) Fail silently (current); (b) Skip and report to log; + # (c) Raise ExtractorError (aborts extraction); (d) separate content type + # checks from basename errors (report content type errors, skip basename + # errors) + return None + ## EOF ## diff --git a/test/apps/test_info.py b/test/apps/test_info.py index ffcaecf..85b4815 100644 --- a/test/apps/test_info.py +++ b/test/apps/test_info.py @@ -19,7 +19,7 @@ from bsie.apps.info import main ## code ## -class TestIndex(unittest.TestCase): +class TestInfo(unittest.TestCase): def setUp(self): config = { 'ReaderBuilder': {}, @@ -63,6 +63,7 @@ class TestIndex(unittest.TestCase): 'https://schema.bsfs.io/ie/Node/Entity#author', 'https://schema.bsfs.io/core/Predicate', 'https://schema.bsfs.io/ie/Node/Entity#filename', + 'https://schema.bsfs.io/ie/Node/Entity#dirname', 'https://schema.bsfs.io/ie/Node/Entity#colors_spatial_0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', 'https://schema.bsfs.io/ie/Node/Entity#preview', 'https://schema.bsfs.io/ie/Node/Preview#width', @@ -81,6 +82,7 @@ class TestIndex(unittest.TestCase): 'https://schema.bsfs.io/ie/Node/Entity#author', 'https://schema.bsfs.io/core/Predicate', 'https://schema.bsfs.io/ie/Node/Entity#filename', + 'https://schema.bsfs.io/ie/Node/Entity#dirname', 'https://schema.bsfs.io/ie/Node/Entity#colors_spatial_0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', 'https://schema.bsfs.io/ie/Node/Entity#preview', 'https://schema.bsfs.io/ie/Node/Preview#width', diff --git a/test/apps/test_loader.py b/test/apps/test_loader.py index 4670266..21752c8 100644 --- a/test/apps/test_loader.py +++ b/test/apps/test_loader.py @@ -64,6 +64,7 @@ class TestLoader(unittest.TestCase): 'https://schema.bsfs.io/ie/Node/Entity#author', 'https://schema.bsfs.io/core/Predicate', 'https://schema.bsfs.io/ie/Node/Entity#filename', + 'https://schema.bsfs.io/ie/Node/Entity#dirname', 'https://schema.bsfs.io/ie/Node/Entity#colors_spatial_0658f2234a054e1dd59a14462c89f7733e019160419c796356aa831498bd0a04', 'https://schema.bsfs.io/ie/Node/Entity#preview', 'https://schema.bsfs.io/ie/Node/Preview#width', diff --git a/test/apps/test_main.py b/test/apps/test_main.py index 4fa094b..4404b53 100644 --- a/test/apps/test_main.py +++ b/test/apps/test_main.py @@ -44,6 +44,7 @@ class TestMain(unittest.TestCase): main(['info', '--config', self.config_path, 'predicates']) self.assertEqual(set(outbuf.getvalue().strip().split('\n')), { 'https://schema.bsfs.io/ie/Node/Entity#filename', + 'https://schema.bsfs.io/ie/Node/Entity#dirname', 'https://schema.bsfs.io/ie/Node/Entity#filesize', 'https://schema.bsfs.io/core/Predicate', }) diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py index 0beb37e..569703d 100644 --- a/test/extractor/generic/test_path.py +++ b/test/extractor/generic/test_path.py @@ -1,5 +1,6 @@ # standard imports +import os import unittest # bsie imports @@ -29,6 +30,12 @@ class TestPath(unittest.TestCase): rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . + + bse:dirname rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + ''')) def test_extract(self): @@ -36,12 +43,13 @@ class TestPath(unittest.TestCase): node = _node.Node(ns.bsn.Entity, '') # Blank node content = '/tmp/foo/bar' p_filename = ext.schema.predicate(ns.bse.filename) + p_dirname = ext.schema.predicate(ns.bse.dirname) entity = ext.schema.node(ns.bsfs.Node).child(ns.bsn.Entity) string = ext.schema.literal(ns.bsfs.Literal).child(ns.xsd.string) # baseline - self.assertSetEqual(set(ext.extract(node, content, (p_filename, ))), - {(node, p_filename, 'bar')}) + self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_dirname))), + {(node, p_filename, 'bar'), (node, p_dirname, '/tmp/foo')}) # predicates parameter is respected p_foo = ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.foo, domain=entity, range=string) # unsupported predicate self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_foo))), @@ -53,10 +61,12 @@ class TestPath(unittest.TestCase): {(node, p_filename, 'bar')}) self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set()) # path variations - self.assertSetEqual(set(ext.extract(node, 'bar', (p_filename, ))), - {(node, p_filename, 'bar')}) - self.assertSetEqual(set(ext.extract(node, '', (p_filename, ))), - {(node, p_filename, '')}) + self.assertSetEqual(set(ext.extract(node, 'bar', (p_filename, p_dirname))), + {(node, p_filename, 'bar'), (node, p_dirname, os.getcwd())}) + self.assertSetEqual(set(ext.extract(node, '/bar', (p_filename, p_dirname))), + {(node, p_filename, 'bar'), (node, p_dirname, '/')}) + self.assertSetEqual(set(ext.extract(node, '', (p_filename, p_dirname))), + {(node, p_filename, ''), (node, p_dirname, os.path.dirname(os.getcwd()))}) # errors are suppressed self.assertSetEqual(set(ext.extract(node, None, (p_filename, ))), set()) diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py index 0c393cc..6586e58 100644 --- a/test/lib/test_bsie.py +++ b/test/lib/test_bsie.py @@ -44,6 +44,7 @@ class TestBSIE(unittest.TestCase): lib = BSIE(self.pipeline, self.naming_policy) self.assertSetEqual(set(lib.principals), { ns.bse.filename, + ns.bse.dirname, ns.bse.filesize, ns.bse.author, }) @@ -53,6 +54,11 @@ class TestBSIE(unittest.TestCase): rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . + bse:dirname rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:integer; @@ -89,6 +95,7 @@ class TestBSIE(unittest.TestCase): lib = BSIE(self.pipeline, self.naming_policy, collect={}) self.assertSetEqual(set(lib.principals), { ns.bse.filename, + ns.bse.dirname, ns.bse.filesize, ns.bse.author, }) @@ -98,6 +105,11 @@ class TestBSIE(unittest.TestCase): rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . + bse:dirname rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + bse:filesize rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:integer; @@ -118,12 +130,24 @@ class TestBSIE(unittest.TestCase): }) self.assertSetEqual(set(lib.principals), { ns.bse.author, + ns.bse.dirname, }) self.assertEqual(lib.schema, bsfs.schema.from_string(SCHEMA_PREAMBLE + ''' bse:author rdfs:subClassOf bsfs:Predicate ; rdfs:domain bsn:Entity ; rdfs:range xsd:string ; bsfs:unique "true"^^xsd:boolean . + + bse:filename rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + + bse:dirname rdfs:subClassOf bsfs:Predicate ; + rdfs:domain bsn:Entity ; + rdfs:range xsd:string ; + bsfs:unique "true"^^xsd:boolean . + ''')) # specify collect and discard @@ -139,7 +163,6 @@ class TestBSIE(unittest.TestCase): rdfs:domain bsn:Entity ; rdfs:range xsd:integer; bsfs:unique "true"^^xsd:boolean . - ''')) @@ -149,6 +172,7 @@ class TestBSIE(unittest.TestCase): self.assertSetEqual(set(lib.principals), { ns.bse.filesize, ns.bse.filename, + ns.bse.dirname, ns.bse.author, }) content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447' @@ -158,6 +182,7 @@ class TestBSIE(unittest.TestCase): # from_file extracts all available triples self.assertSetEqual(set(lib.from_file(testfile)), { (subject, lib.schema.predicate(ns.bse.filename), 'testfile.t'), + (subject, lib.schema.predicate(ns.bse.dirname), os.path.dirname(__file__)), (subject, lib.schema.predicate(ns.bse.filesize), 12), (subject, lib.schema.predicate(ns.bse.author), 'Me, myself, and I'), }) diff --git a/test/lib/test_pipeline.py b/test/lib/test_pipeline.py index eb088a9..8d836fd 100644 --- a/test/lib/test_pipeline.py +++ b/test/lib/test_pipeline.py @@ -87,6 +87,7 @@ class TestPipeline(unittest.TestCase): subject = node.Node(ns.bsn.Entity, ucid=content_hash) testfile = os.path.join(os.path.dirname(__file__), 'testfile.t') p_filename = pipeline.schema.predicate(ns.bse.filename) + p_dirname = pipeline.schema.predicate(ns.bse.dirname) p_filesize = pipeline.schema.predicate(ns.bse.filesize) p_author = pipeline.schema.predicate(ns.bse.author) p_rating = pipeline.schema.predicate(ns.bse.rating) @@ -110,6 +111,7 @@ class TestPipeline(unittest.TestCase): # extract all predicates self.assertSetEqual(set(pipeline(testfile)), { (subject, p_filename, 'testfile.t'), + (subject, p_dirname, os.path.dirname(__file__)), (subject, p_filesize, 12), (subject, p_author, 'Me, myself, and I'), (subject, p_rating, 123), @@ -152,6 +154,7 @@ class TestPipeline(unittest.TestCase): # self.assertSetEqual(set(pipeline.principals), { pipeline.schema.predicate(ns.bse.filename), + pipeline.schema.predicate(ns.bse.dirname), pipeline.schema.predicate(ns.bse.filesize), pipeline.schema.predicate(ns.bse.author), pipeline.schema.predicate(ns.bse.rating), |