about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- bsie/base/extractor.py | 5
-rw-r--r-- bsie/extractor/generic/constant.py | 9
-rw-r--r-- bsie/extractor/generic/path.py | 6
-rw-r--r-- bsie/extractor/generic/stat.py | 6
-rw-r--r-- bsie/lib/bsie.py | 11
-rw-r--r-- bsie/tools/builder.py | 17
-rw-r--r-- bsie/tools/pipeline.py | 6
-rw-r--r-- test/lib/test_bsie.py | 6
-rw-r--r-- test/tools/test_builder.py | 19
-rw-r--r-- test/tools/test_pipeline.py | 9
-rw-r--r-- test/utils/test_node.py | 17
11 files changed, 56 insertions, 55 deletions
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index bfa403c..a5c7846 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -11,6 +11,7 @@ import typing
# bsie imports
from bsie.utils import node
from bsie.utils.bsfs import schema as _schema, typename
+from bsie.utils import bsfs, node, ns
# exports
__all__: typing.Sequence[str] = (
@@ -62,10 +63,10 @@ class Extractor(abc.ABC):
self.schema = schema
def __str__(self) -> str:
- return typename(self)
+ return bsfs.typename(self)
def __repr__(self) -> str:
- return f'{typename(self)}()'
+ return f'{bsfs.typename(self)}()'
def __eq__(self, other: typing.Any) -> bool:
return isinstance(other, type(self)) \
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index 7da792a..f9e3415 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -11,6 +11,7 @@ import typing
from bsie.base import extractor
from bsie.utils.bsfs import URI, schema as _schema
from bsie.utils.node import Node
+from bsie.utils import bsfs, node
# exports
__all__: typing.Sequence[str] = (
@@ -26,14 +27,14 @@ class Constant(extractor.Extractor):
CONTENT_READER = None
# predicate/value pairs to be produced.
- _tuples: typing.Tuple[typing.Tuple[_schema.Predicate, typing.Any], ...]
+ _tuples: typing.Tuple[typing.Tuple[bsfs.schema.Predicate, typing.Any], ...]
def __init__(
self,
schema: str,
- tuples: typing.Iterable[typing.Tuple[URI, typing.Any]],
+ tuples: typing.Iterable[typing.Tuple[bsfs.URI, typing.Any]],
):
- super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema))
+ super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema))
# NOTE: Raises a KeyError if the predicate is not part of the schema
self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples)
# FIXME: use schema instance for value checking
@@ -47,7 +48,7 @@ class Constant(extractor.Extractor):
def extract(
self,
- subject: Node,
+ subject: node.Node,
content: None,
predicates: typing.Iterable[_schema.Predicate],
) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]:
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index e6b901e..2cc592a 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -10,8 +10,8 @@ import typing
# bsie imports
from bsie.base import extractor
-from bsie.utils import node, ns
from bsie.utils.bsfs import schema
+from bsie.utils import bsfs, node, ns
# exports
__all__: typing.Sequence[str] = (
@@ -27,10 +27,10 @@ class Path(extractor.Extractor):
CONTENT_READER = 'bsie.reader.path.Path'
# mapping from predicate to handler function.
- _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]]
+ _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]]
def __init__(self):
- super().__init__(schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index 6493d37..dfde7d2 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -10,8 +10,8 @@ import typing
# bsie imports
from bsie.base import extractor
-from bsie.utils import node, ns
from bsie.utils.bsfs import schema as _schema
+from bsie.utils import bsfs, node, ns
# exports
__all__: typing.Sequence[str] = (
@@ -27,10 +27,10 @@ class Stat(extractor.Extractor):
CONTENT_READER = 'bsie.reader.stat.Stat'
# mapping from predicate to handler function.
- _callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]]
+ _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[os.stat_result], typing.Any]]
def __init__(self):
- super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:integer ;
diff --git a/bsie/lib/bsie.py b/bsie/lib/bsie.py
index aeccc8c..3aeee2b 100644
--- a/bsie/lib/bsie.py
+++ b/bsie/lib/bsie.py
@@ -9,8 +9,9 @@ import typing
# bsie imports
from bsie.tools.pipeline import Pipeline
-from bsie.utils import node, ns
from bsie.utils.bsfs import URI, schema as schema_
+from bsie.tools import Pipeline
+from bsie.utils import bsfs, node, ns
# exports
__all__: typing.Sequence[str] = (
@@ -39,10 +40,10 @@ class BSIE():
self,
# pipeline builder.
pipeline: Pipeline,
- # predicates to extract at most. None implies all available w.r.t. extractors.
- collect: typing.Optional[typing.Iterable[URI]] = None,
- # predicates to discard.
- discard: typing.Optional[typing.Iterable[URI]] = None,
+ # principals to extract at most. None implies all available w.r.t. extractors.
+ collect: typing.Optional[typing.Iterable[bsfs.URI]] = None,
+ # principals to discard.
+ discard: typing.Optional[typing.Iterable[bsfs.URI]] = None,
):
# store pipeline
self.pipeline = pipeline
diff --git a/bsie/tools/builder.py b/bsie/tools/builder.py
index 8f7a410..8c6b931 100644
--- a/bsie/tools/builder.py
+++ b/bsie/tools/builder.py
@@ -13,6 +13,7 @@ import typing
from bsie import base
from bsie.base import errors
from bsie.utils.bsfs import URI, typename
+from bsie.utils import bsfs
# inner-module imports
from . import pipeline
@@ -61,7 +62,7 @@ def _unpack_name(name):
class ReaderBuilder():
- """Build `bsie.base.reader.Reader` instances.
+ """Build `bsie.base.Reader` instances.
Readers are defined via their qualified class name
(e.g., bsie.reader.path.Path) and optional keyword
@@ -83,7 +84,7 @@ class ReaderBuilder():
self.kwargs = kwargs
self.cache = {}
- def build(self, name: str) -> base.reader.Reader:
+ def build(self, name: str) -> base.Reader:
"""Return an instance for the qualified class name."""
# return cached instance
if name in self.cache:
@@ -98,7 +99,7 @@ class ReaderBuilder():
# get kwargs
kwargs = self.kwargs.get(name, {})
if not isinstance(kwargs, dict):
- raise TypeError(f'expected a kwargs dict, found {typename(kwargs)}')
+ raise TypeError(f'expected a kwargs dict, found {bsfs.typename(kwargs)}')
try: # build, cache, and return instance
obj = cls(**kwargs)
@@ -108,11 +109,11 @@ class ReaderBuilder():
return obj
except Exception as err:
- raise errors.BuilderError(f'failed to build reader {name} due to {typename(err)}: {err}') from err
+ raise errors.BuilderError(f'failed to build reader {name} due to {bsfs.typename(err)}: {err}') from err
class ExtractorBuilder():
- """Build `bsie.base.extractor.Extractor instances.
+ """Build `bsie.base.Extractor` instances.
It is permissible to build multiple instances of the same extractor
(typically with different arguments), hence the ExtractorBuilder
@@ -133,14 +134,14 @@ class ExtractorBuilder():
"""Iterate over extractor specifications."""
return iter(range(len(self.specs)))
- def build(self, index: int) -> base.extractor.Extractor:
+ def build(self, index: int) -> base.Extractor:
"""Return an instance of the n'th extractor (n=*index*)."""
# get build instructions
specs = self.specs[index]
# check specs structure. expecting[{name: {kwargs}}]
if not isinstance(specs, dict):
- raise TypeError(f'expected a dict, found {typename(specs)}')
+ raise TypeError(f'expected a dict, found {bsfs.typename(specs)}')
if len(specs) != 1:
raise TypeError(f'expected a dict of length one, found {len(specs)}')
@@ -150,7 +151,7 @@ class ExtractorBuilder():
# check kwargs structure
if not isinstance(kwargs, dict):
- raise TypeError(f'expected a dict, found {typename(kwargs)}')
+ raise TypeError(f'expected a dict, found {bsfs.typename(kwargs)}')
# check name and get module/class components
module_name, class_name = _unpack_name(name)
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
index da422c0..7fdd935 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/tools/pipeline.py
@@ -11,9 +11,9 @@ import typing
# bsie imports
from bsie import base
-from bsie.utils import ns
from bsie.utils.node import Node
from bsie.utils.bsfs import schema as _schema, URI, uuid as _uuid, typename
+from bsie.utils import bsfs, node, ns
# exports
__all__: typing.Sequence[str] = (
@@ -56,10 +56,10 @@ class Pipeline():
self.schema = _schema.Schema.Union(ext.schema for ext in ext2rdr)
def __str__(self) -> str:
- return typename(self)
+ return bsfs.typename(self)
def __repr__(self) -> str:
- return f'{typename(self)}(...)'
+ return f'{bsfs.typename(self)}(...)'
def __hash__(self) -> int:
return hash((type(self), self._prefix, self.schema, tuple(self._ext2rdr), tuple(self._ext2rdr.values())))
diff --git a/test/lib/test_bsie.py b/test/lib/test_bsie.py
index 277ac67..5b71752 100644
--- a/test/lib/test_bsie.py
+++ b/test/lib/test_bsie.py
@@ -9,10 +9,11 @@ import os
import unittest
# bsie imports
+from bsie.base import extractor
from bsie.tools import builder
-from bsie.utils import ns
from bsie.utils.bsfs import URI, schema
from bsie.utils.node import Node
+from bsie.utils import bsfs, node, ns
# objects to test
from bsie.lib.bsie import BSIE
@@ -76,7 +77,6 @@ class TestBSIE(unittest.TestCase):
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
bsfs:unique "true"^^xsd:boolean .
-
'''))
# specify collect
@@ -207,7 +207,7 @@ class TestBSIE(unittest.TestCase):
ns.bse.author,
})
content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
- subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
+ subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash)
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
# from_file extracts all available triples
diff --git a/test/tools/test_builder.py b/test/tools/test_builder.py
index bc6f903..62c637c 100644
--- a/test/tools/test_builder.py
+++ b/test/tools/test_builder.py
@@ -10,8 +10,7 @@ import unittest
# bsie imports
from bsie import base
-from bsie.base import errors
-from bsie.utils.bsfs import URI
+from bsie.utils import bsfs
# objects to test
from bsie.tools.builder import ExtractorBuilder
@@ -26,12 +25,12 @@ from bsie.tools.builder import _unpack_name
class TestUtils(unittest.TestCase):
def test_safe_load(self):
# invalid module
- self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
- self.assertRaises(errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
+ self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN', 'foobar')
+ self.assertRaises(base.errors.LoaderError, _safe_load, 'dBGHMSAYOoKeKMpywDoKZQycENFPvN.bar', 'foobar')
# partially valid module
- self.assertRaises(errors.LoaderError, _safe_load, 'os.foo', 'foobar')
+ self.assertRaises(base.errors.LoaderError, _safe_load, 'os.foo', 'foobar')
# invalid class
- self.assertRaises(errors.LoaderError, _safe_load, 'os.path', 'foo')
+ self.assertRaises(base.errors.LoaderError, _safe_load, 'os.path', 'foo')
# valid module and class
cls = _safe_load('collections.abc', 'Container')
import collections.abc
@@ -65,10 +64,10 @@ class TestReaderBuilder(unittest.TestCase):
self.assertRaises(TypeError, builder.build, None)
self.assertRaises(ValueError, builder.build, '')
self.assertRaises(ValueError, builder.build, 'Path')
- self.assertRaises(errors.BuilderError, builder.build, 'path.Path')
+ self.assertRaises(base.errors.BuilderError, builder.build, 'path.Path')
# invalid config
builder = ReaderBuilder({'bsie.reader.stat.Stat': dict(foo=123)})
- self.assertRaises(errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
+ self.assertRaises(base.errors.BuilderError, builder.build, 'bsie.reader.stat.Stat')
builder = ReaderBuilder({'bsie.reader.stat.Stat': 123})
self.assertRaises(TypeError, builder.build, 'bsie.reader.stat.Stat')
# no instructions
@@ -143,7 +142,7 @@ class TestExtractorBuilder(unittest.TestCase):
]))
# building with invalid args
- self.assertRaises(errors.BuilderError, ExtractorBuilder(
+ self.assertRaises(base.errors.BuilderError, ExtractorBuilder(
[{'bsie.extractor.generic.path.Path': {'foo': 123}}]).build, 0)
# non-dict build specification
self.assertRaises(TypeError, ExtractorBuilder(
@@ -161,7 +160,7 @@ class TestExtractorBuilder(unittest.TestCase):
class TestPipelineBuilder(unittest.TestCase):
def test_build(self):
- prefix = URI('http://example.com/local/file#')
+ prefix = bsfs.URI('http://example.com/local/file#')
c_schema = '''
bse:author rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
diff --git a/test/tools/test_pipeline.py b/test/tools/test_pipeline.py
index 0dd8c75..92801ed 100644
--- a/test/tools/test_pipeline.py
+++ b/test/tools/test_pipeline.py
@@ -11,9 +11,8 @@ import unittest
# bsie imports
from bsie.base import errors
-from bsie.utils import ns
from bsie.utils.bsfs import URI
-from bsie.utils.node import Node
+from bsie.utils import bsfs, node, ns
import bsie.extractor.generic.constant
import bsie.extractor.generic.path
import bsie.extractor.generic.stat
@@ -68,8 +67,8 @@ class TestPipeline(unittest.TestCase):
self.assertEqual(hash(pipeline), hash(Pipeline(self.prefix, self.ext2rdr)))
# equivalence respects prefix
- self.assertNotEqual(pipeline, Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr))
- self.assertNotEqual(hash(pipeline), hash(Pipeline(URI('http://example.com/global/ent#'), self.ext2rdr)))
+ self.assertNotEqual(pipeline, Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr))
+ self.assertNotEqual(hash(pipeline), hash(Pipeline(bsfs.URI('http://example.com/global/ent#'), self.ext2rdr)))
# equivalence respects extractors/readers
ext2rdr = {ext: rdr for idx, (ext, rdr) in enumerate(self.ext2rdr.items()) if idx % 2 == 0}
self.assertNotEqual(pipeline, Pipeline(self.prefix, ext2rdr))
@@ -96,7 +95,7 @@ class TestPipeline(unittest.TestCase):
pipeline = Pipeline(self.prefix, self.ext2rdr)
# build objects for tests
content_hash = 'a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447'
- subject = Node(ns.bsfs.Entity, self.prefix + content_hash)
+ subject = node.Node(ns.bsfs.File, self.prefix + 'file#' + content_hash)
testfile = os.path.join(os.path.dirname(__file__), 'testfile.t')
p_filename = pipeline.schema.predicate(ns.bse.filename)
p_filesize = pipeline.schema.predicate(ns.bse.filesize)
diff --git a/test/utils/test_node.py b/test/utils/test_node.py
index 826f199..c70f0b8 100644
--- a/test/utils/test_node.py
+++ b/test/utils/test_node.py
@@ -8,8 +8,7 @@ Author: Matthias Baumgartner, 2022
import unittest
# bsie imports
-from bsie.utils.bsfs import URI
-from bsie.utils import ns
+from bsie.utils import bsfs, ns
# objects to test
from bsie.utils.node import Node
@@ -19,14 +18,14 @@ from bsie.utils.node import Node
class TestNode(unittest.TestCase):
def test_equality(self):
- uri = URI('http://example.com/me/entity#1234')
+ uri = bsfs.URI('http://example.com/me/entity#1234')
node = Node(ns.bsfs.Entity, uri)
# basic equivalence
- self.assertEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234')))
- self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#1234'))))
+ self.assertEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234')))
+ self.assertEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#1234'))))
# equality respects uri
- self.assertNotEqual(node, Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321')))
- self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321'))))
+ self.assertNotEqual(node, Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321')))
+ self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321'))))
# equality respects node_type
self.assertNotEqual(node, Node(ns.bsfs.Foo, uri))
self.assertNotEqual(hash(node), hash(Node(ns.bsfs.Foo, uri)))
@@ -42,7 +41,7 @@ class TestNode(unittest.TestCase):
self.assertNotEqual(hash(node), hash(Foo()))
def test_str(self):
- uri = URI('http://example.com/me/entity#1234')
+ uri = bsfs.URI('http://example.com/me/entity#1234')
# basic string conversion
node = Node(ns.bsfs.Entity, uri)
self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#1234)')
@@ -52,7 +51,7 @@ class TestNode(unittest.TestCase):
self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)')
self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Foo, http://example.com/me/entity#1234)')
# string conversion respects uri
- node = Node(ns.bsfs.Entity, URI('http://example.com/me/entity#4321'))
+ node = Node(ns.bsfs.Entity, bsfs.URI('http://example.com/me/entity#4321'))
self.assertEqual(str(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)')
self.assertEqual(repr(node), 'Node(http://bsfs.ai/schema/Entity, http://example.com/me/entity#4321)')