about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matthias Baumgartner <dev@igsor.net> 2022-11-25 14:31:29 +0100
committer: Matthias Baumgartner <dev@igsor.net> 2022-11-25 14:31:29 +0100
commit: e174a25585e64eb1b0759440cad48d642dd31829 (patch)
tree: fadee735ef922156ba4a67506154c26fab2ecdd5
parent: 9389c741bdbbca9adbff6099d440706cd63deac4 (diff)
download: bsie-e174a25585e64eb1b0759440cad48d642dd31829.tar.gz
bsie-e174a25585e64eb1b0759440cad48d642dd31829.tar.bz2
bsie-e174a25585e64eb1b0759440cad48d642dd31829.zip
use schema and predicate types in extractors
-rw-r--r-- bsie/base/errors.py 13
-rw-r--r-- bsie/base/extractor.py 51
-rw-r--r-- bsie/extractor/generic/constant.py 20
-rw-r--r-- bsie/extractor/generic/path.py 40
-rw-r--r-- bsie/extractor/generic/stat.py 34
-rw-r--r-- bsie/utils/bsfs.py 2
-rw-r--r-- bsie/utils/namespaces.py 3
-rw-r--r-- bsie/utils/node.py 2
-rw-r--r-- test/extractor/generic/test_constant.py 63
-rw-r--r-- test/extractor/generic/test_path.py 53
-rw-r--r-- test/extractor/generic/test_stat.py 48
11 files changed, 235 insertions, 94 deletions
diff --git a/bsie/base/errors.py b/bsie/base/errors.py
index f86ffb2..eedce3b 100644
--- a/bsie/base/errors.py
+++ b/bsie/base/errors.py
@@ -8,15 +8,22 @@ Author: Matthias Baumgartner, 2022
import typing
# exports
-__all__: typing.Sequence[str] = []
+__all__: typing.Sequence[str] = (
+ 'ExtractorError',
+ )
+
+
## code ##
-class _BSIE_Error(Exception):
+class _BSIEError(Exception):
"""Generic BSIE error."""
-class ReaderError(_BSIE_Error):
+class ExtractorError(_BSIEError):
+ """The Extractor failed to process the given content."""
+
+class ReaderError(_BSIEError):
"""The Reader failed to read the given file."""
## EOF ##
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index ea43925..a6a69c6 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -11,13 +11,38 @@ import typing
# inner-module imports
from . import reader
from bsie.utils import node
-from bsie.utils.bsfs import URI, typename
+from bsie.utils.bsfs import schema as _schema, typename
# exports
__all__: typing.Sequence[str] = (
'Extractor',
)
+# constants
+
+# essential definitions typically used in extractor schemas.
+# NOTE: The definition here is only for convenience; Each Extractor must implement its use, if so desired.
+SCHEMA_PREAMBLE = '''
+ # common external prefixes
+ prefix owl: <http://www.w3.org/2002/07/owl#>
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+ prefix schema: <http://schema.org/>
+
+ # common bsfs prefixes
+ prefix bsfs: <http://bsfs.ai/schema/>
+ prefix bse: <http://bsfs.ai/schema/Entity#>
+
+ # essential nodes
+ bsfs:Entity rdfs:subClassOf bsfs:Node .
+
+ # common definitions
+ xsd:string rdfs:subClassOf bsfs:Literal .
+ xsd:integer rdfs:subClassOf bsfs:Literal .
+
+ '''
+
## code ##
@@ -27,23 +52,37 @@ class Extractor(abc.ABC):
# what type of content is expected (i.e. reader subclass).
CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None
+ # extractor schema.
+ schema: _schema.Schema
+
+ def __init__(self, schema: _schema.Schema):
+ self.schema = schema
+
def __str__(self) -> str:
return typename(self)
def __repr__(self) -> str:
return f'{typename(self)}()'
- @abc.abstractmethod
- def schema(self) -> str:
- """Return the schema (predicates and nodes) produced by this Extractor."""
+
+ def predicates(self) -> typing.Iterator[_schema.Predicate]:
+ """Return the predicates that may be part of extracted triples."""
+ # NOTE: Some predicates in the schema might not occur in actual triples,
+ # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate
+ # is part of every schema but should not be used in triples.
+ # Announcing all predicates might not be the most efficient way, however,
+ # it is the most safe one. Concrete extractors that produce additional
+ # predicates (e.g. auxiliary nodes with their own predicates) should
+ # overwrite this method to only include the principal predicates.
+ return self.schema.predicates()
@abc.abstractmethod
def extract(
self,
subject: node.Node,
content: typing.Any,
- predicates: typing.Iterable[URI],
- ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+ predicates: typing.Iterable[_schema.Predicate],
+ ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]:
"""Return (node, predicate, value) triples."""
## EOF ##
diff --git a/bsie/extractor/generic/constant.py b/bsie/extractor/generic/constant.py
index e243131..795bac6 100644
--- a/bsie/extractor/generic/constant.py
+++ b/bsie/extractor/generic/constant.py
@@ -7,9 +7,9 @@ Author: Matthias Baumgartner, 2022
# imports
import typing
-# inner-module imports
+# bsie imports
from bsie.base import extractor
-from bsie.utils.bsfs import URI
+from bsie.utils.bsfs import URI, schema as _schema
from bsie.utils.node import Node
# exports
@@ -25,26 +25,26 @@ class Constant(extractor.Extractor):
CONTENT_READER = None
+ # predicate/value pairs to be produced.
+ _tuples: typing.Tuple[typing.Tuple[_schema.Predicate, typing.Any], ...]
+
def __init__(
self,
schema: str,
tuples: typing.Iterable[typing.Tuple[URI, typing.Any]],
):
- self._schema = schema
- self._tuples = tuples
- # FIXME: use schema instance for predicate checking
- #self._tuples = [(pred, value) for pred, value in tuples if pred in schema]
+ super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + schema))
+ # NOTE: Raises a KeyError if the predicate is not part of the schema
+ self._tuples = tuple((self.schema.predicate(p_uri), value) for p_uri, value in tuples)
# FIXME: use schema instance for value checking
- def schema(self) -> str:
- return self._schema
def extract(
self,
subject: Node,
content: None,
- predicates: typing.Iterable[URI],
- ) -> typing.Iterator[typing.Tuple[Node, URI, typing.Any]]:
+ predicates: typing.Iterable[_schema.Predicate],
+ ) -> typing.Iterator[typing.Tuple[Node, _schema.Predicate, typing.Any]]:
for pred, value in self._tuples:
if pred in predicates:
yield subject, pred, value
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index c39bbd2..f358a79 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -8,11 +8,10 @@ Author: Matthias Baumgartner, 2022
import os
import typing
-# inner-module imports
+# bsie imports
from bsie.base import extractor
from bsie.utils import node, ns
-from bsie.utils.bsfs import URI
-import bsie.reader.path
+from bsie.utils.bsfs import schema
# exports
__all__: typing.Sequence[str] = (
@@ -27,30 +26,31 @@ class Path(extractor.Extractor):
CONTENT_READER = bsie.reader.path.Path
- def __init__(self):
- self.__callmap = {
- ns.bse.filename: self.__filename,
- }
+ # mapping from predicate to handler function.
+ _callmap: typing.Dict[schema.Predicate, typing.Callable[[str], typing.Any]]
- def schema(self) -> str:
- return '''
- bse:filename a bsfs:Predicate ;
+ def __init__(self):
+ super().__init__(schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ bse:filename rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
- rdf:label "File name"^^xsd:string ;
+ rdfs:label "File name"^^xsd:string ;
schema:description "Filename of entity in some filesystem."^^xsd:string ;
owl:maxCardinality "INF"^^xsd:number .
- '''
+ '''))
+ self._callmap = {
+ self.schema.predicate(ns.bse.filename): self.__filename,
+ }
def extract(
self,
subject: node.Node,
content: CONTENT_READER.CONTENT_TYPE,
- predicates: typing.Iterable[URI],
- ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+ predicates: typing.Iterable[schema.Predicate],
+ ) -> typing.Iterator[typing.Tuple[node.Node, schema.Predicate, typing.Any]]:
for pred in predicates:
# find callback
- clbk = self.__callmap.get(pred)
+ clbk = self._callmap.get(pred)
if clbk is None:
continue
# get value
@@ -60,11 +60,15 @@ class Path(extractor.Extractor):
# produce triple
yield subject, pred, value
- def __filename(self, path: str) -> str:
+ def __filename(self, path: str) -> typing.Optional[str]:
try:
return os.path.basename(path)
- except Exception:
- # FIXME: some kind of error reporting (e.g. logging)
+ except Exception: # some error, skip.
+ # FIXME: some kind of error reporting (e.g. logging)?
+ # Options: (a) Fail silently (current); (b) Skip and report to log;
+ # (c) Raise ExtractorError (aborts extraction); (d) separate content type
+ # checks from basename errors (report content type errors, skip basename
+ # errors)
return None
## EOF ##
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index d74369c..e5387af 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -5,14 +5,13 @@ A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2022
"""
# imports
+import os
import typing
-# inner-module imports
+# bsie imports
from bsie.base import extractor
from bsie.utils import node, ns
-from bsie.utils.bsfs import URI
-import bsie.reader.stat
-
+from bsie.utils.bsfs import schema as _schema
# exports
__all__: typing.Sequence[str] = (
@@ -27,30 +26,31 @@ class Stat(extractor.Extractor):
CONTENT_READER = bsie.reader.stat.Stat
- def __init__(self):
- self.__callmap = {
- ns.bse.filesize: self.__filesize,
- }
+ # mapping from predicate to handler function.
+ _callmap: typing.Dict[_schema.Predicate, typing.Callable[[os.stat_result], typing.Any]]
- def schema(self) -> str:
- return '''
- bse:filesize a bsfs:Predicate ;
+ def __init__(self):
+ super().__init__(_schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
+ bse:filesize rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:integer ;
- rdf:label "File size"^^xsd:string ;
+ rdfs:label "File size"^^xsd:string ;
schema:description "File size of entity in some filesystem."^^xsd:string ;
owl:maxCardinality "INF"^^xsd:number .
- '''
+ '''))
+ self._callmap = {
+ self.schema.predicate(ns.bse.filesize): self.__filesize,
+ }
def extract(
self,
subject: node.Node,
content: CONTENT_READER.CONTENT_TYPE,
- predicates: typing.Iterable[URI],
- ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]:
+ predicates: typing.Iterable[_schema.Predicate],
+ ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]:
for pred in predicates:
# find callback
- clbk = self.__callmap.get(pred)
+ clbk = self._callmap.get(pred)
if clbk is None:
continue
# get value
@@ -60,7 +60,7 @@ class Stat(extractor.Extractor):
# produce triple
yield subject, pred, value
- def __filesize(self, content: CONTENT_READER.CONTENT_TYPE) -> int:
+ def __filesize(self, content: os.stat_result) -> typing.Optional[int]:
"""Return the file size."""
try:
return content.st_size
diff --git a/bsie/utils/bsfs.py b/bsie/utils/bsfs.py
index 1ae657c..01ec5d1 100644
--- a/bsie/utils/bsfs.py
+++ b/bsie/utils/bsfs.py
@@ -8,6 +8,7 @@ Author: Matthias Baumgartner, 2022
import typing
# bsfs imports
+from bsfs import schema
from bsfs.namespace import Namespace
from bsfs.utils import URI, typename
@@ -15,6 +16,7 @@ from bsfs.utils import URI, typename
__all__: typing.Sequence[str] = (
'Namespace',
'URI',
+ 'schema',
'typename',
)
diff --git a/bsie/utils/namespaces.py b/bsie/utils/namespaces.py
index 67ccc71..13be96b 100644
--- a/bsie/utils/namespaces.py
+++ b/bsie/utils/namespaces.py
@@ -7,13 +7,14 @@ Author: Matthias Baumgartner, 2022
# imports
import typing
-# bsie imports
+# inner-module imports
from . import bsfs as _bsfs
# constants
bse = _bsfs.Namespace('http://bsfs.ai/schema/Entity#')
bsfs = _bsfs.Namespace('http://bsfs.ai/schema/')
bsm = _bsfs.Namespace('http://bsfs.ai/schema/meta#')
+xsd = _bsfs.Namespace('http://www.w3.org/2001/XMLSchema#')
# export
__all__: typing.Sequence[str] = (
diff --git a/bsie/utils/node.py b/bsie/utils/node.py
index 60863a4..3a0f06b 100644
--- a/bsie/utils/node.py
+++ b/bsie/utils/node.py
@@ -12,7 +12,7 @@ from bsie.utils.bsfs import URI
# exports
__all__: typing.Sequence[str] = (
- 'Node'
+ 'Node',
)
diff --git a/test/extractor/generic/test_constant.py b/test/extractor/generic/test_constant.py
index f3ab0a3..7fdb8ac 100644
--- a/test/extractor/generic/test_constant.py
+++ b/test/extractor/generic/test_constant.py
@@ -20,39 +20,64 @@ from bsie.extractor.generic.constant import Constant
class TestConstant(unittest.TestCase):
def test_extract(self):
schema = '''
- bse:author a bsfs:Predicate ;
+ bse:author rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
owl:maxCardinality "1"^^xsd:number .
-
- bse:comment a bsfs:Predicate ;
+ bse:comment rdfs:subClassOf bsfs:Predicate ;
rdfs:domain bsfs:Entity ;
rdfs:range xsd:string ;
owl:maxCardinality "INF"^^xsd:number .
-
'''
tuples = [
(ns.bse.author, 'Me, myself, and I'),
(ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'),
]
- node = Node(ns.bsfs.Entity, '') # Blank node
- predicates = (ns.bse.author, ns.bse.comment)
ext = Constant(schema, tuples)
+ node = Node(ns.bsfs.Entity, '') # Blank node
+ p_author = ext.schema.predicate(ns.bse.author)
+ p_comment = ext.schema.predicate(ns.bse.comment)
+ entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+ string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
# baseline
- self.assertSetEqual(set(ext.extract(node, None, predicates)),
- {(node, pred, value) for pred, value in tuples})
+ self.assertSetEqual(set(ext.extract(node, None, (p_author, p_comment))),
+ {(node, p_author, 'Me, myself, and I'),
+ (node, p_comment, 'the quick brown fox jumps over the lazy dog.')})
# predicates is respected
- self.assertSetEqual(set(ext.extract(node, None, (ns.bse.author, ns.bse.foobar))),
- {(node, ns.bse.author, 'Me, myself, and I')})
- self.assertSetEqual(set(ext.extract(node, None, (ns.bse.comment, ns.bse.foobar))),
- {(node, ns.bse.comment, 'the quick brown fox jumps over the lazy dog.')})
- self.assertSetEqual(set(ext.extract(node, None, (ns.bse.foobar, ns.bse.barfoo))), set())
-
- # FIXME: should change!
- # for now: no schema compliance
- ext = Constant('', tuples)
- self.assertSetEqual(set(ext.extract(node, None, predicates)),
- {(node, pred, value) for pred, value in tuples})
+ p_foobar = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foobar, domain=entity, range=entity)
+ self.assertSetEqual(set(ext.extract(node, None, (p_author, p_foobar))),
+ {(node, p_author, 'Me, myself, and I')})
+ self.assertSetEqual(set(ext.extract(node, None, (p_comment, p_foobar))),
+ {(node, p_comment, 'the quick brown fox jumps over the lazy dog.')})
+ p_barfoo = ext.schema.predicate(ns.bse.author).get_child(ns.bse.comment, domain=entity, range=string)
+ self.assertSetEqual(set(ext.extract(node, None, (p_foobar, p_barfoo))), set())
+
+ def test_construct(self):
+ # schema compliance
+ schema = '''
+ bse:author rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "1"^^xsd:number .
+ bse:comment rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "INF"^^xsd:number .
+ '''
+ # can create a schema
+ self.assertIsInstance(Constant(schema, [
+ (ns.bse.author, 'Me, myself, and I'),
+ (ns.bse.comment, 'the quick brown fox jumps over the lazy dog.'),
+ ]), Constant)
+ # predicates are validated
+ self.assertRaises(KeyError, Constant, schema, [
+ (ns.bse.author, 'Me, myself, and I'),
+ (ns.bse.foobar, 'foobar!')])
+ # FIXME: values are validated
+ #class Foo(): pass # not string compatible
+ #self.assertRaises(ValueError, Constant, schema, [
+ # (ns.bse.author, Foo())])
+
## main ##
diff --git a/test/extractor/generic/test_path.py b/test/extractor/generic/test_path.py
index 8623490..9376c7c 100644
--- a/test/extractor/generic/test_path.py
+++ b/test/extractor/generic/test_path.py
@@ -8,7 +8,9 @@ Author: Matthias Baumgartner, 2022
import unittest
# bsie imports
+from bsie import base
from bsie.utils import ns
+from bsie.utils.bsfs import schema
from bsie.utils.node import Node
# objects to test
@@ -18,23 +20,52 @@ from bsie.extractor.generic.path import Path
## code ##
class TestPath(unittest.TestCase):
+ def test_eq(self):
+ # distinct instances, same data
+ self.assertEqual(Path(), Path())
+ # different classes
+ class Foo(): pass
+ self.assertNotEqual(Path(), Foo())
+ self.assertNotEqual(Path(), 123)
+ self.assertNotEqual(Path(), None)
+
+ def test_schema(self):
+ self.assertEqual(Path().schema,
+ schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + '''
+ bse:filename rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:string ;
+ owl:maxCardinality "INF"^^xsd:number .
+ '''))
+
def test_extract(self):
- node = Node(ns.bsfs.Entity, '') # Blank node
ext = Path()
+ node = Node(ns.bsfs.Entity, '') # Blank node
+ content = '/tmp/foo/bar'
+ p_filename = ext.schema.predicate(ns.bse.filename)
+ entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+ string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
# baseline
- self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ))),
- {(node, ns.bse.filename, 'bar')})
+ self.assertSetEqual(set(ext.extract(node, content, (p_filename, ))),
+ {(node, p_filename, 'bar')})
# predicates parameter is respected
- self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.filename, ns.bse.foo))),
- {(node, ns.bse.filename, 'bar')})
- self.assertSetEqual(set(ext.extract(node, '/tmp/foo/bar', (ns.bse.foo, ))), set())
+ p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate
+ self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_foo))),
+ {(node, p_filename, 'bar')})
+ self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set())
+ # predicates are validated
+ p_bar = p_foo.get_child(ns.bse.filename) # same URI but different hierarchy
+ self.assertSetEqual(set(ext.extract(node, content, (p_filename, p_bar))),
+ {(node, p_filename, 'bar')})
+ self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set())
# path variations
- self.assertSetEqual(set(ext.extract(node, 'bar', (ns.bse.filename, ))),
- {(node, ns.bse.filename, 'bar')})
- self.assertSetEqual(set(ext.extract(node, '', (ns.bse.filename, ))),
- {(node, ns.bse.filename, '')})
- self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filename, ))), set())
+ self.assertSetEqual(set(ext.extract(node, 'bar', (p_filename, ))),
+ {(node, p_filename, 'bar')})
+ self.assertSetEqual(set(ext.extract(node, '', (p_filename, ))),
+ {(node, p_filename, '')})
+ # errors are suppressed
+ self.assertSetEqual(set(ext.extract(node, None, (p_filename, ))), set())
## main ##
diff --git a/test/extractor/generic/test_stat.py b/test/extractor/generic/test_stat.py
index f89b053..26dad6a 100644
--- a/test/extractor/generic/test_stat.py
+++ b/test/extractor/generic/test_stat.py
@@ -9,7 +9,9 @@ import os
import unittest
# bsie imports
+from bsie import base
from bsie.utils import ns
+from bsie.utils.bsfs import schema
from bsie.utils.node import Node
# objects to test
@@ -18,21 +20,51 @@ from bsie.extractor.generic.stat import Stat
## code ##
-class TestConstant(unittest.TestCase):
+class TestStat(unittest.TestCase):
+ def test_eq(self):
+ # distinct instances, same data
+ self.assertEqual(Stat(), Stat())
+ # different classes
+ class Foo(): pass
+ self.assertNotEqual(Stat(), Foo())
+ self.assertNotEqual(Stat(), 123)
+ self.assertNotEqual(Stat(), None)
+
+ def test_schema(self):
+ self.assertEqual(Stat().schema,
+ schema.Schema.from_string(base.extractor.SCHEMA_PREAMBLE + '''
+ bse:filesize rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsfs:Entity ;
+ rdfs:range xsd:integer ;
+ owl:maxCardinality "INF"^^xsd:number .
+ '''))
+
def test_extract(self):
+ ext = Stat()
node = Node(ns.bsfs.Entity, '') # Blank node
content = os.stat(__file__)
- ext = Stat()
+ p_filesize = ext.schema.predicate(ns.bse.filesize)
+ entity = ext.schema.node(ns.bsfs.Node).get_child(ns.bsfs.Entity)
+ string = ext.schema.literal(ns.bsfs.Literal).get_child(ns.xsd.string)
# baseline
- self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ))),
- {(node, ns.bse.filesize, content.st_size)})
+ self.assertSetEqual(set(ext.extract(node, content, (p_filesize, ))),
+ {(node, p_filesize, content.st_size)})
# predicates parameter is respected
- self.assertSetEqual(set(ext.extract(node, content, (ns.bse.filesize, ns.bse.foo))),
- {(node, ns.bse.filesize, content.st_size)})
- self.assertSetEqual(set(ext.extract(node, content, (ns.bse.foo, ))), set())
+ p_foo = ext.schema.predicate(ns.bsfs.Predicate).get_child(ns.bse.foo, domain=entity, range=string) # unsupported predicate
+ self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_foo))),
+ {(node, p_filesize, content.st_size)})
+ self.assertSetEqual(set(ext.extract(node, content, (p_foo, ))), set())
+ # predicates are validated
+ p_bar = p_foo.get_child(ns.bse.filesize) # same URI but different hierarchy
+ self.assertSetEqual(set(ext.extract(node, content, (p_filesize, p_bar))),
+ {(node, p_filesize, content.st_size)})
+ self.assertSetEqual(set(ext.extract(node, content, (p_bar, ))), set())
# content variations
- self.assertSetEqual(set(ext.extract(node, None, (ns.bse.filesize, ))), set())
+ self.assertSetEqual(set(ext.extract(node, os.stat_result([12345] * len(content)), (p_filesize, p_bar))),
+ {(node, p_filesize, 12345)})
+ # errors are suppressed
+ self.assertSetEqual(set(ext.extract(node, None, (p_filesize, ))), set())
## main ##