From d2b4a528465dc01e8db92b61293c458c7911a333 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 31 Oct 2022 12:21:22 +0100 Subject: essential interfaces (reader, extractor, errors) --- bsie/base/__init__.py | 24 ++++++++++++++++++++++++ bsie/base/errors.py | 22 ++++++++++++++++++++++ bsie/base/extractor.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ bsie/base/reader.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 144 insertions(+) create mode 100644 bsie/base/__init__.py create mode 100644 bsie/base/errors.py create mode 100644 bsie/base/extractor.py create mode 100644 bsie/base/reader.py (limited to 'bsie/base') diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py new file mode 100644 index 0000000..0154862 --- /dev/null +++ b/bsie/base/__init__.py @@ -0,0 +1,24 @@ +"""The base module defines the BSIE interfaces. + +You'll mostly find abstract classes here. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# inner-module imports +from . import errors +from . import extractor +from . import reader + +# exports +__all__: typing.Sequence[str] = ( + 'errors', + 'extractor', + 'reader', + ) + +## EOF ## diff --git a/bsie/base/errors.py b/bsie/base/errors.py new file mode 100644 index 0000000..f86ffb2 --- /dev/null +++ b/bsie/base/errors.py @@ -0,0 +1,22 @@ +"""Common BSIE exceptions. + +Part of the bsie module. +A copy of the license is provided with the project. 
+Author: Matthias Baumgartner, 2022 +""" +# imports +import typing + +# exports +__all__: typing.Sequence[str] = [] + + +## code ## + +class _BSIE_Error(Exception): + """Generic BSIE error.""" + +class ReaderError(_BSIE_Error): + """The Reader failed to read the given file.""" + +## EOF ## diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py new file mode 100644 index 0000000..d5b0922 --- /dev/null +++ b/bsie/base/extractor.py @@ -0,0 +1,50 @@ +"""The Extractor classes transform content into triples. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import collections +import typing + +# inner-module imports +from . import reader +from bsie.utils import node +from bsie.utils.bsfs import URI, typename + +# exports +__all__: typing.Sequence[str] = ( + 'Extractor', + ) + + +## code ## + +class Extractor(abc.ABC, collections.abc.Iterable, collections.abc.Callable): + """Produce (node, predicate, value)-triples from some content.""" + + # what type of content is expected (i.e. reader subclass). + CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None + + def __str__(self) -> str: + return typename(self) + + def __repr__(self) -> str: + return f'{typename(self)}()' + + @abc.abstractmethod + def schema(self) -> str: + """Return the schema (predicates and nodes) produced by this Extractor.""" + + @abc.abstractmethod + def extract( + self, + subject: node.Node, + content: typing.Any, + predicates: typing.Iterable[URI], + ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + """Return (node, predicate, value) triples.""" + +## EOF ## diff --git a/bsie/base/reader.py b/bsie/base/reader.py new file mode 100644 index 0000000..f29e451 --- /dev/null +++ b/bsie/base/reader.py @@ -0,0 +1,48 @@ +"""The Reader classes return high-level content structures from files. 
+ +The Reader fulfills two purposes: + First, it brokers between multiple libraries and file formats. + Second, it separates multiple aspects of a file into distinct content types. + +Part of the bsie module. +A copy of the license is provided with the project. +Author: Matthias Baumgartner, 2022 +""" +# imports +import abc +import typing + +# inner-module imports +from bsie.utils.bsfs import URI, typename + +# exports +__all__: typing.Sequence[str] = ( + 'Aggregator', + 'Reader', + ) + + +## code ## + +class Reader(abc.ABC): + """Read and return some content from a file.""" + + # In what data structure content is returned + CONTENT_TYPE = typing.Union[typing.Any] + # NOTE: Child classes must also assign a typing.Union even if there's + # only one options + + def __str__(self) -> str: + return typename(self) + + def __repr__(self) -> str: + return f'{typename(self)}()' + + # FIXME: How about using contexts instead of calls? + @abc.abstractmethod + def __call__(self, path: URI) -> CONTENT_TYPE: + """Return some content of the file at *path*. + Raises a `ReaderError` if the reader cannot make sense of the file format. 
+ """ + +## EOF ## -- cgit v1.2.3 From 2da348c638ac5058d5acf09ab5df323ee04503d5 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Mon, 31 Oct 2022 14:14:42 +0100 Subject: constant, filesize, and filename extractors --- bsie/base/extractor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'bsie/base') diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index d5b0922..ea43925 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -6,7 +6,6 @@ Author: Matthias Baumgartner, 2022 """ # imports import abc -import collections import typing # inner-module imports @@ -22,7 +21,7 @@ __all__: typing.Sequence[str] = ( ## code ## -class Extractor(abc.ABC, collections.abc.Iterable, collections.abc.Callable): +class Extractor(abc.ABC): """Produce (node, predicate, value)-triples from some content.""" # what type of content is expected (i.e. reader subclass). -- cgit v1.2.3 From e174a25585e64eb1b0759440cad48d642dd31829 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:31:29 +0100 Subject: use schema and predicate types in extractors --- bsie/base/errors.py | 13 ++++++++++--- bsie/base/extractor.py | 51 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 9 deletions(-) (limited to 'bsie/base') diff --git a/bsie/base/errors.py b/bsie/base/errors.py index f86ffb2..eedce3b 100644 --- a/bsie/base/errors.py +++ b/bsie/base/errors.py @@ -8,15 +8,22 @@ Author: Matthias Baumgartner, 2022 import typing # exports -__all__: typing.Sequence[str] = [] +__all__: typing.Sequence[str] = ( + 'ExtractorError', + ) + + ## code ## -class _BSIE_Error(Exception): +class _BSIEError(Exception): """Generic BSIE error.""" -class ReaderError(_BSIE_Error): +class ExtractorError(_BSIEError): + """The Extractor failed to process the given content.""" + +class ReaderError(_BSIEError): """The Reader failed to read the given file.""" ## EOF ## diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py 
index ea43925..a6a69c6 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -11,13 +11,38 @@ import typing # inner-module imports from . import reader from bsie.utils import node -from bsie.utils.bsfs import URI, typename +from bsie.utils.bsfs import schema as _schema, typename # exports __all__: typing.Sequence[str] = ( 'Extractor', ) +# constants + +# essential definitions typically used in extractor schemas. +# NOTE: The definition here is only for convenience; Each Extractor must implement its use, if so desired. +SCHEMA_PREAMBLE = ''' + # common external prefixes + prefix owl: + prefix rdf: + prefix rdfs: + prefix xsd: + prefix schema: + + # common bsfs prefixes + prefix bsfs: + prefix bse: + + # essential nodes + bsfs:Entity rdfs:subClassOf bsfs:Node . + + # common definitions + xsd:string rdfs:subClassOf bsfs:Literal . + xsd:integer rdfs:subClassOf bsfs:Literal . + + ''' + ## code ## @@ -27,23 +52,37 @@ class Extractor(abc.ABC): # what type of content is expected (i.e. reader subclass). CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None + # extractor schema. + schema: _schema.Schema + + def __init__(self, schema: _schema.Schema): + self.schema = schema + def __str__(self) -> str: return typename(self) def __repr__(self) -> str: return f'{typename(self)}()' - @abc.abstractmethod - def schema(self) -> str: - """Return the schema (predicates and nodes) produced by this Extractor.""" + + def predicates(self) -> typing.Iterator[_schema.Predicate]: + """Return the predicates that may be part of extracted triples.""" + # NOTE: Some predicates in the schema might not occur in actual triples, + # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate + # is part of every schema but should not be used in triples. + # Announcing all predicates might not be the most efficient way, however, + # it is the most safe one. Concrete extractors that produce additional + # predicates (e.g. 
auxiliary nodes with their own predicates) should + # overwrite this method to only include the principal predicates. + return self.schema.predicates() @abc.abstractmethod def extract( self, subject: node.Node, content: typing.Any, - predicates: typing.Iterable[URI], - ) -> typing.Iterator[typing.Tuple[node.Node, URI, typing.Any]]: + predicates: typing.Iterable[_schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]: """Return (node, predicate, value) triples.""" ## EOF ## -- cgit v1.2.3 From b96c6e2096c387b70e2a4c1f0bc53b6044a0dc6f Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:36:27 +0100 Subject: decouple readers and extractors; use strings for reference and repeated type annotations --- bsie/base/extractor.py | 5 ++--- bsie/base/reader.py | 11 ++--------- 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'bsie/base') diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index a6a69c6..7acf2bd 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -8,8 +8,7 @@ Author: Matthias Baumgartner, 2022 import abc import typing -# inner-module imports -from . import reader +# bsie imports from bsie.utils import node from bsie.utils.bsfs import schema as _schema, typename @@ -50,7 +49,7 @@ class Extractor(abc.ABC): """Produce (node, predicate, value)-triples from some content.""" # what type of content is expected (i.e. reader subclass). - CONTENT_READER: typing.Optional[typing.Type[reader.Reader]] = None + CONTENT_READER: typing.Optional[str] = None # extractor schema. 
schema: _schema.Schema diff --git a/bsie/base/reader.py b/bsie/base/reader.py index f29e451..e59abef 100644 --- a/bsie/base/reader.py +++ b/bsie/base/reader.py @@ -12,12 +12,11 @@ Author: Matthias Baumgartner, 2022 import abc import typing -# inner-module imports +# bsie imports from bsie.utils.bsfs import URI, typename # exports __all__: typing.Sequence[str] = ( - 'Aggregator', 'Reader', ) @@ -27,20 +26,14 @@ __all__: typing.Sequence[str] = ( class Reader(abc.ABC): """Read and return some content from a file.""" - # In what data structure content is returned - CONTENT_TYPE = typing.Union[typing.Any] - # NOTE: Child classes must also assign a typing.Union even if there's - # only one options - def __str__(self) -> str: return typename(self) def __repr__(self) -> str: return f'{typename(self)}()' - # FIXME: How about using contexts instead of calls? @abc.abstractmethod - def __call__(self, path: URI) -> CONTENT_TYPE: + def __call__(self, path: URI) -> typing.Any: """Return some content of the file at *path*. Raises a `ReaderError` if the reader cannot make sense of the file format. 
""" -- cgit v1.2.3 From 9ce32829b2bb85907a34a543bfcaa9183d1e362c Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:39:18 +0100 Subject: string conversion and equality checks --- bsie/base/extractor.py | 7 +++++++ bsie/base/reader.py | 6 ++++++ 2 files changed, 13 insertions(+) (limited to 'bsie/base') diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index 7acf2bd..2fc4f18 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -63,6 +63,13 @@ class Extractor(abc.ABC): def __repr__(self) -> str: return f'{typename(self)}()' + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) \ + and self.CONTENT_READER == other.CONTENT_READER \ + and self.schema == other.schema + + def __hash__(self) -> int: + return hash((type(self), self.CONTENT_READER, self.schema)) def predicates(self) -> typing.Iterator[_schema.Predicate]: """Return the predicates that may be part of extracted triples.""" diff --git a/bsie/base/reader.py b/bsie/base/reader.py index e59abef..b7eabf7 100644 --- a/bsie/base/reader.py +++ b/bsie/base/reader.py @@ -32,6 +32,12 @@ class Reader(abc.ABC): def __repr__(self) -> str: return f'{typename(self)}()' + def __eq__(self, other: typing.Any) -> bool: + return isinstance(other, type(self)) + + def __hash__(self) -> int: + return hash(type(self)) + @abc.abstractmethod def __call__(self, path: URI) -> typing.Any: """Return some content of the file at *path*. 
-- cgit v1.2.3 From c9a1dea230054f5d6f40b7fd5e3930609c5f6416 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:41:38 +0100 Subject: code analysis tool configs and minor fixes --- bsie/base/errors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'bsie/base') diff --git a/bsie/base/errors.py b/bsie/base/errors.py index eedce3b..a86b7e8 100644 --- a/bsie/base/errors.py +++ b/bsie/base/errors.py @@ -10,11 +10,10 @@ import typing # exports __all__: typing.Sequence[str] = ( 'ExtractorError', + 'ReaderError', ) - - ## code ## class _BSIEError(Exception): -- cgit v1.2.3 From 3e6a69ce7f109f0fd4352507ad60d58d4cbd24a7 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 25 Nov 2022 14:43:12 +0100 Subject: builders and pipeline --- bsie/base/errors.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'bsie/base') diff --git a/bsie/base/errors.py b/bsie/base/errors.py index a86b7e8..760351f 100644 --- a/bsie/base/errors.py +++ b/bsie/base/errors.py @@ -9,7 +9,9 @@ import typing # exports __all__: typing.Sequence[str] = ( + 'BuilderError', 'ExtractorError', + 'LoaderError', 'ReaderError', ) @@ -19,6 +21,12 @@ __all__: typing.Sequence[str] = ( class _BSIEError(Exception): """Generic BSIE error.""" +class BuilderError(_BSIEError): + """The Builder failed to create an instance.""" + +class LoaderError(BuilderError): + """Failed to load a module or class.""" + class ExtractorError(_BSIEError): """The Extractor failed to process the given content.""" -- cgit v1.2.3 From edc747252a04675c46059215751719b6666a77f9 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Sat, 3 Dec 2022 18:57:58 +0100 Subject: adapt to schema interface update: owl:maxCardinality changed to bsfs:unique --- bsie/base/extractor.py | 1 - 1 file changed, 1 deletion(-) (limited to 'bsie/base') diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index 2fc4f18..75b7173 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ 
-23,7 +23,6 @@ __all__: typing.Sequence[str] = ( # NOTE: The definition here is only for convenience; Each Extractor must implement its use, if so desired. SCHEMA_PREAMBLE = ''' # common external prefixes - prefix owl: prefix rdf: prefix rdfs: prefix xsd: -- cgit v1.2.3 From 559e643bb1fa39feefd2eb73847ad9420daf1deb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Wed, 14 Dec 2022 06:10:25 +0100 Subject: bsie extraction and info apps --- bsie/base/errors.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'bsie/base') diff --git a/bsie/base/errors.py b/bsie/base/errors.py index 760351f..dc3c30e 100644 --- a/bsie/base/errors.py +++ b/bsie/base/errors.py @@ -33,4 +33,10 @@ class ExtractorError(_BSIEError): class ReaderError(_BSIEError): """The Reader failed to read the given file.""" +class ProgrammingError(_BSIEError): + """An assertion-like error that indicates a code-base issue.""" + +class UnreachableError(ProgrammingError): + """Bravo, you've reached a point in code that should logically not be reachable.""" + ## EOF ## -- cgit v1.2.3 From 3dc3e9a9b0fc8c9727f91359814866d3deae6e79 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 16:42:07 +0100 Subject: minor fixes and comments --- bsie/base/extractor.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'bsie/base') diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index 75b7173..bfa403c 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -20,7 +20,7 @@ __all__: typing.Sequence[str] = ( # constants # essential definitions typically used in extractor schemas. -# NOTE: The definition here is only for convenience; Each Extractor must implement its use, if so desired. +# NOTE: This preamble is only for convenience; Each Extractor must implement its use, if so desired. 
SCHEMA_PREAMBLE = ''' # common external prefixes prefix rdf: @@ -45,7 +45,12 @@ SCHEMA_PREAMBLE = ''' ## code ## class Extractor(abc.ABC): - """Produce (node, predicate, value)-triples from some content.""" + """Produce (subject, predicate, value)-triples from some content. + The Extractor produces principal predicates that provide information + about the content itself (i.e., triples that include the subject), + and may also generate triples with auxiliary predicates if the + extracted value is a node itself. + """ # what type of content is expected (i.e. reader subclass). CONTENT_READER: typing.Optional[str] = None -- cgit v1.2.3 From 49cf03fc212c813862453de5352436dc90d1e458 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 16:50:53 +0100 Subject: imports and init files --- bsie/base/__init__.py | 8 ++++---- bsie/base/reader.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'bsie/base') diff --git a/bsie/base/__init__.py b/bsie/base/__init__.py index 0154862..0d362cd 100644 --- a/bsie/base/__init__.py +++ b/bsie/base/__init__.py @@ -11,14 +11,14 @@ import typing # inner-module imports from . import errors -from . import extractor -from . 
import reader +from .extractor import Extractor +from .reader import Reader # exports __all__: typing.Sequence[str] = ( + 'Extractor', + 'Reader', 'errors', - 'extractor', - 'reader', ) ## EOF ## diff --git a/bsie/base/reader.py b/bsie/base/reader.py index b7eabf7..cbabd36 100644 --- a/bsie/base/reader.py +++ b/bsie/base/reader.py @@ -13,7 +13,7 @@ import abc import typing # bsie imports -from bsie.utils.bsfs import URI, typename +from bsie.utils import bsfs # exports __all__: typing.Sequence[str] = ( @@ -27,10 +27,10 @@ class Reader(abc.ABC): """Read and return some content from a file.""" def __str__(self) -> str: - return typename(self) + return bsfs.typename(self) def __repr__(self) -> str: - return f'{typename(self)}()' + return f'{bsfs.typename(self)}()' def __eq__(self, other: typing.Any) -> bool: return isinstance(other, type(self)) @@ -39,7 +39,7 @@ class Reader(abc.ABC): return hash(type(self)) @abc.abstractmethod - def __call__(self, path: URI) -> typing.Any: + def __call__(self, path: bsfs.URI) -> typing.Any: """Return some content of the file at *path*. Raises a `ReaderError` if the reader cannot make sense of the file format. 
""" -- cgit v1.2.3 From 3b7fee369924eb7704709edeb8c17fff9c020dfb Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:06:09 +0100 Subject: import fixes --- bsie/base/extractor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'bsie/base') diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index bfa403c..a5c7846 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -11,6 +11,7 @@ import typing # bsie imports from bsie.utils import node from bsie.utils.bsfs import schema as _schema, typename +from bsie.utils import bsfs, node, ns # exports __all__: typing.Sequence[str] = ( @@ -62,10 +63,10 @@ class Extractor(abc.ABC): self.schema = schema def __str__(self) -> str: - return typename(self) + return bsfs.typename(self) def __repr__(self) -> str: - return f'{typename(self)}()' + return f'{bsfs.typename(self)}()' def __eq__(self, other: typing.Any) -> bool: return isinstance(other, type(self)) \ -- cgit v1.2.3 From 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:12:56 +0100 Subject: file node class in default schema --- bsie/base/extractor.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'bsie/base') diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index a5c7846..678dcec 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -35,6 +35,7 @@ SCHEMA_PREAMBLE = ''' # essential nodes bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:File rdfs:subClassOf bsfs:Entity . # common definitions xsd:string rdfs:subClassOf bsfs:Literal . 
@@ -77,15 +78,13 @@ class Extractor(abc.ABC): return hash((type(self), self.CONTENT_READER, self.schema)) def predicates(self) -> typing.Iterator[_schema.Predicate]: - """Return the predicates that may be part of extracted triples.""" - # NOTE: Some predicates in the schema might not occur in actual triples, - # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate - # is part of every schema but should not be used in triples. - # Announcing all predicates might not be the most efficient way, however, - # it is the most safe one. Concrete extractors that produce additional - # predicates (e.g. auxiliary nodes with their own predicates) should - # overwrite this method to only include the principal predicates. - return self.schema.predicates() + ent = self.schema.node(ns.bsfs.Entity) + return ( + pred + for pred + in self.schema.predicates() + if pred.domain <= ent or (pred.range is not None and pred.range <= ent) + ) @abc.abstractmethod def extract( -- cgit v1.2.3 From 3426b4e201cf03b78d2a3f144876955fcda2f66b Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:17:53 +0100 Subject: extractor interface revision * schema as property * predicates -> principals --- bsie/base/extractor.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'bsie/base') diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index 678dcec..c44021b 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -9,8 +9,6 @@ import abc import typing # bsie imports -from bsie.utils import node -from bsie.utils.bsfs import schema as _schema, typename from bsie.utils import bsfs, node, ns # exports @@ -58,10 +56,10 @@ class Extractor(abc.ABC): CONTENT_READER: typing.Optional[str] = None # extractor schema. 
- schema: _schema.Schema + _schema: bsfs.schema.Schema - def __init__(self, schema: _schema.Schema): - self.schema = schema + def __init__(self, schema: bsfs.schema.Schema): + self._schema = schema def __str__(self) -> str: return bsfs.typename(self) @@ -77,7 +75,14 @@ class Extractor(abc.ABC): def __hash__(self) -> int: return hash((type(self), self.CONTENT_READER, self.schema)) - def predicates(self) -> typing.Iterator[_schema.Predicate]: + @property + def schema(self) -> bsfs.schema.Schema: + """Return the extractor's schema.""" + return self._schema + + @property + def principals(self) -> typing.Iterator[bsfs.schema.Predicate]: + """Return the principal predicates, i.e., relations from/to the extraction subject.""" ent = self.schema.node(ns.bsfs.Entity) return ( pred @@ -91,8 +96,8 @@ class Extractor(abc.ABC): self, subject: node.Node, content: typing.Any, - predicates: typing.Iterable[_schema.Predicate], - ) -> typing.Iterator[typing.Tuple[node.Node, _schema.Predicate, typing.Any]]: + principals: typing.Iterable[bsfs.schema.Predicate], + ) -> typing.Iterator[typing.Tuple[node.Node, bsfs.schema.Predicate, typing.Any]]: """Return (node, predicate, value) triples.""" ## EOF ## -- cgit v1.2.3