about | summary | refs | log | tree | commit | diff | stats
path: root/bsie/base
diff options
context:
space:
mode:
author Matthias Baumgartner <dev@igsor.net> 2022-12-15 17:12:56 +0100
committer Matthias Baumgartner <dev@igsor.net> 2022-12-15 17:12:56 +0100
commit 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc (patch)
tree 88997fca735bcc6f7f542dde376f25579be25f9b /bsie/base
parent 3b7fee369924eb7704709edeb8c17fff9c020dfb (diff)
download bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.tar.gz
bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.tar.bz2
bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.zip
file node class in default schema
Diffstat (limited to 'bsie/base')
-rw-r--r-- bsie/base/extractor.py 17
1 files changed, 8 insertions, 9 deletions
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index a5c7846..678dcec 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -35,6 +35,7 @@ SCHEMA_PREAMBLE = '''
# essential nodes
bsfs:Entity rdfs:subClassOf bsfs:Node .
+ bsfs:File rdfs:subClassOf bsfs:Entity .
# common definitions
xsd:string rdfs:subClassOf bsfs:Literal .
@@ -77,15 +78,13 @@ class Extractor(abc.ABC):
return hash((type(self), self.CONTENT_READER, self.schema))
def predicates(self) -> typing.Iterator[_schema.Predicate]:
- """Return the predicates that may be part of extracted triples."""
- # NOTE: Some predicates in the schema might not occur in actual triples,
- # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate
- # is part of every schema but should not be used in triples.
- # Announcing all predicates might not be the most efficient way, however,
- # it is the most safe one. Concrete extractors that produce additional
- # predicates (e.g. auxiliary nodes with their own predicates) should
- # overwrite this method to only include the principal predicates.
- return self.schema.predicates()
+ ent = self.schema.node(ns.bsfs.Entity)
+ return (
+ pred
+ for pred
+ in self.schema.predicates()
+ if pred.domain <= ent or (pred.range is not None and pred.range <= ent)
+ )
@abc.abstractmethod
def extract(