From 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Thu, 15 Dec 2022 17:12:56 +0100 Subject: file node class in default schema --- bsie/base/extractor.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'bsie/base/extractor.py') diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py index a5c7846..678dcec 100644 --- a/bsie/base/extractor.py +++ b/bsie/base/extractor.py @@ -35,6 +35,7 @@ SCHEMA_PREAMBLE = ''' # essential nodes bsfs:Entity rdfs:subClassOf bsfs:Node . + bsfs:File rdfs:subClassOf bsfs:Entity . # common definitions xsd:string rdfs:subClassOf bsfs:Literal . @@ -77,15 +78,13 @@ class Extractor(abc.ABC): return hash((type(self), self.CONTENT_READER, self.schema)) def predicates(self) -> typing.Iterator[_schema.Predicate]: - """Return the predicates that may be part of extracted triples.""" - # NOTE: Some predicates in the schema might not occur in actual triples, - # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate - # is part of every schema but should not be used in triples. - # Announcing all predicates might not be the most efficient way, however, - # it is the most safe one. Concrete extractors that produce additional - # predicates (e.g. auxiliary nodes with their own predicates) should - # overwrite this method to only include the principal predicates. - return self.schema.predicates() + ent = self.schema.node(ns.bsfs.Entity) + return ( + pred + for pred + in self.schema.predicates() + if pred.domain <= ent or (pred.range is not None and pred.range <= ent) + ) @abc.abstractmethod def extract( -- cgit v1.2.3