diff options
author | Matthias Baumgartner <dev@igsor.net> | 2022-12-15 17:12:56 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2022-12-15 17:12:56 +0100 |
commit | 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc (patch) | |
tree | 88997fca735bcc6f7f542dde376f25579be25f9b /bsie/base | |
parent | 3b7fee369924eb7704709edeb8c17fff9c020dfb (diff) | |
download | bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.tar.gz bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.tar.bz2 bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.zip |
file node class in default schema
Diffstat (limited to 'bsie/base')
-rw-r--r-- | bsie/base/extractor.py | 17 |
1 files changed, 8 insertions, 9 deletions
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index a5c7846..678dcec 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -35,6 +35,7 @@ SCHEMA_PREAMBLE = '''

     # essential nodes
     bsfs:Entity rdfs:subClassOf bsfs:Node .
+    bsfs:File rdfs:subClassOf bsfs:Entity .

     # common definitions
     xsd:string rdfs:subClassOf bsfs:Literal .
@@ -77,15 +78,13 @@ class Extractor(abc.ABC):
         return hash((type(self), self.CONTENT_READER, self.schema))

     def predicates(self) -> typing.Iterator[_schema.Predicate]:
-        """Return the predicates that may be part of extracted triples."""
-        # NOTE: Some predicates in the schema might not occur in actual triples,
-        # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate
-        # is part of every schema but should not be used in triples.
-        # Announcing all predicates might not be the most efficient way, however,
-        # it is the most safe one. Concrete extractors that produce additional
-        # predicates (e.g. auxiliary nodes with their own predicates) should
-        # overwrite this method to only include the principal predicates.
-        return self.schema.predicates()
+        ent = self.schema.node(ns.bsfs.Entity)
+        return (
+            pred
+            for pred
+            in self.schema.predicates()
+            if pred.domain <= ent or (pred.range is not None and pred.range <= ent)
+        )

     @abc.abstractmethod
     def extract(