diff options
author | Matthias Baumgartner <dev@igsor.net> | 2022-12-15 17:12:56 +0100 |
---|---|---|
committer | Matthias Baumgartner <dev@igsor.net> | 2022-12-15 17:12:56 +0100 |
commit | 8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc (patch) | |
tree | 88997fca735bcc6f7f542dde376f25579be25f9b /bsie | |
parent | 3b7fee369924eb7704709edeb8c17fff9c020dfb (diff) | |
download | bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.tar.gz bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.tar.bz2 bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.zip |
file node class in default schema
Diffstat (limited to 'bsie')
-rw-r--r-- | bsie/base/extractor.py | 17 | ||||
-rw-r--r-- | bsie/extractor/generic/path.py | 2 | ||||
-rw-r--r-- | bsie/extractor/generic/stat.py | 2 | ||||
-rw-r--r-- | bsie/tools/pipeline.py | 4 |
4 files changed, 12 insertions, 13 deletions
```diff
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index a5c7846..678dcec 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -35,6 +35,7 @@ SCHEMA_PREAMBLE = '''

     # essential nodes
     bsfs:Entity rdfs:subClassOf bsfs:Node .
+    bsfs:File rdfs:subClassOf bsfs:Entity .

     # common definitions
     xsd:string rdfs:subClassOf bsfs:Literal .
@@ -77,15 +78,13 @@ class Extractor(abc.ABC):
         return hash((type(self), self.CONTENT_READER, self.schema))

     def predicates(self) -> typing.Iterator[_schema.Predicate]:
-        """Return the predicates that may be part of extracted triples."""
-        # NOTE: Some predicates in the schema might not occur in actual triples,
-        # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate
-        # is part of every schema but should not be used in triples.
-        # Announcing all predicates might not be the most efficient way, however,
-        # it is the most safe one. Concrete extractors that produce additional
-        # predicates (e.g. auxiliary nodes with their own predicates) should
-        # overwrite this method to only include the principal predicates.
-        return self.schema.predicates()
+        ent = self.schema.node(ns.bsfs.Entity)
+        return (
+            pred
+            for pred
+            in self.schema.predicates()
+            if pred.domain <= ent or (pred.range is not None and pred.range <= ent)
+        )

     @abc.abstractmethod
     def extract(
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index 2cc592a..00165e3 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -32,7 +32,7 @@ class Path(extractor.Extractor):
     def __init__(self):
         super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:filename rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:string ;
                 rdfs:label "File name"^^xsd:string ;
                 schema:description "Filename of entity in some filesystem."^^xsd:string ;
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index dfde7d2..0f4267f 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -32,7 +32,7 @@ class Stat(extractor.Extractor):
     def __init__(self):
         super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
             bse:filesize rdfs:subClassOf bsfs:Predicate ;
-                rdfs:domain bsfs:Entity ;
+                rdfs:domain bsfs:File ;
                 rdfs:range xsd:integer ;
                 rdfs:label "File size"^^xsd:string ;
                 schema:description "File size of entity in some filesystem."^^xsd:string ;
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
index 7fdd935..3d08993 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/tools/pipeline.py
@@ -97,8 +97,8 @@ class Pipeline():
             rdr2ext[rdr].add(ext)

         # create subject for file
-        uuid = _uuid.UCID.from_path(path)
-        subject = Node(ns.bsfs.Entity, self._prefix + uuid)
+        uuid = bsfs.uuid.UCID.from_path(path)
+        subject = node.Node(ns.bsfs.File, self._prefix + 'file#' + uuid)

         # extract information
         for rdr, extrs in rdr2ext.items():
```