aboutsummaryrefslogtreecommitdiffstats
path: root/bsie
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2022-12-15 17:12:56 +0100
committerMatthias Baumgartner <dev@igsor.net>2022-12-15 17:12:56 +0100
commit8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc (patch)
tree88997fca735bcc6f7f542dde376f25579be25f9b /bsie
parent3b7fee369924eb7704709edeb8c17fff9c020dfb (diff)
downloadbsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.tar.gz
bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.tar.bz2
bsie-8e6d27ea75d2c8d68f6dd8b3d529aaa278f291cc.zip
file node class in default schema
Diffstat (limited to 'bsie')
-rw-r--r--bsie/base/extractor.py17
-rw-r--r--bsie/extractor/generic/path.py2
-rw-r--r--bsie/extractor/generic/stat.py2
-rw-r--r--bsie/tools/pipeline.py4
4 files changed, 12 insertions, 13 deletions
diff --git a/bsie/base/extractor.py b/bsie/base/extractor.py
index a5c7846..678dcec 100644
--- a/bsie/base/extractor.py
+++ b/bsie/base/extractor.py
@@ -35,6 +35,7 @@ SCHEMA_PREAMBLE = '''
# essential nodes
bsfs:Entity rdfs:subClassOf bsfs:Node .
+ bsfs:File rdfs:subClassOf bsfs:Entity .
# common definitions
xsd:string rdfs:subClassOf bsfs:Literal .
@@ -77,15 +78,13 @@ class Extractor(abc.ABC):
return hash((type(self), self.CONTENT_READER, self.schema))
def predicates(self) -> typing.Iterator[_schema.Predicate]:
- """Return the predicates that may be part of extracted triples."""
- # NOTE: Some predicates in the schema might not occur in actual triples,
- # but are defined due to predicate class hierarchy. E.g., bsfs:Predicate
- # is part of every schema but should not be used in triples.
- # Announcing all predicates might not be the most efficient way, however,
- # it is the most safe one. Concrete extractors that produce additional
- # predicates (e.g. auxiliary nodes with their own predicates) should
- # overwrite this method to only include the principal predicates.
- return self.schema.predicates()
+ ent = self.schema.node(ns.bsfs.Entity)
+ return (
+ pred
+ for pred
+ in self.schema.predicates()
+ if pred.domain <= ent or (pred.range is not None and pred.range <= ent)
+ )
@abc.abstractmethod
def extract(
diff --git a/bsie/extractor/generic/path.py b/bsie/extractor/generic/path.py
index 2cc592a..00165e3 100644
--- a/bsie/extractor/generic/path.py
+++ b/bsie/extractor/generic/path.py
@@ -32,7 +32,7 @@ class Path(extractor.Extractor):
def __init__(self):
super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
bse:filename rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsfs:File ;
rdfs:range xsd:string ;
rdfs:label "File name"^^xsd:string ;
schema:description "Filename of entity in some filesystem."^^xsd:string ;
diff --git a/bsie/extractor/generic/stat.py b/bsie/extractor/generic/stat.py
index dfde7d2..0f4267f 100644
--- a/bsie/extractor/generic/stat.py
+++ b/bsie/extractor/generic/stat.py
@@ -32,7 +32,7 @@ class Stat(extractor.Extractor):
def __init__(self):
super().__init__(bsfs.schema.Schema.from_string(extractor.SCHEMA_PREAMBLE + '''
bse:filesize rdfs:subClassOf bsfs:Predicate ;
- rdfs:domain bsfs:Entity ;
+ rdfs:domain bsfs:File ;
rdfs:range xsd:integer ;
rdfs:label "File size"^^xsd:string ;
schema:description "File size of entity in some filesystem."^^xsd:string ;
diff --git a/bsie/tools/pipeline.py b/bsie/tools/pipeline.py
index 7fdd935..3d08993 100644
--- a/bsie/tools/pipeline.py
+++ b/bsie/tools/pipeline.py
@@ -97,8 +97,8 @@ class Pipeline():
rdr2ext[rdr].add(ext)
# create subject for file
- uuid = _uuid.UCID.from_path(path)
- subject = Node(ns.bsfs.Entity, self._prefix + uuid)
+ uuid = bsfs.uuid.UCID.from_path(path)
+ subject = node.Node(ns.bsfs.File, self._prefix + 'file#' + uuid)
# extract information
for rdr, extrs in rdr2ext.items():