aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthias Baumgartner <dev@igsor.net>2023-03-02 16:40:00 +0100
committerMatthias Baumgartner <dev@igsor.net>2023-03-02 16:40:00 +0100
commit28a021483c13e974e00b6159f0653b0727df9d10 (patch)
treee5aa55337a5b2e5c5e34b7892a0cbe0997eb0da8
parentb66ed641d5cbb4cb83f4a571223e4d65d80ed05c (diff)
downloadbsfs-28a021483c13e974e00b6159f0653b0727df9d10.tar.gz
bsfs-28a021483c13e974e00b6159f0653b0727df9d10.tar.bz2
bsfs-28a021483c13e974e00b6159f0653b0727df9d10.zip
prohibit certain characters in URI and ensure URIs in bsfs.graph
-rw-r--r--bsfs/graph/nodes.py5
-rw-r--r--bsfs/schema/types.py2
-rw-r--r--bsfs/triple_store/sparql/parse_filter.py6
-rw-r--r--bsfs/triple_store/sparql/sparql.py10
-rw-r--r--bsfs/utils/uri.py8
-rw-r--r--test/graph/test_nodes.py19
-rw-r--r--test/query/ast_test/test_filter_.py2
-rw-r--r--test/triple_store/sparql/test_parse_filter.py2
-rw-r--r--test/triple_store/sparql/test_sparql.py4
-rw-r--r--test/utils/test_uri.py19
10 files changed, 42 insertions, 35 deletions
diff --git a/bsfs/graph/nodes.py b/bsfs/graph/nodes.py
index c3530c1..84996c7 100644
--- a/bsfs/graph/nodes.py
+++ b/bsfs/graph/nodes.py
@@ -52,9 +52,8 @@ class Nodes():
self._backend = backend
self._ac = access_control
self._node_type = node_type
- self._guids = set(guids)
- # create helper instances
- # FIXME: Assumes that the schema does not change while the instance is in use!
+ # convert to URI since this is not guaranteed by Graph
+ self._guids = {URI(guid) for guid in guids}
def __eq__(self, other: typing.Any) -> bool:
return isinstance(other, Nodes) \
diff --git a/bsfs/schema/types.py b/bsfs/schema/types.py
index 54adffb..104580d 100644
--- a/bsfs/schema/types.py
+++ b/bsfs/schema/types.py
@@ -98,7 +98,7 @@ class _Type():
parent: typing.Optional['_Type'] = None,
**annotations: typing.Any,
):
- self.uri = uri
+ self.uri = URI(uri)
self.parent = parent
self.annotations = annotations
diff --git a/bsfs/triple_store/sparql/parse_filter.py b/bsfs/triple_store/sparql/parse_filter.py
index 8959b2c..bf19a02 100644
--- a/bsfs/triple_store/sparql/parse_filter.py
+++ b/bsfs/triple_store/sparql/parse_filter.py
@@ -154,7 +154,7 @@ class Filter():
puri = f'<{puri}>' # type: ignore [assignment] # variable re-use confuses mypy
# apply reverse flag
if node.reverse:
- puri = URI('^' + puri)
+ puri = '^' + puri
dom, rng = rng, dom # type: ignore [assignment] # variable re-use confuses mypy
# check path consistency
if not node_type <= dom:
@@ -267,9 +267,7 @@ class Filter():
"""
if not isinstance(node_type, bsc.Node):
raise errors.BackendError(f'expected Node, found {node_type}')
- if not rdflib.term._is_valid_uri(node.value): # pylint: disable=protected-access
- raise errors.BackendError(f'<{node.value}> is not a serializable uri')
- return f'VALUES {head} {{ <{node.value}> }}'
+ return f'VALUES {head} {{ <{URI(node.value)}> }}'
def _equals(self, node_type: bsc.Vertex, node: ast.filter.Equals, head: str) -> str:
"""
diff --git a/bsfs/triple_store/sparql/sparql.py b/bsfs/triple_store/sparql/sparql.py
index bd98f46..68c0027 100644
--- a/bsfs/triple_store/sparql/sparql.py
+++ b/bsfs/triple_store/sparql/sparql.py
@@ -284,10 +284,7 @@ class SparqlStore(base.TripleStoreBase):
raise errors.ConsistencyError(f'{node_type} is not defined in the schema')
# check and create guids
for guid in guids:
- # check convert to rdflib.URIRef
- if not rdflib.term._is_valid_uri(guid): # pylint: disable=protected-access
- raise ValueError(guids)
- subject = rdflib.URIRef(guid)
+ subject = rdflib.URIRef(URI(guid))
# check node existence
if (subject, rdflib.RDF.type, None) in self._graph:
# FIXME: node exists and may have a different type! ignore? raise? report?
@@ -326,10 +323,7 @@ class SparqlStore(base.TripleStoreBase):
raise errors.InstanceError(inconsistent)
# check guids
# FIXME: Fail or skip inexistent nodes?
- guids = set(guids)
- invalid = {guid for guid in guids if not rdflib.term._is_valid_uri(guid)} # pylint: disable=protected-access
- if len(invalid) > 0:
- raise ValueError(invalid)
+ guids = {URI(guid) for guid in guids}
inconsistent = {guid for guid in guids if not self._has_type(guid, node_type)}
if len(inconsistent) > 0:
raise errors.InstanceError(inconsistent)
diff --git a/bsfs/utils/uri.py b/bsfs/utils/uri.py
index 0693017..5755a6e 100644
--- a/bsfs/utils/uri.py
+++ b/bsfs/utils/uri.py
@@ -4,6 +4,8 @@ import re
import typing
# constants
+RX_CHARS = re.compile(r'[<>" {}|\\^]')
+
RX_URI = re.compile(r'''
^
(?:(?P<scheme>[^:/?#]+):)? # scheme, ://-delimited
@@ -77,6 +79,9 @@ class URI(str):
no claim about the validity of an URI!
"""
+ # check characters
+ if RX_CHARS.search(query) is not None:
+ return False
# check uri
parts = RX_URI.match(query)
if parts is not None:
@@ -227,9 +232,6 @@ class URI(str):
# overload formatting methods
- def format(self, *args, **kwargs) -> 'URI':
- return URI(super().format(*args, **kwargs))
-
def __mod__(self, *args) -> 'URI':
return URI(super().__mod__(*args))
diff --git a/test/graph/test_nodes.py b/test/graph/test_nodes.py
index 083b2d8..dca887a 100644
--- a/test/graph/test_nodes.py
+++ b/test/graph/test_nodes.py
@@ -123,15 +123,19 @@ class TestNodes(unittest.TestCase):
URI('http://example.com/me/tag#4321'),
}
+ def test_construct(self):
+ self.assertIsInstance(Nodes(self.backend, self.ac, self.ent_type, {'http://example.com/me-and-you'}), Nodes)
+ self.assertRaises(ValueError, Nodes, self.backend, self.ac, self.ent_type, {'http://example.com/me and you'})
+
def test_str(self):
# str baseline
- nodes = Nodes(self.backend, self.ac, self.ent_type, self.ent_ids)
- self.assertEqual(str(nodes), f'Nodes({self.ent_type}, {self.ent_ids})')
- self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.ac}, {self.ent_type}, {self.ent_ids})')
+ nodes = Nodes(self.backend, self.ac, self.ent_type, {URI('http://example.com/me/entity#1234')})
+ self.assertEqual(str(nodes), f"Nodes({self.ent_type}, {{'http://example.com/me/entity#1234'}})")
+ self.assertEqual(repr(nodes), f"Nodes({self.backend}, {self.ac}, {self.ent_type}, {{'http://example.com/me/entity#1234'}})")
# str respects node_type
- nodes = Nodes(self.backend, self.ac, self.tag_type, self.tag_ids)
- self.assertEqual(str(nodes), f'Nodes({self.tag_type}, {self.tag_ids})')
- self.assertEqual(repr(nodes), f'Nodes({self.backend}, {self.ac}, {self.tag_type}, {self.tag_ids})')
+ nodes = Nodes(self.backend, self.ac, self.tag_type, {URI('http://example.com/me/tag#1234')})
+ self.assertEqual(str(nodes), f"Nodes({self.tag_type}, {{'http://example.com/me/tag#1234'}})")
+ self.assertEqual(repr(nodes), f"Nodes({self.backend}, {self.ac}, {self.tag_type}, {{'http://example.com/me/tag#1234'}})")
# str respects guids
nodes = Nodes(self.backend, self.ac, self.ent_type, {URI('http://example.com/me/entity#foo')})
self.assertEqual(str(nodes), f'Nodes({self.ent_type}, {{\'http://example.com/me/entity#foo\'}})')
@@ -437,7 +441,8 @@ class TestNodes(unittest.TestCase):
self.assertRaises(TypeError, nodes.get, 1234)
self.assertRaises(TypeError, nodes.get, (ns.bse.tag, 1234))
self.assertRaises(TypeError, nodes.get, (1234, ns.bse.tag))
- self.assertRaises(errors.ConsistencyError, nodes.get, 'hello world')
+ self.assertRaises(ValueError, nodes.get, 'hello world')
+ self.assertRaises(errors.ConsistencyError, nodes.get, 'hello_world')
self.assertRaises(errors.ConsistencyError, nodes.get, ns.bse.invalid)
self.assertRaises(errors.ConsistencyError, nodes.get, (ns.bse.tag, bst.invalid))
# can pass multiple paths
diff --git a/test/query/ast_test/test_filter_.py b/test/query/ast_test/test_filter_.py
index cdc530c..d0d42ea 100644
--- a/test/query/ast_test/test_filter_.py
+++ b/test/query/ast_test/test_filter_.py
@@ -382,7 +382,7 @@ class TestPredicate(unittest.TestCase):
# member returns predicate
# predicate must be an URI
self.assertEqual(Predicate(ns.bse.filesize).predicate, ns.bse.filesize)
- self.assertEqual(Predicate(URI('hello world')).predicate, URI('hello world'))
+ self.assertEqual(Predicate(URI('hello_world')).predicate, URI('hello_world'))
self.assertRaises(TypeError, Predicate, 1234)
self.assertRaises(TypeError, Predicate, FilterExpression())
self.assertRaises(TypeError, Predicate, FilterExpression())
diff --git a/test/triple_store/sparql/test_parse_filter.py b/test/triple_store/sparql/test_parse_filter.py
index 6db9224..5b6ca8a 100644
--- a/test/triple_store/sparql/test_parse_filter.py
+++ b/test/triple_store/sparql/test_parse_filter.py
@@ -158,7 +158,7 @@ class TestParseFilter(unittest.TestCase):
# _is requires a node
self.assertRaises(errors.BackendError, self.parser._is, self.schema.literal(ns.bsfs.Literal), ast.filter.Is('http://example.com/entity#1234'), '?ent')
# _is requires a serializable guid
- self.assertRaises(errors.BackendError, self.parser._is, self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#foo and bar'), '?ent')
+ self.assertRaises(ValueError, self.parser._is, self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#foo and bar'), '?ent')
# a single Is statement
q = self.parser(self.schema.node(ns.bsfs.Entity), ast.filter.Is('http://example.com/entity#1234'))
self.assertSetEqual({str(guid) for guid, in q(self.graph)},
diff --git a/test/triple_store/sparql/test_sparql.py b/test/triple_store/sparql/test_sparql.py
index d880082..f45ca37 100644
--- a/test/triple_store/sparql/test_sparql.py
+++ b/test/triple_store/sparql/test_sparql.py
@@ -679,7 +679,7 @@ class TestSparqlStore(unittest.TestCase):
URI('http://example.com/me/entity#1234'), URI('http://example.com/me/entity#4321')})
# guid must be valid
- self.assertRaises(ValueError, store.create, self.schema.node(ns.bsfs.Entity), {URI('http://example.com/me/foo and bar')})
+ self.assertRaises(ValueError, store.create, self.schema.node(ns.bsfs.Entity), {'http://example.com/me/foo and bar'})
# can create some nodes
ent_type = store.schema.node(ns.bsfs.Entity)
@@ -770,7 +770,7 @@ class TestSparqlStore(unittest.TestCase):
self.assertRaises(errors.ConsistencyError, store.set, ent_type, ent_ids, p_invalid, {'http://example.com/me/tag#1234'})
# invalid guid is not permitted
- self.assertRaises(ValueError, store.set, ent_type, {URI('http://example.com/me/foo and bar')}, p_filesize, {1234})
+ self.assertRaises(ValueError, store.set, ent_type, {'http://example.com/me/foo and bar'}, p_filesize, {1234})
# predicate must match node_type
self.assertRaises(errors.ConsistencyError, store.set, tag_type, tag_ids, p_filesize, {1234})
diff --git a/test/utils/test_uri.py b/test/utils/test_uri.py
index 6ee2ef7..1c4c9f9 100644
--- a/test/utils/test_uri.py
+++ b/test/utils/test_uri.py
@@ -35,6 +35,16 @@ class TestURI(unittest.TestCase):
self.assertTrue(URI.is_parseable('telnet://192.0.2.16:80/'))
self.assertTrue(URI.is_parseable('urn:oasis:names:specification:docbook:dtd:xml:4.1.2'))
+ # some characters are prohibited
+ self.assertFalse(URI.is_parseable('http://example.com/foo<bar'))
+ self.assertFalse(URI.is_parseable('http://example.com/foo>bar'))
+ self.assertFalse(URI.is_parseable('http://example.com/foo bar'))
+ self.assertFalse(URI.is_parseable('http://example.com/foo{bar'))
+ self.assertFalse(URI.is_parseable('http://example.com/foo}bar'))
+ self.assertFalse(URI.is_parseable('http://example.com/foo|bar'))
+ self.assertFalse(URI.is_parseable('http://example.com/foo^bar'))
+ self.assertFalse(URI.is_parseable('http://example.com/foo\\bar'))
+
# uri cannot end with a scheme delimiter
self.assertFalse(URI.is_parseable('http://'))
# port must be a number
@@ -159,10 +169,10 @@ class TestURI(unittest.TestCase):
def test_overloaded(self):
# composition
- self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment') + 'hello', URI)
- self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment') * 2, URI)
- self.assertIsInstance(2 * URI('http://user@www.example.com:1234/{}/path1?{}#fragment'), URI) # rmul
- self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment').join(['hello', 'world']) , URI)
+ self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment') + 'hello', URI)
+ self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment') * 2, URI)
+ self.assertIsInstance(2 * URI('http://user@www.example.com:1234/path0/path1?query#fragment'), URI) # rmul
+ self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').join(['hello', 'world']) , URI)
# stripping
self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').strip(), URI)
self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').lstrip(), URI)
@@ -171,7 +181,6 @@ class TestURI(unittest.TestCase):
self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').lower(), URI)
self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').upper(), URI)
# formatting
- self.assertIsInstance(URI('http://user@www.example.com:1234/{}/path1?{}#fragment').format('hello', 'world'), URI)
self.assertIsInstance(URI('http://user@www.example.com:1234/%s/path1?%s#fragment') % ('hello', 'world'), URI)
self.assertIsInstance(URI('http://user@www.example.com:1234/path0/path1?query#fragment').replace('path0', 'pathX'), URI)