about | summary | refs | log | tree | commit | diff | stats
path: root/test/extractor
diff options
context:
space:
mode:
author: Matthias Baumgartner <dev@igsor.net> 2023-07-28 12:00:11 +0200
committer: Matthias Baumgartner <dev@igsor.net> 2023-07-28 12:00:11 +0200
commit: cf032db8785149689d94232b400e20e4d6336562 (patch)
tree: c912f62227b06430bf4c11a820c0a4b34b46512c /test/extractor
parent: 11b26a913d39edb7f36cd0a3b3d8e74c96738579 (diff)
download: bsie-develop.tar.gz
bsie-develop.tar.bz2
bsie-develop.zip
minor style and text fixes (branch: develop)
Diffstat (limited to 'test/extractor')
-rw-r--r-- test/extractor/text/test_metrics.py | 5
-rw-r--r-- test/extractor/text/test_summary.py | 10
2 files changed, 12 insertions, 3 deletions
diff --git a/test/extractor/text/test_metrics.py b/test/extractor/text/test_metrics.py
index 9cc6a94..6d87889 100644
--- a/test/extractor/text/test_metrics.py
+++ b/test/extractor/text/test_metrics.py
@@ -55,8 +55,8 @@ class TestTextMetrics(unittest.TestCase):
path = os.path.join(os.path.dirname(__file__), 'example-en.txt')
# fetch document
text = rdr(path)
+ # extracts all specified predicates
triples = set(ext.extract(subject, text, principals))
-
self.assertSetEqual({(s,p,o) for s,p,o in triples if p.uri != ns.bse.vocabulary_entropy}, {
(subject, ext.schema.predicate(ns.bse.num_characters), 21997),
(subject, ext.schema.predicate(ns.bse.num_paragraphs), 48),
@@ -66,6 +66,9 @@ class TestTextMetrics(unittest.TestCase):
entropy = {o for s,p,o in triples if p.uri == ns.bse.vocabulary_entropy}
self.assertEqual(len(entropy), 1)
self.assertAlmostEqual(list(entropy)[0], 8.830360505)
+ # skip unknown predicates
+ self.assertSetEqual(set(), set(ext.extract(subject, text,
+ {ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.unknown)})))
## main ##
diff --git a/test/extractor/text/test_summary.py b/test/extractor/text/test_summary.py
index 78d3002..aee1ee2 100644
--- a/test/extractor/text/test_summary.py
+++ b/test/extractor/text/test_summary.py
@@ -36,14 +36,20 @@ class TestTextMetrics(unittest.TestCase):
path = os.path.join(os.path.dirname(__file__), 'example-en.txt')
# fetch document
text = rdr(path)
-
+ # empty input yields no triples
+ self.assertEqual(list(ext.extract(subject, [], principals)), [])
+ self.assertEqual(list(ext.extract(subject, [' '], principals)), [])
+ self.assertEqual(list(ext.extract(subject, [' ', ' ', ' '], principals)), [])
+ # creates a summary
with warnings.catch_warnings():
warnings.simplefilter('ignore', category=FutureWarning)
triples = list(ext.extract(subject, text, principals))
-
self.assertEqual(triples, [
(subject, ext.schema.predicate(ns.bse.summary),
'Alice is tired of sitting by her sister on the bank')])
+ # skip unknown predicates
+ self.assertSetEqual(set(), set(ext.extract(subject, text,
+ {ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.unknown)})))
## main ##