From cf032db8785149689d94232b400e20e4d6336562 Mon Sep 17 00:00:00 2001 From: Matthias Baumgartner Date: Fri, 28 Jul 2023 12:00:11 +0200 Subject: minor style and text fixes --- test/extractor/text/test_metrics.py | 5 ++++- test/extractor/text/test_summary.py | 10 ++++++++-- test/reader/document/test_plain.py | 6 ++++++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/extractor/text/test_metrics.py b/test/extractor/text/test_metrics.py index 9cc6a94..6d87889 100644 --- a/test/extractor/text/test_metrics.py +++ b/test/extractor/text/test_metrics.py @@ -55,8 +55,8 @@ class TestTextMetrics(unittest.TestCase): path = os.path.join(os.path.dirname(__file__), 'example-en.txt') # fetch document text = rdr(path) + # extracts all specified predicates triples = set(ext.extract(subject, text, principals)) - self.assertSetEqual({(s,p,o) for s,p,o in triples if p.uri != ns.bse.vocabulary_entropy}, { (subject, ext.schema.predicate(ns.bse.num_characters), 21997), (subject, ext.schema.predicate(ns.bse.num_paragraphs), 48), @@ -66,6 +66,9 @@ class TestTextMetrics(unittest.TestCase): entropy = {o for s,p,o in triples if p.uri == ns.bse.vocabulary_entropy} self.assertEqual(len(entropy), 1) self.assertAlmostEqual(list(entropy)[0], 8.830360505) + # skip unknown predicates + self.assertSetEqual(set(), set(ext.extract(subject, text, + {ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.unknown)}))) ## main ## diff --git a/test/extractor/text/test_summary.py b/test/extractor/text/test_summary.py index 78d3002..aee1ee2 100644 --- a/test/extractor/text/test_summary.py +++ b/test/extractor/text/test_summary.py @@ -36,14 +36,20 @@ class TestTextMetrics(unittest.TestCase): path = os.path.join(os.path.dirname(__file__), 'example-en.txt') # fetch document text = rdr(path) - + # empty input yields no triples + self.assertEqual(list(ext.extract(subject, [], principals)), []) + self.assertEqual(list(ext.extract(subject, [' '], principals)), []) + 
self.assertEqual(list(ext.extract(subject, [' ', ' ', ' '], principals)), []) + # creates a summary with warnings.catch_warnings(): warnings.simplefilter('ignore', category=FutureWarning) triples = list(ext.extract(subject, text, principals)) - self.assertEqual(triples, [ (subject, ext.schema.predicate(ns.bse.summary), 'Alice is tired of sitting by her sister on the bank')]) + # skip unknown predicates + self.assertSetEqual(set(), set(ext.extract(subject, text, + {ext.schema.predicate(ns.bsfs.Predicate).child(ns.bse.unknown)}))) ## main ## diff --git a/test/reader/document/test_plain.py b/test/reader/document/test_plain.py index c63fb30..6bbf8c6 100644 --- a/test/reader/document/test_plain.py +++ b/test/reader/document/test_plain.py @@ -3,6 +3,9 @@ import os import unittest +# bsie imports +from bsie.utils import errors + # objects to test from bsie.reader.document._plain import Plain @@ -29,6 +32,9 @@ class TestPlain(unittest.TestCase): 'Semper eget duis at tellus at. Neque egestas congue quisque egestas diam in arcu cursus euismod. Erat nam at lectus urna duis convallis convallis. Tempus urna et pharetra pharetra massa massa ultricies mi quis. Magna eget est lorem ipsum dolor sit amet consectetur.', 'Orci sagittis eu volutpat odio. Risus pretium quam vulputate dignissim suspendisse in. Volutpat est velit egestas dui id. Massa placerat duis ultricies lacus sed turpis. In nulla posuere sollicitudin aliquam ultrices sagittis orci a. Vel elit scelerisque mauris pellentesque pulvinar pellentesque.', ]) + # rejects unsupported file formats + self.assertRaises(errors.UnsupportedFileFormatError, rdr, + os.path.join(os.path.dirname(__file__), '..', 'testimage_exif.jpg')) ## main ## -- cgit v1.2.3