aboutsummaryrefslogtreecommitdiffstats
path: root/test/extractor/text/test_summary.py
diff options
context:
space:
mode:
author: Matthias Baumgartner <dev@igsor.net> 2023-07-28 11:31:24 +0200
committer: Matthias Baumgartner <dev@igsor.net> 2023-07-28 11:31:24 +0200
commit: 11b26a913d39edb7f36cd0a3b3d8e74c96738579 (patch)
tree: 463af082c0b77916c11a84263c96fc91ebedabed /test/extractor/text/test_summary.py
parent: 28e3640e0b5e03b50bf66711f46937f07a3d7fef (diff)
download: bsie-11b26a913d39edb7f36cd0a3b3d8e74c96738579.tar.gz
bsie-11b26a913d39edb7f36cd0a3b3d8e74c96738579.tar.bz2
bsie-11b26a913d39edb7f36cd0a3b3d8e74c96738579.zip
document digestion:
* plaintext reader * text metrics extractor * text summary extractor
Diffstat (limited to 'test/extractor/text/test_summary.py')
-rw-r--r-- test/extractor/text/test_summary.py 53
1 files changed, 53 insertions, 0 deletions
diff --git a/test/extractor/text/test_summary.py b/test/extractor/text/test_summary.py
new file mode 100644
index 0000000..78d3002
--- /dev/null
+++ b/test/extractor/text/test_summary.py
@@ -0,0 +1,53 @@
+
+# standard imports
+import os
+import unittest
+import warnings
+
+# bsie imports
+from bsie.extractor import base
+from bsie.matcher import nodes
+from bsie.reader.document import Document
+from bsie.utils import bsfs, ns
+
+# objects to test
+from bsie.extractor.text.summary import Summary
+
+
+## code ##
+
+class TestTextMetrics(unittest.TestCase):
+
+ def test_schema(self):
+ self.assertEqual(Summary().schema,
+ bsfs.schema.from_string(base.SCHEMA_PREAMBLE + '''
+ bse:summary rdfs:subClassOf bsfs:Predicate ;
+ rdfs:domain bsn:Entity ;
+ rdfs:range xsd:string ;
+ bsfs:unique "true"^^xsd:boolean .
+ '''))
+
+ def test_extract(self):
+ # setup
+ rdr = Document()
+ ext = Summary(max_length=20, num_beams=1)
+ subject = nodes.Entity(ucid='abc123')
+ principals = set(ext.principals)
+ path = os.path.join(os.path.dirname(__file__), 'example-en.txt')
+ # fetch document
+ text = rdr(path)
+
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore', category=FutureWarning)
+ triples = list(ext.extract(subject, text, principals))
+
+ self.assertEqual(triples, [
+ (subject, ext.schema.predicate(ns.bse.summary),
+ 'Alice is tired of sitting by her sister on the bank')])
+
+## main ##
+
+# Hand control to unittest's runner when this file is executed as a script.
+if __name__ == '__main__':
+ unittest.main()
+
+## EOF ##