diff options
Diffstat (limited to 'bsie/extractor/text/metrics.py')
-rw-r--r-- | bsie/extractor/text/metrics.py | 12 |
1 files changed, 7 insertions, 5 deletions
diff --git a/bsie/extractor/text/metrics.py b/bsie/extractor/text/metrics.py index ddb943f..91e0e22 100644 --- a/bsie/extractor/text/metrics.py +++ b/bsie/extractor/text/metrics.py @@ -17,14 +17,16 @@ __all__: typing.Sequence[str] = ( ## code ## -log2 = lambda x: math.log(x) / math.log(2) +def log2(value: float) -> float: + """Base 2 logarithm.""" + return math.log(value) / math.log(2) class TextMetrics(base.Extractor): """Extract text metrics (character, word, and line counts) from a document.""" CONTENT_READER = 'bsie.reader.document.Document' - _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[str], typing.Any]] + _callmap: typing.Dict[bsfs.schema.Predicate, typing.Callable[[typing.Sequence[str]], typing.Any]] def __init__(self): super().__init__(bsfs.schema.from_string(base.SCHEMA_PREAMBLE + ''' @@ -66,7 +68,7 @@ class TextMetrics(base.Extractor): subject: nodes.Entity, content: typing.Sequence[str], principals: typing.Iterable[bsfs.schema.Predicate], - ) -> typing.Iterator[typing.Tuple[nodes.Entity, bsfs.schema.Predicate, typing.Any]]: + ) -> typing.Iterator[typing.Tuple[nodes.Node, bsfs.schema.Predicate, typing.Any]]: for pred in principals: # find callback clbk = self._callmap.get(pred) @@ -76,10 +78,10 @@ class TextMetrics(base.Extractor): yield subject, pred, clbk(content) def __num_words(self, text: typing.Sequence[str]) -> int: - return sum([len(paragraph.split()) for paragraph in text]) + return sum(len(paragraph.split()) for paragraph in text) def __num_characters(self, text: typing.Sequence[str]) -> int: - return sum([len(paragraph) for paragraph in text]) + return sum(len(paragraph) for paragraph in text) def __num_paragraphs(self, text: typing.Sequence[str]) -> int: return len(text) |