# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Html handler customizations for simple_wer_v2."""
import re
from lingvo.tasks.asr.tools import simple_wer_v2
# pylint: disable=arguments-renamed
[docs]class ChainOfHtmlHandlers(simple_wer_v2.HtmlHandler):
"""A handler that is a chain of handlers.
All Setup and Render functions are called in sequential order.
"""
def __init__(self, html_handlers=None):
assert html_handlers, 'Must provide list of handlers as input.'
self._html_handlers = html_handlers
[docs] def Setup(self, hypothesis, reference):
for handler in self._html_handlers:
handler.Setup(hypothesis, reference)
[docs] def Render(self, **kwargs):
return ''.join(
[handler.Render(**kwargs) for handler in self._html_handlers])
[docs]class TagHtmlHandler(simple_wer_v2.HtmlHandler):
"""Handler to cache and add tags back to original positions in transcript."""
[docs] def Setup(self, hypothesis, reference):
# Cache tag information before they're removed for WER computation.
self.tags = FindTags(hypothesis.split())
[docs] def Render(self, pos_hyp=None, **kwargs):
"""Show tags in the output html.
Args:
pos_hyp: current word position in the hypothesis
**kwargs: unused
Returns:
Tag strings
"""
tag_strs = []
while self.tags and pos_hyp == self.tags[-1][0]:
_, tag_str = self.tags.pop()
tag_strs.append(tag_str)
if tag_strs:
return ' '.join(tag_strs[::-1]) + ' '
return ''
[docs]class NewlineHtmlHandler(simple_wer_v2.HtmlHandler):
"""Handler to insert newline into html at fixed ref word intervals.
Useful for side-by-side comparisons of long transcripts.
"""
def __init__(self, num_words_per_line=-1):
"""Specify the number of reference words to display on each line.
Args:
num_words_per_line: number of ref words on each line
"""
self.num_words_per_line = num_words_per_line
[docs] def Setup(self, hypothesis, reference):
self.line_pos = 0
[docs] def Render(self, err_type=None, **kwargs):
"""Set number of ref words to display per line.
Args:
err_type: error type
**kwargs: unused
Returns:
Newline string when number of words reaches num_words_per_line
"""
if err_type != 'ins' and self.num_words_per_line > 0:
self.line_pos += 1
if self.line_pos % self.num_words_per_line == 0:
self.line_pos = 0
return '<br> '
return ''