# NOTE(review): presumably the label id that marks positions to be ignored
# (e.g. padded targets); it is not referenced in the visible code -- confirm
# against callers.
IGNORE_ID = -1
def pad_list(xs, pad_value, max_len=None):
    """Pad a list of variable-length tensors into a single batch tensor.

    Every tensor is copied into the leading part of its row; the remaining
    positions along the first (length) axis are filled with ``pad_value``.

    :param list xs: non-empty list of tensors; trailing dimensions are taken
        from ``xs[0]``
    :param pad_value: value written to every padded position
    :param int max_len: padded size of the length axis; defaults to the
        largest ``x.size(0)`` found in ``xs``
    :return: tensor of shape ``(len(xs), max_len, *xs[0].size()[1:])`` with
        the dtype and device of ``xs[0]``
    :raises ValueError: if ``xs`` is empty
    """
    if len(xs) == 0:
        # Fail with a clear message instead of an opaque max()/indexing error.
        raise ValueError("pad_list() requires at least one tensor")
    if max_len is None:
        max_len = max(x.size(0) for x in xs)
    out_shape = (len(xs), max_len) + tuple(xs[0].size()[1:])
    # new_full keeps the dtype/device of xs[0] and fills in a single call.
    padded = xs[0].new_full(out_shape, pad_value)
    for row, x in enumerate(xs):
        padded[row, :x.size(0)] = x
    return padded
def process_dict(dict_path):
    """Load a token dictionary file and locate the <sos>/<eos> entries.

    Only the first space-separated field of each line is used as the token.
    The line terminator is stripped first, so a line holding just a token
    (no second field) is matched correctly. Blank lines still occupy an
    index (as an empty token) so positions stay aligned with line numbers.

    :param str dict_path: path to a UTF-8 encoded dictionary file
    :return: list of tokens in file order
    :return: index of '<sos>' in that list
    :return: index of '<eos>' in that list
    :raises ValueError: if '<sos>' or '<eos>' is not present in the file
    """
    # Binary mode keeps line splitting strictly on b'\n'; decode per line.
    with open(dict_path, 'rb') as f:
        char_list = [
            raw_line.decode('utf-8').rstrip('\r\n').split(' ')[0]
            for raw_line in f
        ]
    sos_id = char_list.index('<sos>')
    eos_id = char_list.index('<eos>')
    return char_list, sos_id, eos_id
if __name__ == "__main__":
    # Command-line check: parse the dictionary file given as the first
    # argument and print the token list with the <sos>/<eos> indices.
    import sys

    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("usage: %s DICT_PATH" % sys.argv[0])
    path = sys.argv[1]
    char_list, sos_id, eos_id = process_dict(path)
    print(char_list, sos_id, eos_id)
def parse_hypothesis(hyp, char_list):
    """Convert one recognition hypothesis into printable strings.

    :param dict hyp: hypothesis holding a 'yseq' token-id sequence and a
        'score'
    :param list char_list: list of characters, indexed by token id
    :return: recognition text, with '<space>' tokens rendered as blanks
    :return: recognition tokens joined by single spaces
    :return: recognition token ids joined by single spaces
    :return: hypothesis score as a float
    """
    # The first element of 'yseq' is skipped (presumably the start symbol).
    token_ids = [int(raw_id) for raw_id in hyp['yseq'][1:]]
    tokens = [char_list[token_id] for token_id in token_ids]
    score = float(hyp['score'])

    tokenid = " ".join(map(str, token_ids))
    token = " ".join(tokens)
    text = "".join(tokens).replace('<space>', ' ')
    return text, token, tokenid, score
def add_results_to_json(js, nbest_hyps, char_list):
    """Build a new utterance dict carrying the N-best recognition results.

    For every hypothesis a copy of the first entry of ``js['output']`` is
    made, its 'name' gets a ``[rank]`` suffix (rank starts at 1) and the
    parsed recognition fields are attached. The best hypothesis is also
    printed next to the groundtruth text.

    :param dict js: groundtruth utterance dict
    :param list nbest_hyps: list of hypothesis
    :param list char_list: list of characters
    :return: N-best results added utterance dict
    """
    new_js = {'utt2spk': js['utt2spk'], 'output': []}

    for rank, hyp in enumerate(nbest_hyps, start=1):
        parsed = parse_hypothesis(hyp, char_list)
        rec_text, rec_token, rec_tokenid, score = parsed

        # Shallow copy so the groundtruth entry itself is left untouched.
        entry = {key: value for key, value in js['output'][0].items()}
        entry['name'] = entry['name'] + '[%d]' % rank
        entry['rec_text'] = rec_text
        entry['rec_token'] = rec_token
        entry['rec_tokenid'] = rec_tokenid
        entry['score'] = score
        new_js['output'].append(entry)

        # Only the top-ranked hypothesis is shown on stdout.
        if rank == 1:
            print('groundtruth: %s' % entry['text'])
            print('prediction : %s' % entry['rec_text'])

    return new_js
import torch
def get_non_pad_mask(padded_input, input_lengths=None, pad_idx=None):
    """Return a mask that is 1 on real positions and 0 on padded ones.

    At least one of ``input_lengths`` / ``pad_idx`` must be given. When both
    are given, the mask derived from ``pad_idx`` is the one returned.

    :param padded_input: padded batch tensor
    :param input_lengths: per-row count of valid leading positions
    :param pad_idx: padding value; ``padded_input`` must be 2-D in this mode
    :return: mask with a trailing singleton dimension appended
    """
    assert not (input_lengths is None and pad_idx is None)

    if input_lengths is not None:
        # All dimensions except the last one, initialised to ones.
        mask_shape = padded_input.size()[:-1]
        non_pad_mask = padded_input.new_ones(mask_shape)
        for row in range(padded_input.size(0)):
            valid_len = input_lengths[row]
            non_pad_mask[row, valid_len:] = 0

    if pad_idx is not None:
        assert padded_input.dim() == 2
        non_pad_mask = (padded_input != pad_idx).half()

    return non_pad_mask.unsqueeze(-1)
def get_subsequent_mask(seq):
    """Build the mask that hides later positions from each position.

    :param seq: 2-D tensor of shape (batch, length); only its shape and
        device are used
    :return: uint8 tensor of shape (batch, length, length) that is 1 strictly
        above the diagonal and 0 elsewhere
    """
    batch_size, seq_len = seq.size()
    all_ones = seq.new_ones((seq_len, seq_len), dtype=torch.uint8)
    # Keep only the entries strictly above the main diagonal.
    upper = all_ones.triu(diagonal=1)
    # Broadcast the same (length, length) mask over the batch, without a copy.
    return upper[None].expand(batch_size, -1, -1)
def get_attn_key_pad_mask(seq_k, seq_q, pad_idx):
    """Build the mask that marks padded key positions for every query.

    :param seq_k: 2-D key sequence tensor
    :param seq_q: query sequence tensor; only its size along dim 1 is used
    :param pad_idx: value that marks padding in ``seq_k``
    :return: boolean tensor of shape (batch, query length, key length),
        true where the key position equals ``pad_idx``
    """
    query_len = seq_q.size(1)
    is_pad = seq_k == pad_idx
    # Insert a query axis and repeat the key mask along it (no copy).
    return is_pad[:, None].expand(-1, query_len, -1)
def get_attn_pad_mask(padded_input, input_lengths, expand_length):
    """Build an attention mask whose padded positions are set to 1.

    :param padded_input: padded batch tensor
    :param input_lengths: per-row count of valid leading positions
    :param expand_length: size of the axis inserted at dim 1
    :return: boolean tensor, true where the position lies in the padding
    """
    keep_mask = get_non_pad_mask(padded_input, input_lengths=input_lengths)
    # Invert: entries below 1 in the non-pad mask are the padded ones.
    is_pad = keep_mask.squeeze(-1) < 1
    return is_pad[:, None].expand(-1, expand_length, -1)