Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
Download

📚 The CoCalc Library - books, templates and other resources

132930 views
License: OTHER
1
#!/usr/bin/env python
2
3
4
def wer(r, h):
    """
    Return the Levenshtein (edit) distance between sequences *r* and *h*.

    Despite the name, the result is the raw number of substitutions,
    insertions and deletions needed to turn the reference *r* into the
    hypothesis *h*; it is NOT divided by the reference length.

    Elements are compared with ``==``, so passing lists of words gives a
    word-level distance and passing plain strings a character-level one.
    Both arguments may be arbitrary iterables of any length.

    O(len(r) * len(h)) time and O(len(h)) space.

    :param r: reference items (e.g. a list of words).
    :param h: hypothesis items (e.g. a list of words).
    :return: the edit distance as a plain Python ``int``.

    >>> wer("who is there".split(), "is there".split())
    1
    >>> wer("who is there".split(), "".split())
    3
    >>> wer("".split(), "who is there".split())
    3
    """
    # The hypothesis is walked once per reference item, so materialise it.
    hyp = list(h)

    # previous[j] holds the distance between the reference prefix consumed
    # so far and hyp[:j]. With an empty reference prefix that is j insertions.
    previous = list(range(len(hyp) + 1))

    for i, ref_item in enumerate(r, start=1):
        # Column 0: turning i reference items into nothing costs i deletions.
        current = [i]
        for j, hyp_item in enumerate(hyp, start=1):
            if ref_item == hyp_item:
                # Matching items cost nothing; carry the diagonal value.
                current.append(previous[j - 1])
            else:
                substitution = previous[j - 1]
                insertion = current[j - 1]
                deletion = previous[j]
                current.append(1 + min(substitution, insertion, deletion))
        # Only the last completed row is needed for the next iteration.
        previous = current

    # Python ints are unbounded, so long inputs cannot overflow.
    return previous[-1]
40
41
if __name__ == "__main__":
    # When executed as a script, run the docstring examples as a self-test.
    from doctest import testmod

    testmod()
44
45