CoCalc -- utils.py

GitHub Repository: y33-j3T/Coursera-Deep-Learning
Path: blob/master/Natural Language Processing with Sequence Models/Week 3 - LSTMs and Named Entity Recognition/utils.py
14483 views

1
def get_vocab(vocab_path, tags_path):
    """Build the token and tag lookup tables from two line-oriented files.

    Args:
        vocab_path: Path to a text file containing one vocabulary token per
            line.
        tags_path: Path to a text file containing one tag per line.

    Returns:
        A ``(vocab, tag_map)`` tuple of dicts.  ``vocab`` maps every token to
        the 0-based number of the line it was read from and additionally
        contains a ``'<PAD>'`` entry; ``tag_map`` maps every tag to the
        0-based number of its line.  If a line is repeated, the index of its
        last occurrence is kept.
    """
    # Decode explicitly as UTF-8 so that non-ASCII tokens are read the same
    # way on every platform instead of depending on the locale's default.
    with open(vocab_path, encoding="utf-8") as f:
        # Indices start at 0: the first token in the file gets index 0.
        vocab = {
            token: index
            for index, token in enumerate(f.read().splitlines())
        }

    # The padding token is stored under the number of distinct tokens read so
    # far (i.e. the first index after the file's tokens when none repeat).
    vocab['<PAD>'] = len(vocab)

    # Load the tags; this mapping is needed to turn tags into their indices.
    with open(tags_path, encoding="utf-8") as f:
        tag_map = {
            tag: index
            for index, tag in enumerate(f.read().splitlines())
        }

    return vocab, tag_map
14

15
def get_params(vocab, tag_map, sentences_file, labels_file):
    """Convert a sentences file and a labels file into lists of indices.

    Every line of each file is split on single space characters.  Because the
    split is on ``' '`` exactly, a doubled or trailing space produces an empty
    piece that is treated like any other token or label.

    Args:
        vocab: Dict mapping tokens to integer indices.  It must contain the
            key ``'UNK'`` whenever a token that is not in ``vocab`` occurs.
        tag_map: Dict mapping label strings to integer indices.
        sentences_file: Path to a text file with one sentence per line.
        labels_file: Path to a text file with one label sequence per line.

    Returns:
        A ``(sentences, labels, size)`` tuple: ``sentences`` is a list with
        one list of token indices per line of ``sentences_file``, ``labels``
        is a list with one list of label indices per line of ``labels_file``,
        and ``size`` is ``len(sentences)``.

    Raises:
        KeyError: If an unknown token is met while ``vocab`` has no ``'UNK'``
            entry, or if a label is missing from ``tag_map``.
    """
    sentences = []
    labels = []

    # Decode explicitly as UTF-8 so that non-ASCII tokens are read the same
    # way on every platform instead of depending on the locale's default.
    with open(sentences_file, encoding="utf-8") as f:
        for sentence in f.read().splitlines():
            # Replace each token by its index when it is in the vocabulary,
            # otherwise by the index of the 'UNK' entry.  'UNK' is only looked
            # up when it is actually needed.
            token_ids = [
                vocab[token] if token in vocab else vocab['UNK']
                for token in sentence.split(' ')
            ]
            sentences.append(token_ids)

    with open(labels_file, encoding="utf-8") as f:
        for label_line in f.read().splitlines():
            # Replace each label by its index; there is no fallback for
            # labels that are absent from tag_map.
            label_ids = [tag_map[label] for label in label_line.split(' ')]
            labels.append(label_ids)

    return sentences, labels, len(sentences)
34

35

Product

Resources

Company