Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
Download

📚 The CoCalc Library - books, templates and other resources

132928 views
License: OTHER
""" A neural chatbot using a sequence-to-sequence model with an
attentional decoder.

This is based on the Google Translate TensorFlow model
https://github.com/tensorflow/models/blob/master/tutorials/rnn/translate/

Sequence-to-sequence model by Cho et al. (2014)

Created by Chip Huyen ([email protected])
CS20: "TensorFlow for Deep Learning Research"
cs20.stanford.edu

This file contains the hyperparameters for the model.

See README.md for instructions on how to run the starter code.
"""

# Parameters for processing the dataset.
# Directory and file names only; how they are joined into full paths is
# decided by the code that imports this module.
DATA_PATH = 'data/cornell movie-dialogs corpus'
CONVO_FILE = 'movie_conversations.txt'
LINE_FILE = 'movie_lines.txt'
OUTPUT_FILE = 'output_convo.txt'
PROCESSED_PATH = 'processed'
CPT_PATH = 'checkpoints'

# NOTE(review): presumably a minimum-count cutoff applied during
# preprocessing -- confirm against the code that reads it.
THRESHOLD = 2

# Integer ids 0-3 assigned to the four special symbols.
# NOTE(review): presumably these are the vocabulary indices of the padding,
# unknown-word, start-of-sequence and end-of-sequence tokens -- confirm
# against the vocabulary-building code.
PAD_ID = 0
UNK_ID = 1
START_ID = 2
EOS_ID = 3

# NOTE(review): presumably the number of samples held out as the test set --
# confirm against the data-splitting code.
TESTSET_SIZE = 25000

# List of integer pairs in ascending order.
# NOTE(review): presumably (encoder length, decoder length) bucket sizes as in
# the TensorFlow translate model this file is based on -- confirm.
BUCKETS = [(19, 19), (28, 28), (33, 33), (40, 43), (50, 53), (60, 63)]

# Pairs of strings: in each pair the second string is the first with the
# space-separated apostrophe re-attached to a neighbouring fragment.
# NOTE(review): presumably applied as (old, new) replacements while
# tokenizing -- confirm against the preprocessing code.
# Whitespace inside every string is significant; do not normalise it.
CONTRACTIONS = [
    ("i ' m ", "i 'm "),
    ("' d ", "'d "),
    ("' s ", "'s "),
    ("don ' t ", "do n't "),
    ("didn ' t ", "did n't "),
    ("doesn ' t ", "does n't "),
    ("can ' t ", "ca n't "),
    ("shouldn ' t ", "should n't "),
    ("wouldn ' t ", "would n't "),
    ("' ve ", "'ve "),
    ("' re ", "'re "),
    ("in ' ", "in' "),
]

# Model hyperparameters.
# NOTE(review): the per-value readings below are inferred from the names
# only -- confirm against the model code.
# Presumably the number of stacked recurrent layers, the units per layer,
# and the training batch size.
NUM_LAYERS = 3
HIDDEN_SIZE = 256
BATCH_SIZE = 64

# Presumably the optimizer learning rate and the gradient-clipping norm.
LR = 0.5
MAX_GRAD_NORM = 5.0

# Presumably the number of classes drawn for a sampled softmax loss.
NUM_SAMPLES = 512