Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

📚 The CoCalc Library - books, templates and other resources

132937 views
License: OTHER
1
def examples_queue(data_sources, data_fields_to_features, training,
                   data_items_to_decoders=None, data_items_to_decode=None):
  """Construct a queue of training or evaluation examples.

  Creates a reader over the files named by data_sources, enqueues the
  serialized tf.Examples found there (shuffled when training is True), and
  parses them into tensors.

  For an image dataset, data_fields_to_features might look like:

    data_fields_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
      'image/class/label': tf.FixedLenFeature(
          [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),
    }

  while a simple algorithmic dataset with variable-length data would use:

    data_fields_to_features = {
      'inputs': tf.VarLenFeature(tf.int64),
      'targets': tf.VarLenFeature(tf.int64),
    }

  data_items_to_decoders may stay None when no decoding is needed. For
  images it should be provided so the raw features are decoded, e.g. for
  MNIST:

    data_items_to_decoders = {
      'image': tfexample_decoder.Image(
          image_key='image/encoded',
          format_key='image/format',
          shape=[28, 28],
          channels=1),
      'label': tfexample_decoder.Tensor('image/class/label'),
    }

  These arguments mirror the tf.contrib.slim.data module; see its
  documentation for further detail.

  Args:
    data_sources: list or tuple of sources to read from, e.g.
      [/path/to/train@128, /path/to/train2*, /tmp/.../train3*].
    data_fields_to_features: dict mapping data fields in the sources to
      features such as tf.VarLenFeature(tf.int64); see examples above.
    training: Boolean; read for training (shuffled, more readers) or not.
    data_items_to_decoders: dict mapping returned data items to decoders
      that produce them from the parsed features; see examples above. When
      None, every feature is grabbed as a plain tensor.
    data_items_to_decode: subset of data items to decode; when None, all
      items are decoded.

  Returns:
    A dictionary mapping each data_field to a corresponding 1D int64 tensor
    read from the created queue.

  Raises:
    ValueError: if no files match the provided data_prefix or no data
      fields were provided.
  """
  with tf.name_scope("examples_queue"):
    # More parallel readers (and shuffling) only make sense for training.
    num_readers = 4 if training else 1
    _, serialized_example = tf.contrib.slim.parallel_reader.parallel_read(
        data_sources, tf.TFRecordReader, shuffle=training,
        num_readers=num_readers)

    # Default: pull each declared feature straight through as a tensor.
    if data_items_to_decoders is None:
      data_items_to_decoders = dict(
          (field, tf.contrib.slim.tfexample_decoder.Tensor(field))
          for field in data_fields_to_features)

    decoder = tf.contrib.slim.tfexample_decoder.TFExampleDecoder(
        data_fields_to_features, data_items_to_decoders)

    # Default: decode every item we have a decoder for.
    if data_items_to_decode is None:
      data_items_to_decode = data_items_to_decoders.keys()

    decoded = decoder.decode(serialized_example, items=data_items_to_decode)
    # decode() returns tensors in the same order as the requested items.
    return dict(zip(data_items_to_decode, decoded))
83
84
85
def batch_examples(examples, batch_size, bucket_boundaries=None):
  """Given a queue of examples, create batches of examples with similar lengths.

  We assume that examples is a dictionary with string keys and tensor values,
  possibly coming from a queue, e.g., constructed by examples_queue above.
  Each tensor in examples is assumed to be 1D. We will put tensors of similar
  length into batches together. We return a dictionary with the same keys as
  examples, and with values being batches of size batch_size. If elements have
  different lengths, they are padded with 0s. This function is based on
  tf.contrib.training.bucket_by_sequence_length so see there for details.

  For example, if examples is a queue containing [1, 2, 3] and [4], then
  this function with batch_size=2 will return a batch [[1, 2, 3], [4, 0, 0]].

  Args:
    examples: a dictionary with string keys and 1D tensor values.
    batch_size: a python integer or a scalar int32 tensor.
    bucket_boundaries: a list of integers for the boundaries that will be
      used for bucketing; see tf.contrib.training.bucket_by_sequence_length
      for more details; if None, we create a default set of buckets.

  Returns:
    A dictionary with the same keys as examples and with values being batches
    of examples padded with 0s, i.e., [batch_size x length] tensors.
  """
  # Create default buckets if none were provided.
  if bucket_boundaries is None:
    # Use range (not the Python-2-only xrange) so this also runs on Python 3;
    # the lists are tiny, so materializing them is free.
    # Small buckets -- 8, 16, ..., 64 in steps of 8.
    small_buckets = [8 * (i + 1) for i in range(8)]
    # Medium buckets -- 96, 128, ..., 256 in steps of 32.
    medium_buckets = [32 * (i + 3) for i in range(6)]
    # Large buckets -- 384, 512, ..., 1024 in steps of 128.
    large_buckets = [128 * (i + 3) for i in range(6)]
    # By default use the above 20 bucket boundaries (21 queues in total).
    bucket_boundaries = small_buckets + medium_buckets + large_buckets
  with tf.name_scope("batch_examples"):
    # The queue to bucket on will be chosen based on maximum length.
    max_length = 0
    for v in examples.values():  # We assume 0-th dimension is the length.
      max_length = tf.maximum(max_length, tf.shape(v)[0])
    (_, outputs) = tf.contrib.training.bucket_by_sequence_length(
        max_length, examples, batch_size, bucket_boundaries,
        capacity=2 * batch_size, dynamic_pad=True)
    return outputs
129
130