📚 The CoCalc Library - books, templates and other resources
License: OTHER
""" Some people tried to use TextLineReader for assignment 1
but seem to have problems getting it to work, so here is a short
script demonstrating the use of the CSV reader on the heart dataset.
Note that the heart dataset is originally in txt so I first
converted it to csv to take advantage of the already laid out columns.

You can download heart.csv in the data folder.
Author: Chip Huyen
Prepared for the class CS 20SI: "TensorFlow for Deep Learning Research"
cs20si.stanford.edu
"""
import os
# Set before the tensorflow import below so the value is already in the
# environment when TensorFlow loads (presumably to quiet its C++ logging).
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import sys
sys.path.append('..')

import tensorflow as tf

DATA_PATH = 'data/heart.csv'
BATCH_SIZE = 2
N_FEATURES = 9


def batch_generator(filenames):
    """ Build the graph ops that read, parse and shuffle-batch CSV records.

    filenames is the list of files you want to read from.
    In this case, it contains only heart.csv

    Returns a (data_batch, label_batch) pair of tensors produced by
    tf.train.shuffle_batch with batch_size=BATCH_SIZE. Nothing is read
    until the tensors are evaluated in a session with queue runners started.
    """
    filename_queue = tf.train.string_input_producer(filenames)
    reader = tf.TextLineReader(skip_header_lines=1)  # skip the first line in the file
    _, value = reader.read(filename_queue)

    # record_defaults are the default values in case some of our columns are empty.
    # This is also to tell tensorflow the format of our data (the type of the decode result).
    # For this dataset, out of 9 feature columns,
    # 8 of them are floats (some are integers, but to make our features homogeneous,
    # we consider them floats), and 1 is string (the 5th column, index 4).
    # The last column corresponds to the label and is an integer.
    record_defaults = [[1.0] for _ in range(N_FEATURES)]
    record_defaults[4] = ['']
    record_defaults.append([1])

    # parse one CSV line into its N_FEATURES + 1 columns
    content = tf.decode_csv(value, record_defaults=record_defaults)

    # convert the 5th column (present/absent) to the binary value 0 and 1
    is_present = tf.equal(content[4], tf.constant('Present'))
    content[4] = tf.cond(is_present,
                         lambda: tf.constant(1.0),
                         lambda: tf.constant(0.0))

    # pack all 9 features into a tensor
    features = tf.stack(content[:N_FEATURES])

    # assign the last column to label
    label = content[-1]

    # minimum number of elements in the queue after a dequeue, used to ensure
    # that the samples are sufficiently mixed
    # I think 10 times the BATCH_SIZE is sufficient
    min_after_dequeue = 10 * BATCH_SIZE

    # the maximum number of elements in the queue
    capacity = 20 * BATCH_SIZE

    # shuffle the data to generate BATCH_SIZE sample pairs
    data_batch, label_batch = tf.train.shuffle_batch(
        [features, label],
        batch_size=BATCH_SIZE,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)

    return data_batch, label_batch


def generate_batches(data_batch, label_batch, n_batches=10):
    """ Evaluate the batch tensors n_batches times and print the features.

    data_batch, label_batch: the tensors returned by batch_generator.
    n_batches: how many batches to pull from the queue (default 10).
    """
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for _ in range(n_batches):
                # fetch both tensors in one run call so features and labels
                # come from the same dequeue; only the features are printed
                features, _labels = sess.run([data_batch, label_batch])
                print(features)
        finally:
            # always ask the queue-runner threads to stop and wait for them,
            # even when sess.run raised, so no reader threads are left running
            coord.request_stop()
            coord.join(threads)


def main():
    """ Build the input pipeline for DATA_PATH and print a few batches. """
    data_batch, label_batch = batch_generator([DATA_PATH])
    generate_batches(data_batch, label_batch)


if __name__ == '__main__':
    main()