; readWord success
  
an example of bad code with graph size in Tensorflow cella 2017 Branchable 0 12362
  


'''
train.mfcc file size is 12MB => data 12MB.
This code raises "ValueError: GraphDef cannot be larger than 2GB."
when epochSz * batchSz ~ 90, where 2 * 90 * 12MB ~ 2GB.
For-loop does not affect the result, since it doesn't add anything to the graph structure.
'''


import tensorflow as tf
import numpy as np
import struct

def data_from_mfccFile(filepath):
with open(filepath, 'rb') as f:
dataInFile = f.read() #
data = np.zeros(len(dataInFile) // 4, dtype=np.float32)
for i in range(len(data)):
data[i] = struct.unpack('f', dataInFile[i*4 : (i+1)*4])[0]
return data

batchSz = 20
epochSz = 3
seqSz = 300
timestepSz = 30
seqSzTimestepSz = seqSz * timestepSz

data = data_from_mfccFile("../lstm/train.mfcc")
#data = data_from_mfccFile("../lstm/test580m_i_16h.mfcc")

startIndices = tf.random_uniform([batchSz], maxval=1000, dtype=tf.int32)

epochData = [] # list
for b in range(epochSz): # for b-th batch
x = [] # list
y = [] # list
for s in range(batchSz): # for s-th sequence
i = tf.slice(startIndices, [s], [1]) # i is a tensor
iX = i * timestepSz
iY = (i + 1) * timestepSz
x.append(tf.slice(data, iX, [seqSzTimestepSz]))
y.append(tf.slice(data, iY, [seqSzTimestepSz]))
x = tf.reshape(x, [batchSz, seqSz, timestepSz]) # input
y = tf.reshape(y, [batchSz, seqSz, timestepSz]) # target

epochData.append((x,y),) # list of tuples; tuple (x,y) is a batchData


with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(100):
ed = sess.run(epochData)