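''' train.mfcc is 12 MB, so the loaded `data` array is also 12 MB. Passing that array directly to tf.slice raises "ValueError: GraphDef cannot be larger than 2GB." when epochSz * batchSz ~ 90, because each of the 2 * epochSz * batchSz tf.slice calls embeds its own 12 MB copy of the array in the graph (2 * 90 * 12 MB ~ 2 GB). The for-loop around sess.run does not affect the result, since it doesn't add anything to the graph structure. '''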
import tensorflow as tf import numpy as np import struct
def data_from_mfccFile(filepath):
    """Load a raw binary file of native-endian 32-bit floats into an array.

    Args:
        filepath: Path to a file whose bytes are consecutive 4-byte floats
            (the layout that ``struct.unpack('f', ...)`` reads).

    Returns:
        A writable 1-D ``np.float32`` array with ``file_size // 4`` elements.
        Trailing bytes that do not form a complete float are ignored.
    """
    with open(filepath, 'rb') as f:
        dataInFile = f.read()
    # Decode the whole buffer in one vectorized call instead of unpacking
    # each float in a Python-level loop. `count` drops a trailing partial
    # value, and .copy() gives a writable array (a frombuffer view over
    # `bytes` is read-only).
    floatCount = len(dataInFile) // 4
    return np.frombuffer(dataInFile, dtype=np.float32, count=floatCount).copy()
# Batch geometry: each epoch holds epochSz batches, each batch holds batchSz
# sequences, and each sequence is seqSz timesteps of timestepSz values.
batchSz = 20
epochSz = 3
seqSz = 300
timestepSz = 30
seqSzTimestepSz = seqSz * timestepSz

data = data_from_mfccFile("../lstm/train.mfcc")
#data = data_from_mfccFile("../lstm/test580m_i_16h.mfcc")

# Keep the samples in ONE graph variable that is filled through a placeholder
# at session start. Passing the NumPy array straight to tf.slice converts it
# to a new constant on every call, which embeds a full copy of the data in
# the GraphDef per slice and eventually hits the 2GB GraphDef limit.
dataInit = tf.placeholder(dtype=tf.float32, shape=data.shape)
dataVar = tf.Variable(dataInit, trainable=False, collections=[])

# NOTE(review): startIndices is a single op, so within one sess.run call
# every batch in epochData appears to be sliced at the same random offsets.
# Confirm this is intended.
startIndices = tf.random_uniform([batchSz], maxval=1000, dtype=tf.int32)

epochData = []  # list of (x, y) tuples, one per batch
for b in range(epochSz):  # for b-th batch
    x = []
    y = []
    for s in range(batchSz):  # for s-th sequence
        i = tf.slice(startIndices, [s], [1])  # i is a shape-[1] tensor
        iX = i * timestepSz
        # The target starts one timestep after the input.
        iY = (i + 1) * timestepSz
        x.append(tf.slice(dataVar, iX, [seqSzTimestepSz]))
        y.append(tf.slice(dataVar, iY, [seqSzTimestepSz]))
    x = tf.reshape(x, [batchSz, seqSz, timestepSz])  # input
    y = tf.reshape(y, [batchSz, seqSz, timestepSz])  # target
    epochData.append((x, y))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # dataVar is in no collection (collections=[]), so the global
    # initializer skips it; initialize it explicitly by feeding the array.
    sess.run(dataVar.initializer, feed_dict={dataInit: data})
    for i in range(100):
        ed = sess.run(epochData)
|