Merge pull request #4 from GuitarML/feature-add-split-data-arg
Feature add split data arg
GuitarML authored Dec 6, 2020
2 parents 5fd1fa7 + f0aa7e4 commit 3c1dc9e
Showing 3 changed files with 142 additions and 63 deletions.
8 changes: 8 additions & 0 deletions README.md
@@ -54,6 +54,7 @@ python predict.py data/ts9_test1_in_FP32.wav output models/ts9_model.h5

--training_mode=0 # enter 0, 1, or 2 for speed training, accuracy training, or extended training, respectively
--input_size=150 # sets the number of previous samples to consider for each output sample of audio
--split_data=3 # splits the input data into X equal sections to reduce RAM usage; trains the model on each section in turn
--max_epochs=1 # sets the number of epochs to train for; intended to be increased dramatically for extended training
--batch_size=4096 # sets the batch size of data for training

@@ -93,6 +94,13 @@ which requires about 8GB of RAM. Increasing this setting will improve
training accuracy, but the size of the preprocessed wav data in
RAM will increase as well.
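
As a rough estimate, the preprocessed input is a float32 matrix of about
(number of samples) x input_size values: a 4 minute, 44.1 kHz mono recording
(~10.6 million samples) windowed with input_size=150 comes to roughly
10.6e6 * 150 * 4 bytes, or about 6.4 GB.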

You can also use the "--split_data" parameter with train.py to
train the same model sequentially on separate sections of the
data. This reduces RAM usage while still allowing a high
input_size setting. For example, "--split_data=5" splits the data
into 5 equal sections and trains on each one in turn, as in the
example command below. The default is 1, or no splitting.
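
For example (the wav file and model names below are placeholders, assuming
train.py's usual in_file, out_file, and model name positional arguments):

python train.py ts9_in.wav ts9_out.wav my_ts9 --input_size=150 --split_data=5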

Adding a custom dataloader would reduce RAM usage at the cost of
training speed, and will be a focus of future work.
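
As a sketch of that idea (not part of this commit; the class name and shapes
are illustrative, assuming the normalized float32 arrays of shape (n, 1) that
train.py already produces), a Keras Sequence could build each batch of windows
on demand instead of precomputing the full windowed tensor:

import numpy as np
import tensorflow as tf

class WindowGenerator(tf.keras.utils.Sequence):
    # Builds (batch, input_size, 1) windows on the fly, so only one
    # batch of windowed data is held in RAM at a time.
    def __init__(self, X, y, input_size, batch_size):
        self.X, self.y = X, y  # normalized float32 arrays, shape (n, 1)
        self.input_size = input_size
        self.batch_size = batch_size
        self.n_windows = len(X) - input_size + 1

    def __len__(self):
        return int(np.ceil(self.n_windows / self.batch_size))

    def __getitem__(self, idx):
        start = idx * self.batch_size
        stop = min(start + self.batch_size, self.n_windows)
        # Only this batch's windows are materialized.
        batch_X = np.stack([self.X[i:i + self.input_size] for i in range(start, stop)])
        batch_y = self.y[start + self.input_size - 1:stop + self.input_size - 1]
        return batch_X, batch_y

# Usage: model.fit(WindowGenerator(X_all, y_all, input_size, batch_size), epochs=epochs)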

104 changes: 69 additions & 35 deletions guitar_lstm_colab.ipynb
@@ -24,7 +24,8 @@
"# 1. Upload your input and output wav files to the current directory in Colab\n",
"# 2. Edit the USER INPUTS section to point to your wav files, and choose a\n",
"# model name, and number of epochs for training. If you experience \n",
"# crashing due to low RAM, reduce the \"input_size\" parameter.\n",
"# crashing due to low RAM, reduce the \"input_size\" parameter, or increase\n",
"# the \"split_data\" parameter.\n",
"# 3. Run each section of code. The trained models and output wav files will be \n",
"# added to the \"models\" directory.\n",
"#\n",
@@ -49,7 +50,7 @@
"import h5py\n",
"\n"
],
"execution_count": null,
"execution_count": 1,
"outputs": []
},
{
@@ -64,14 +65,14 @@
"in_file = 'ts9_test1_in_FP32.wav'\n",
"out_file = 'ts9_test1_out_FP32.wav'\n",
"epochs = 1\n",
"\n",
"split_data=4 # **Increase this to reduce RAM usage **\n",
"\n",
"train_mode = 0 # 0 = speed training, \n",
" # 1 = accuracy training \n",
" # 2 = extended training\n",
"\n",
"input_size = 75 # !!!IMPORTANT !!!: The input_size is set at 75 for Colab notebook. \n",
" # a higher setting may result in crashing due to\n",
"input_size = 150 # !!!IMPORTANT !!!: The input_size is set at 150 for Colab notebook. \n",
" # A higher setting may result in crashing due to\n",
" # memory limitation of 8GB for the free version\n",
" # of Colab. This setting limits the accuracy of\n",
" # the training, especially for complex guitar signals\n",
@@ -80,6 +81,9 @@
" # !!!IMPORTANT!!!: You will most likely need to cycle the runtime to \n",
" # free up RAM between training sessions.\n",
" #\n",
" # Increase the \"split_data\" parameter to reduce the RAM used and\n",
" # still allow for a higher \"input_size\" setting. \n",
" #\n",
" # Future dev note: Using a custom dataloader may be a good\n",
" # workaround for this limitation, at the cost\n",
" # of slower training.\n",
@@ -159,24 +163,6 @@
" hidden_units= 96\n",
"\n",
"\n",
"# Load and Preprocess Data ###########################################\n",
"in_rate, in_data = wavfile.read(in_file)\n",
"out_rate, out_data = wavfile.read(out_file)\n",
"\n",
"X = in_data.astype(np.float32).flatten() \n",
"X = normalize(X).reshape(len(X),1) \n",
"y = out_data.astype(np.float32).flatten() \n",
"y = normalize(y).reshape(len(y),1) \n",
"\n",
"y_ordered = y[input_size-1:] \n",
"\n",
"indices = np.arange(input_size) + np.arange(len(X)-input_size+1)[:,np.newaxis] \n",
"X_ordered = tf.gather(X,indices) \n",
"\n",
"shuffled_indices = np.random.permutation(len(X_ordered)) \n",
"X_random = tf.gather(X_ordered,shuffled_indices)\n",
"y_random = tf.gather(y_ordered, shuffled_indices)\n",
"\n",
"# Create Sequential Model ###########################################\n",
"clear_session()\n",
"model = Sequential()\n",
@@ -187,27 +173,75 @@
"model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])\n",
"print(model.summary())\n",
"\n",
"# Train Model ###################################################\n",
"model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size) \n",
"# Load and Preprocess Data ###########################################\n",
"in_rate, in_data = wavfile.read(in_file)\n",
"out_rate, out_data = wavfile.read(out_file)\n",
"\n",
"X_all = in_data.astype(np.float32).flatten() \n",
"X_all = normalize(X_all).reshape(len(X_all),1) \n",
"y_all = out_data.astype(np.float32).flatten() \n",
"y_all = normalize(y_all).reshape(len(y_all),1) \n",
"\n",
"# If splitting the data for training, do this part\n",
"if split_data > 1:\n",
" num_split = len(X_all) // split_data\n",
" X = X_all[0:num_split*split_data]\n",
" y = y_all[0:num_split*split_data]\n",
" X_data = np.split(X, split_data)\n",
" y_data = np.split(y, split_data)\n",
"\n",
" # Perform training on each split dataset\n",
" for i in range(len(X_data)):\n",
" print(\"\\nTraining on split data \" + str(i+1) + \"/\" +str(len(X_data)))\n",
" X_split = X_data[i]\n",
" y_split = y_data[i]\n",
"\n",
" y_ordered = y_split[input_size-1:] \n",
"\n",
" indices = np.arange(input_size) + np.arange(len(X_split)-input_size+1)[:,np.newaxis] \n",
" X_ordered = tf.gather(X_split,indices) \n",
"\n",
" shuffled_indices = np.random.permutation(len(X_ordered)) \n",
" X_random = tf.gather(X_ordered,shuffled_indices)\n",
" y_random = tf.gather(y_ordered, shuffled_indices)\n",
"\n",
" # Train Model ###################################################\n",
" model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=0.2) \n",
"\n",
"model.save('models/'+name+'/'+name+'.h5')\n",
"\n",
"#model.save('model_data/')\n",
"#model = load_model('new_model_'+name+'.h5', custom_objects={'error_to_signal' : error_to_signal})\n",
"#learning_rate = 0.005\n",
"#model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])\n",
" model.save('models/'+name+'/'+name+'.h5')\n",
"\n",
"# If training on the full set of input data in one run, do this part\n",
"else:\n",
" y_ordered = y_all[input_size-1:] \n",
"\n",
" indices = np.arange(input_size) + np.arange(len(X_all)-input_size+1)[:,np.newaxis] \n",
" X_ordered = tf.gather(X_all,indices) \n",
"\n",
" shuffled_indices = np.random.permutation(len(X_ordered)) \n",
" X_random = tf.gather(X_ordered,shuffled_indices)\n",
" y_random = tf.gather(y_ordered, shuffled_indices)\n",
"\n",
" # Train Model ###################################################\n",
" model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size) \n",
"\n",
" model.save('models/'+name+'/'+name+'.h5')\n",
"\n",
"# Run Prediction #################################################\n",
"print(\"Running prediction..\")\n",
"y_the_rest, y_last_part = np.split(y_ordered, [int(len(y_ordered)*.8)])\n",
"x_the_rest, x_last_part = np.split(X, [int(len(X)*.8)])\n",
"\n",
"x_the_rest, x_ordered_last_part = np.split(X_ordered, [int(len(X_ordered)*.8)])\n",
"prediction = model.predict(x_ordered_last_part, batch_size=batch_size)\n",
"# Get the last 20% of the wav data to run prediction and plot results\n",
"y_the_rest, y_last_part = np.split(y_all, [int(len(y_all)*.8)])\n",
"x_the_rest, x_last_part = np.split(X_all, [int(len(X_all)*.8)])\n",
"y_test = y_last_part[input_size-1:] \n",
"indices = np.arange(input_size) + np.arange(len(x_last_part)-input_size+1)[:,np.newaxis] \n",
"X_test = tf.gather(x_last_part,indices) \n",
"\n",
"prediction = model.predict(X_test, batch_size=batch_size)\n",
"\n",
"save_wav('models/'+name+'/y_pred.wav', prediction)\n",
"save_wav('models/'+name+'/x_test.wav', x_last_part)\n",
"save_wav('models/'+name+'/y_test.wav', y_last_part)\n",
"save_wav('models/'+name+'/y_test.wav', y_test)\n",
"\n",
"# Add additional data to the saved model (like input_size)\n",
"filename = 'models/'+name+'/'+name+'.h5'\n",
93 changes: 65 additions & 28 deletions train.py
@@ -50,7 +50,9 @@ def main(args):
Note: RAM may be a limiting factor for the parameter "input_size". The wav data
is preprocessed and stored in RAM, which improves training speed but quickly runs out
if using a large number for "input_size". Reduce this if you are experiencing
RAM issues.
RAM issues. Alternatively, use the "--split_data" option to divide the data into
the specified number of sections and train the model on each section in turn.
This allows a higher input_size setting (more accurate results) within the same RAM budget.
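
As a concrete illustration of the split arithmetic used below (a sketch; the
tiny array stands in for the normalized wav data):

    import numpy as np

    X_all = np.arange(10).reshape(-1, 1)   # stand-in for millions of wav samples
    split_data = 3
    num_split = len(X_all) // split_data   # 3
    X = X_all[0:num_split * split_data]    # trim 1 leftover sample
    X_data = np.split(X, split_data)       # 3 equal sections of 3 samples each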
--training_mode=0 Speed training (default)
--training_mode=1 Accuracy training
@@ -89,25 +91,6 @@ def main(args):
conv1d_filters = 36
hidden_units= 96


# Load and Preprocess Data ###########################################
in_rate, in_data = wavfile.read(args.in_file)
out_rate, out_data = wavfile.read(args.out_file)

X = in_data.astype(np.float32).flatten()
X = normalize(X).reshape(len(X),1)
y = out_data.astype(np.float32).flatten()
y = normalize(y).reshape(len(y),1)

y_ordered = y[input_size-1:]

indices = np.arange(input_size) + np.arange(len(X)-input_size+1)[:,np.newaxis]
X_ordered = tf.gather(X,indices)

shuffled_indices = np.random.permutation(len(X_ordered))
X_random = tf.gather(X_ordered,shuffled_indices)
y_random = tf.gather(y_ordered, shuffled_indices)

# Create Sequential Model ###########################################
clear_session()
model = Sequential()
@@ -118,22 +101,75 @@
model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])
print(model.summary())

# Train Model ###################################################
model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size)
# Load and Preprocess Data ###########################################
in_rate, in_data = wavfile.read(args.in_file)
out_rate, out_data = wavfile.read(args.out_file)

X_all = in_data.astype(np.float32).flatten()
X_all = normalize(X_all).reshape(len(X_all),1)
y_all = out_data.astype(np.float32).flatten()
y_all = normalize(y_all).reshape(len(y_all),1)

model.save('models/'+name+'/'+name+'.h5')
# If splitting the data for training, do this part
if args.split_data > 1:
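# Trim any leftover samples so np.split can produce equal-length sections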
num_split = len(X_all) // args.split_data
X = X_all[0:num_split*args.split_data]
y = y_all[0:num_split*args.split_data]
X_data = np.split(X, args.split_data)
y_data = np.split(y, args.split_data)

# Perform training on each split dataset
for i in range(len(X_data)):
print("\nTraining on split data " + str(i+1) + "/" +str(len(X_data)))
X_split = X_data[i]
y_split = y_data[i]

y_ordered = y_split[input_size-1:]

indices = np.arange(input_size) + np.arange(len(X_split)-input_size+1)[:,np.newaxis]
X_ordered = tf.gather(X_split,indices)

shuffled_indices = np.random.permutation(len(X_ordered))
X_random = tf.gather(X_ordered,shuffled_indices)
y_random = tf.gather(y_ordered, shuffled_indices)

# Train Model ###################################################
model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=0.2)


model.save('models/'+name+'/'+name+'.h5')

# If training on the full set of input data in one run, do this part
else:
y_ordered = y_all[input_size-1:]

indices = np.arange(input_size) + np.arange(len(X_all)-input_size+1)[:,np.newaxis]
X_ordered = tf.gather(X_all,indices)

shuffled_indices = np.random.permutation(len(X_ordered))
X_random = tf.gather(X_ordered,shuffled_indices)
y_random = tf.gather(y_ordered, shuffled_indices)

# Train Model ###################################################
model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size)

model.save('models/'+name+'/'+name+'.h5')

# Run Prediction #################################################
print("Running prediction..")
y_the_rest, y_last_part = np.split(y_ordered, [int(len(y_ordered)*.8)])
x_the_rest, x_last_part = np.split(X, [int(len(X)*.8)])

x_the_rest, x_ordered_last_part = np.split(X_ordered, [int(len(X_ordered)*.8)])
prediction = model.predict(x_ordered_last_part, batch_size=batch_size)
# Get the last 20% of the wav data to run prediction and plot results
y_the_rest, y_last_part = np.split(y_all, [int(len(y_all)*.8)])
x_the_rest, x_last_part = np.split(X_all, [int(len(X_all)*.8)])
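# Each prediction needs input_size samples of history, so drop the first
# input_size-1 targets, which have no complete input window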
y_test = y_last_part[input_size-1:]
indices = np.arange(input_size) + np.arange(len(x_last_part)-input_size+1)[:,np.newaxis]
X_test = tf.gather(x_last_part,indices)

prediction = model.predict(X_test, batch_size=batch_size)

save_wav('models/'+name+'/y_pred.wav', prediction)
save_wav('models/'+name+'/x_test.wav', x_last_part)
save_wav('models/'+name+'/y_test.wav', y_last_part)
save_wav('models/'+name+'/y_test.wav', y_test)

# Add additional data to the saved model (like input_size)
filename = 'models/'+name+'/'+name+'.h5'
@@ -166,5 +202,6 @@ def main(args):
parser.add_argument("--max_epochs", type=int, default=1)
parser.add_argument("--create_plots", type=int, default=1)
parser.add_argument("--input_size", type=int, default=100)
parser.add_argument("--split_data", type=int, default=1)
args = parser.parse_args()
main(args)
