Merge pull request #4 from GuitarML/feature-add-split-data-arg
Feature add split data arg
GuitarML authored Dec 6, 2020
2 parents 5fd1fa7 + f0aa7e4 commit 3c1dc9e
Showing 3 changed files with 142 additions and 63 deletions.
8 changes: 8 additions & 0 deletions README.md
@@ -54,6 +54,7 @@ python predict.py data/ts9_test1_in_FP32.wav output models/ts9_model.h5

--training_mode=0 # enter 0, 1, or 2 for speed training, accuracy training, or extended training, respectively
--input_size=150 # sets the number of previous samples to consider for each output sample of audio
--split_data=3 # splits the input data into X equal sections to reduce RAM usage; trains the model on each section in turn
--max_epochs=1 # sets the number of epochs to train for; intended to be increased dramatically for extended training
--batch_size=4096 # sets the batch size of data for training

@@ -93,6 +94,13 @@ which requires about 8GB of RAM. Increasing this setting will improve
training accuracy, but the size of the preprocessed wav data in
RAM will increase as well.
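
As a rough estimate, the preprocessed input is a float32 matrix of about
(number of samples) x input_size values: a 4 minute, 44.1 kHz mono recording
(~10.6 million samples) windowed with input_size=150 comes to roughly
10.6e6 * 150 * 4 bytes, or about 6.4 GB.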

You can also use the "--split_data" parameter with train.py to
train the same model sequentially on separate sections of the
data. This reduces RAM usage while still allowing a high
input_size setting. For example, "--split_data=5" splits the data
into 5 equal sections and trains on each one in turn, as in the
example command below. The default is 1, or no splitting.
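
For example (the wav file and model names below are placeholders, assuming
train.py's usual in_file, out_file, and model name positional arguments):

python train.py ts9_in.wav ts9_out.wav my_ts9 --input_size=150 --split_data=5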

Adding a custom dataloader would reduce RAM usage at the cost of
training speed, and will be a focus of future work.
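
As a sketch of that idea (not part of this commit; the class name and shapes
are illustrative, assuming the normalized float32 arrays of shape (n, 1) that
train.py already produces), a Keras Sequence could build each batch of windows
on demand instead of precomputing the full windowed tensor:

import numpy as np
import tensorflow as tf

class WindowGenerator(tf.keras.utils.Sequence):
    # Builds (batch, input_size, 1) windows on the fly, so only one
    # batch of windowed data is held in RAM at a time.
    def __init__(self, X, y, input_size, batch_size):
        self.X, self.y = X, y  # normalized float32 arrays, shape (n, 1)
        self.input_size = input_size
        self.batch_size = batch_size
        self.n_windows = len(X) - input_size + 1

    def __len__(self):
        return int(np.ceil(self.n_windows / self.batch_size))

    def __getitem__(self, idx):
        start = idx * self.batch_size
        stop = min(start + self.batch_size, self.n_windows)
        # Only this batch's windows are materialized.
        batch_X = np.stack([self.X[i:i + self.input_size] for i in range(start, stop)])
        batch_y = self.y[start + self.input_size - 1:stop + self.input_size - 1]
        return batch_X, batch_y

# Usage: model.fit(WindowGenerator(X_all, y_all, input_size, batch_size), epochs=epochs)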

104 changes: 69 additions & 35 deletions guitar_lstm_colab.ipynb
@@ -24,7 +24,8 @@
"# 1. Upload your input and output wav files to the current directory in Colab\n",
"# 2. Edit the USER INPUTS section to point to your wav files, and choose a\n",
"# model name, and number of epochs for training. If you experience \n",
"# crashing due to low RAM, reduce the \"input_size\" parameter.\n",
"# crashing due to low RAM, reduce the \"input_size\" parameter, or increase\n",
"# the \"split_data\" parameter.\n",
"# 3. Run each section of code. The trained models and output wav files will be \n",
"# added to the \"models\" directory.\n",
"#\n",
@@ -49,7 +50,7 @@
"import h5py\n",
"\n"
],
"execution_count": null,
"execution_count": 1,
"outputs": []
},
{
@@ -64,14 +65,14 @@
"in_file = 'ts9_test1_in_FP32.wav'\n",
"out_file = 'ts9_test1_out_FP32.wav'\n",
"epochs = 1\n",
"\n",
"split_data=4 # **Increase this to reduce RAM usage **\n",
"\n",
"train_mode = 0 # 0 = speed training, \n",
" # 1 = accuracy training \n",
" # 2 = extended training\n",
"\n",
"input_size = 75 # !!!IMPORTANT !!!: The input_size is set at 75 for Colab notebook. \n",
" # a higher setting may result in crashing due to\n",
"input_size = 150 # !!!IMPORTANT !!!: The input_size is set at 150 for Colab notebook. \n",
" # A higher setting may result in crashing due to\n",
" # memory limitation of 8GB for the free version\n",
" # of Colab. This setting limits the accuracy of\n",
" # the training, especially for complex guitar signals\n",
@@ -80,6 +81,9 @@
" # !!!IMPORTANT!!!: You will most likely need to cycle the runtime to \n",
" # free up RAM between training sessions.\n",
" #\n",
" # Increase the \"split_data\" parameter to reduce the RAM used and\n",
" # still allow for a higher \"input_size\" setting. \n",
" #\n",
" # Future dev note: Using a custom dataloader may be a good\n",
" # workaround for this limitation, at the cost\n",
" # of slower training.\n",
@@ -159,24 +163,6 @@
" hidden_units= 96\n",
"\n",
"\n",
"# Load and Preprocess Data ###########################################\n",
"in_rate, in_data = wavfile.read(in_file)\n",
"out_rate, out_data = wavfile.read(out_file)\n",
"\n",
"X = in_data.astype(np.float32).flatten() \n",
"X = normalize(X).reshape(len(X),1) \n",
"y = out_data.astype(np.float32).flatten() \n",
"y = normalize(y).reshape(len(y),1) \n",
"\n",
"y_ordered = y[input_size-1:] \n",
"\n",
"indices = np.arange(input_size) + np.arange(len(X)-input_size+1)[:,np.newaxis] \n",
"X_ordered = tf.gather(X,indices) \n",
"\n",
"shuffled_indices = np.random.permutation(len(X_ordered)) \n",
"X_random = tf.gather(X_ordered,shuffled_indices)\n",
"y_random = tf.gather(y_ordered, shuffled_indices)\n",
"\n",
"# Create Sequential Model ###########################################\n",
"clear_session()\n",
"model = Sequential()\n",
@@ -187,27 +173,75 @@
"model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])\n",
"print(model.summary())\n",
"\n",
"# Train Model ###################################################\n",
"model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size) \n",
"# Load and Preprocess Data ###########################################\n",
"in_rate, in_data = wavfile.read(in_file)\n",
"out_rate, out_data = wavfile.read(out_file)\n",
"\n",
"X_all = in_data.astype(np.float32).flatten() \n",
"X_all = normalize(X_all).reshape(len(X_all),1) \n",
"y_all = out_data.astype(np.float32).flatten() \n",
"y_all = normalize(y_all).reshape(len(y_all),1) \n",
"\n",
"# If splitting the data for training, do this part\n",
"if split_data > 1:\n",
" num_split = len(X_all) // split_data\n",
" X = X_all[0:num_split*split_data]\n",
" y = y_all[0:num_split*split_data]\n",
" X_data = np.split(X, split_data)\n",
" y_data = np.split(y, split_data)\n",
"\n",
" # Perform training on each split dataset\n",
" for i in range(len(X_data)):\n",
" print(\"\\nTraining on split data \" + str(i+1) + \"/\" +str(len(X_data)))\n",
" X_split = X_data[i]\n",
" y_split = y_data[i]\n",
"\n",
" y_ordered = y_split[input_size-1:] \n",
"\n",
" indices = np.arange(input_size) + np.arange(len(X_split)-input_size+1)[:,np.newaxis] \n",
" X_ordered = tf.gather(X_split,indices) \n",
"\n",
" shuffled_indices = np.random.permutation(len(X_ordered)) \n",
" X_random = tf.gather(X_ordered,shuffled_indices)\n",
" y_random = tf.gather(y_ordered, shuffled_indices)\n",
"\n",
" # Train Model ###################################################\n",
" model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=0.2) \n",
"\n",
"model.save('models/'+name+'/'+name+'.h5')\n",
"\n",
"#model.save('model_data/')\n",
"#model = load_model('new_model_'+name+'.h5', custom_objects={'error_to_signal' : error_to_signal})\n",
"#learning_rate = 0.005\n",
"#model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])\n",
" model.save('models/'+name+'/'+name+'.h5')\n",
"\n",
"# If training on the full set of input data in one run, do this part\n",
"else:\n",
" y_ordered = y_all[input_size-1:] \n",
"\n",
" indices = np.arange(input_size) + np.arange(len(X_all)-input_size+1)[:,np.newaxis] \n",
" X_ordered = tf.gather(X_all,indices) \n",
"\n",
" shuffled_indices = np.random.permutation(len(X_ordered)) \n",
" X_random = tf.gather(X_ordered,shuffled_indices)\n",
" y_random = tf.gather(y_ordered, shuffled_indices)\n",
"\n",
" # Train Model ###################################################\n",
" model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size) \n",
"\n",
" model.save('models/'+name+'/'+name+'.h5')\n",
"\n",
"# Run Prediction #################################################\n",
"print(\"Running prediction..\")\n",
"y_the_rest, y_last_part = np.split(y_ordered, [int(len(y_ordered)*.8)])\n",
"x_the_rest, x_last_part = np.split(X, [int(len(X)*.8)])\n",
"\n",
"x_the_rest, x_ordered_last_part = np.split(X_ordered, [int(len(X_ordered)*.8)])\n",
"prediction = model.predict(x_ordered_last_part, batch_size=batch_size)\n",
"# Get the last 20% of the wav data to run prediction and plot results\n",
"y_the_rest, y_last_part = np.split(y_all, [int(len(y_all)*.8)])\n",
"x_the_rest, x_last_part = np.split(X_all, [int(len(X_all)*.8)])\n",
"y_test = y_last_part[input_size-1:] \n",
"indices = np.arange(input_size) + np.arange(len(x_last_part)-input_size+1)[:,np.newaxis] \n",
"X_test = tf.gather(x_last_part,indices) \n",
"\n",
"prediction = model.predict(X_test, batch_size=batch_size)\n",
"\n",
"save_wav('models/'+name+'/y_pred.wav', prediction)\n",
"save_wav('models/'+name+'/x_test.wav', x_last_part)\n",
"save_wav('models/'+name+'/y_test.wav', y_last_part)\n",
"save_wav('models/'+name+'/y_test.wav', y_test)\n",
"\n",
"# Add additional data to the saved model (like input_size)\n",
"filename = 'models/'+name+'/'+name+'.h5'\n",
93 changes: 65 additions & 28 deletions train.py
@@ -50,7 +50,9 @@ def main(args):
Note: RAM may be a limiting factor for the parameter "input_size". The wav data
is preprocessed and stored in RAM, which improves training speed but quickly runs out
if using a large number for "input_size". Reduce this if you are experiencing
RAM issues.
RAM issues. Alternatively, use the "--split_data" option to divide the data into
the specified number of sections and train the model on each section in turn.
This allows a higher input_size setting (more accurate results) within the same RAM budget.
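
As a concrete illustration of the split arithmetic used below (a sketch; the
tiny array stands in for the normalized wav data):

    import numpy as np

    X_all = np.arange(10).reshape(-1, 1)   # stand-in for millions of wav samples
    split_data = 3
    num_split = len(X_all) // split_data   # 3
    X = X_all[0:num_split * split_data]    # trim 1 leftover sample
    X_data = np.split(X, split_data)       # 3 equal sections of 3 samples each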
--training_mode=0 Speed training (default)
--training_mode=1 Accuracy training
@@ -89,25 +91,6 @@ def main(args):
conv1d_filters = 36
hidden_units= 96


# Load and Preprocess Data ###########################################
in_rate, in_data = wavfile.read(args.in_file)
out_rate, out_data = wavfile.read(args.out_file)

X = in_data.astype(np.float32).flatten()
X = normalize(X).reshape(len(X),1)
y = out_data.astype(np.float32).flatten()
y = normalize(y).reshape(len(y),1)

y_ordered = y[input_size-1:]

indices = np.arange(input_size) + np.arange(len(X)-input_size+1)[:,np.newaxis]
X_ordered = tf.gather(X,indices)

shuffled_indices = np.random.permutation(len(X_ordered))
X_random = tf.gather(X_ordered,shuffled_indices)
y_random = tf.gather(y_ordered, shuffled_indices)

# Create Sequential Model ###########################################
clear_session()
model = Sequential()
@@ -118,22 +101,75 @@
model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])
print(model.summary())

# Train Model ###################################################
model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size)
# Load and Preprocess Data ###########################################
in_rate, in_data = wavfile.read(args.in_file)
out_rate, out_data = wavfile.read(args.out_file)

X_all = in_data.astype(np.float32).flatten()
X_all = normalize(X_all).reshape(len(X_all),1)
y_all = out_data.astype(np.float32).flatten()
y_all = normalize(y_all).reshape(len(y_all),1)

model.save('models/'+name+'/'+name+'.h5')
# If splitting the data for training, do this part
if args.split_data > 1:
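# Trim any leftover samples so np.split can produce equal-length sections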
num_split = len(X_all) // args.split_data
X = X_all[0:num_split*args.split_data]
y = y_all[0:num_split*args.split_data]
X_data = np.split(X, args.split_data)
y_data = np.split(y, args.split_data)

# Perform training on each split dataset
for i in range(len(X_data)):
print("\nTraining on split data " + str(i+1) + "/" +str(len(X_data)))
X_split = X_data[i]
y_split = y_data[i]

y_ordered = y_split[input_size-1:]

indices = np.arange(input_size) + np.arange(len(X_split)-input_size+1)[:,np.newaxis]
X_ordered = tf.gather(X_split,indices)

shuffled_indices = np.random.permutation(len(X_ordered))
X_random = tf.gather(X_ordered,shuffled_indices)
y_random = tf.gather(y_ordered, shuffled_indices)

# Train Model ###################################################
model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=0.2)


model.save('models/'+name+'/'+name+'.h5')

# If training on the full set of input data in one run, do this part
else:
y_ordered = y_all[input_size-1:]

indices = np.arange(input_size) + np.arange(len(X_all)-input_size+1)[:,np.newaxis]
X_ordered = tf.gather(X_all,indices)

shuffled_indices = np.random.permutation(len(X_ordered))
X_random = tf.gather(X_ordered,shuffled_indices)
y_random = tf.gather(y_ordered, shuffled_indices)

# Train Model ###################################################
model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size)

model.save('models/'+name+'/'+name+'.h5')

# Run Prediction #################################################
print("Running prediction..")
y_the_rest, y_last_part = np.split(y_ordered, [int(len(y_ordered)*.8)])
x_the_rest, x_last_part = np.split(X, [int(len(X)*.8)])

x_the_rest, x_ordered_last_part = np.split(X_ordered, [int(len(X_ordered)*.8)])
prediction = model.predict(x_ordered_last_part, batch_size=batch_size)
# Get the last 20% of the wav data to run prediction and plot results
y_the_rest, y_last_part = np.split(y_all, [int(len(y_all)*.8)])
x_the_rest, x_last_part = np.split(X_all, [int(len(X_all)*.8)])
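# Each prediction needs input_size samples of history, so drop the first
# input_size-1 targets, which have no complete input window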
y_test = y_last_part[input_size-1:]
indices = np.arange(input_size) + np.arange(len(x_last_part)-input_size+1)[:,np.newaxis]
X_test = tf.gather(x_last_part,indices)

prediction = model.predict(X_test, batch_size=batch_size)

save_wav('models/'+name+'/y_pred.wav', prediction)
save_wav('models/'+name+'/x_test.wav', x_last_part)
save_wav('models/'+name+'/y_test.wav', y_last_part)
save_wav('models/'+name+'/y_test.wav', y_test)

# Add additional data to the saved model (like input_size)
filename = 'models/'+name+'/'+name+'.h5'
@@ -166,5 +202,6 @@ def main(args):
parser.add_argument("--max_epochs", type=int, default=1)
parser.add_argument("--create_plots", type=int, default=1)
parser.add_argument("--input_size", type=int, default=100)
parser.add_argument("--split_data", type=int, default=1)
args = parser.parse_args()
main(args)
