From 698c18fa8e6a8948150ade6206d037a2d14f4afa Mon Sep 17 00:00:00 2001
From: Michal Choinski
Date: Thu, 28 Apr 2022 13:45:43 +0200
Subject: [PATCH] :sparkles: notebook for the workshop

---
 workshop/places365_train.ipynb | 420 +++++++++++++++++++++++++++++++++
 1 file changed, 420 insertions(+)
 create mode 100644 workshop/places365_train.ipynb

diff --git a/workshop/places365_train.ipynb b/workshop/places365_train.ipynb
new file mode 100644
index 0000000..4a4e8d0
--- /dev/null
+++ b/workshop/places365_train.ipynb
@@ -0,0 +1,420 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "mLjoHocGsXa_"
+   },
+   "source": [
+    "## Preparing the environment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "!gdown 1y-LdQ_4dbOip6sBgZ-Ub1FI6Hh5kl3h1"
+   ],
+   "metadata": {
+    "id": "x9DwxHaAj3V4"
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "SdcVz796SPRJ"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install plot_keras_history"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "MUByrvr7sbj0"
+   },
+   "source": [
+    "## Preparing the dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "OvyDyYG2ybOv"
+   },
+   "outputs": [],
+   "source": [
+    "dataset_name = \"places365_300\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "UFM1GfuOyD7v"
+   },
+   "outputs": [],
+   "source": [
+    "!tar -xf {dataset_name}.tar"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "m-KvXhGgseu5"
+   },
+   "source": [
+    "## Importing the necessary libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "AxR7wv3AzmKp"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "import numpy as np\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "from plot_keras_history import show_history\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.metrics import classification_report, confusion_matrix\n",
+    "import seaborn as sns\n",
+    "from mpl_toolkits.axes_grid1 import ImageGrid\n",
+    "import math"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "MegEbOfYskRE"
+   },
+   "source": [
+    "## Preprocessing the dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "The following code loads the images and builds a preprocessing pipeline using Datasets (TensorFlow-specific structures providing input data).\n",
+    "\n",
+    "Documentation: https://www.tensorflow.org/guide/data"
+   ],
+   "metadata": {
+    "id": "YlN_UCYM4oH9"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "yuAsr9Aq1ZdG"
+   },
+   "outputs": [],
+   "source": [
+    "class_names = [\"alley\", \"entrance_hall\", \"park\"]\n",
+    "\n",
+    "\n",
+    "def get_label(file_path):\n",
+    "    # The class name is the parent directory of the file; encode it as an integer index.\n",
+    "    parts = tf.strings.split(file_path, os.path.sep)\n",
+    "    one_hot = parts[-2] == class_names\n",
+    "    return tf.argmax(one_hot)\n",
+    "\n",
+    "\n",
+    "def process_path(file_path, img_size=(224, 224)):\n",
+    "    # Load a JPEG from disk and resize it to the input size expected by the model.\n",
+    "    label = get_label(file_path)\n",
+    "    img = tf.io.read_file(file_path)\n",
+    "    img = tf.io.decode_jpeg(img, channels=3)\n",
+    "    img = tf.image.resize(img, [img_size[0], img_size[1]])\n",
+    "    return img, label\n",
+    "\n",
+    "\n",
+    "def build_dataset(path, sub_path):\n",
+    "    ds = tf.data.Dataset.list_files(str(f\"{path}/{sub_path}/*/*\"))\n",
+    "    ds = ds.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)\n",
+    "    ds = ds.cache()\n",
+    "    ds = ds.batch(64)\n",
+    "    ds = ds.prefetch(tf.data.AUTOTUNE)\n",
+    "    return ds"
+   ]
+  },
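+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an optional sanity check (an added illustration, not required for training), we can build one Dataset and pull a single batch to confirm that the pipeline yields image and label tensors of the expected shapes: batches of 64 images of size 224x224x3 and 64 integer labels. The names `sample_images` and `sample_labels` are used only in this check."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check: take one batch and inspect its shape and a few labels.\n",
+    "sample_images, sample_labels = next(iter(build_dataset(dataset_name, \"train\")))\n",
+    "print(sample_images.shape)        # e.g. (64, 224, 224, 3)\n",
+    "print(sample_labels.numpy()[:5])  # integer indices into class_names"
+   ]
+  },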
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "NYDLidr11Sri"
+   },
+   "outputs": [],
+   "source": [
+    "train_ds = build_dataset(dataset_name, \"train\")\n",
+    "val_ds = build_dataset(dataset_name, \"val\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "LZIBzMs1soYP"
+   },
+   "source": [
+    "## Building the model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "This code builds a model that will be trained in the following cells. The field `base_model` is initialized with a pre-defined architecture loaded from Keras.\n",
+    "\n",
+    "You can find the list of possible architectures at https://keras.io/api/applications/\n",
+    "\n",
+    "On top of `base_model`, we add a fully-connected layer with 3 neurons and a softmax activation function. The softmax scales the outputs so that they can be interpreted as a probability distribution in which all of the probabilities sum up to 1.\n"
+   ],
+   "metadata": {
+    "id": "ws_I8t8f5QeB"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "l7UrpLIa9QR3"
+   },
+   "outputs": [],
+   "source": [
+    "class Places365Model(tf.keras.Model):\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        # include_top=False drops MobileNet's own classification head;\n",
+    "        # pooling=\"avg\" turns the final feature map into a single feature vector.\n",
+    "        self.base_model = tf.keras.applications.MobileNet(\n",
+    "            input_shape=(224, 224, 3),\n",
+    "            include_top=False,\n",
+    "            weights=None,\n",
+    "            classes=3,\n",
+    "            pooling=\"avg\"\n",
+    "        )\n",
+    "        # The 3-way classification is done by this fully-connected softmax layer.\n",
+    "        self.fc = tf.keras.layers.Dense(3, activation='softmax')\n",
+    "\n",
+    "    def call(self, x):\n",
+    "        x = self.base_model(x)\n",
+    "        return self.fc(x)\n",
+    "\n",
+    "\n",
+    "model = Places365Model()\n",
+    "model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "W8rJpL9Ss8Wt"
+   },
+   "source": [
+    "## Training the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "jzNnDGkKzVDc"
+   },
+   "outputs": [],
+   "source": [
+    "history = model.fit(\n",
+    "    train_ds,\n",
+    "    epochs=10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "__kwjFK9sKSK"
+   },
+   "source": [
+    "## Plotting the metrics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "Let's see the results of our training!"
+   ],
+   "metadata": {
+    "id": "Lhdx7LkU_vxx"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "6pFRBNs6SJjm"
+   },
+   "outputs": [],
+   "source": [
+    "show_history(history)\n",
+    "plt.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "mmQHHkyCr8Cg"
+   },
+   "source": [
+    "## Confusion matrix"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "A confusion matrix is a tool that helps us interpret the results and draw conclusions about the characteristics of the model.\n",
+    "\n",
+    "It is a table presenting the number of correct predictions and showing how many mistakes the model made, with a particular focus on which classes it confuses with each other.\n",
+    "\n",
+    "A row represents the true class, and a column represents the predicted one."
+   ],
+   "metadata": {
+    "id": "KNtWihJPAfes"
+   }
+  },
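+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To make the row/column convention concrete, here is a minimal toy example (the labels below are made up by hand, they are not predictions of our model): rows follow the true classes and columns follow the predicted ones, in the order given by `class_names`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Toy illustration only: hand-made labels, not model outputs.\n",
+    "toy_true = [\"alley\", \"park\", \"park\", \"entrance_hall\", \"alley\"]\n",
+    "toy_pred = [\"alley\", \"alley\", \"park\", \"entrance_hall\", \"park\"]\n",
+    "# Rows = true class, columns = predicted class, both ordered as in class_names.\n",
+    "confusion_matrix(toy_true, toy_pred, labels=class_names)"
+   ]
+  },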
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "hByY7yt81t9o"
+   },
+   "outputs": [],
+   "source": [
+    "# Predict on the validation set and compare the predicted classes with the true labels.\n",
+    "results = model.predict(val_ds)\n",
+    "ds = list(val_ds.unbatch().as_numpy_iterator())\n",
+    "\n",
+    "y_true = [class_names[entry[1]] for entry in ds]\n",
+    "y_pred = [class_names[np.argmax(result)] for result in results]\n",
+    "\n",
+    "matrix_confusion = confusion_matrix(y_true, y_pred)\n",
+    "f, ax = plt.subplots(figsize=(10, 10))\n",
+    "ax = sns.heatmap(matrix_confusion, square=True, annot=True, cmap='Blues', fmt='d', cbar=False,\n",
+    "                 xticklabels=class_names, yticklabels=class_names, annot_kws={\"fontsize\": 20})\n",
+    "ax.xaxis.tick_top()\n",
+    "ax.xaxis.set_label_position('top')\n",
+    "plt.tick_params(axis='both', which='major', labelsize=25, labelbottom=False, left=False, bottom=False, top=False, labeltop=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "bRDeeXIr3HNo"
+   },
+   "source": [
+    "## Classification report"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "Let's see the classification report!\n",
+    "\n",
+    "We can observe the following metrics:\n",
+    "- **precision** - this is a frequentist probability that the predicted class is true. It is calculated with the equation TP / (TP + FP).\n",
+    "With just one positive and correct prediction, precision will have a value of 1, even if all the other examples of that class were missed (False Negatives).\n",
+    "- **recall** - this is a frequentist probability that the true class is not missed. It is calculated with the equation TP / (TP + FN). If all the predictions are positive, recall will have a value of 1.\n",
+    "\n",
+    "As a rule, increasing precision comes at the cost of recall, and vice versa: increasing recall decreases precision. This is why we have another metric:\n",
+    "- **f1** - the harmonic mean of precision and recall: 2 * precision * recall / (precision + recall).\n",
+    "\n",
+    "On the other hand, we have:\n",
+    "- **accuracy** - the ratio of correct predictions (both True Positives and True Negatives) among all predictions. It is calculated with the equation (TP + TN) / (TP + FP + TN + FN).\n",
+    "\n",
+    "In general, f1 and accuracy give a better overall picture of the quality of the model, while precision and recall describe its more specific characteristics.\n",
+    "\n",
+    "When the dataset is unbalanced, f1 is much more reliable than accuracy.\n",
+    "\n",
+    "After printing the report, we will also recompute precision, recall and f1 for each class directly from the confusion matrix, as shown below.\n",
+    "\n",
+    "The following article explains all of the above quite well:\n",
+    "\n",
+    "https://medium.com/analytics-vidhya/confusion-matrix-accuracy-precision-recall-f1-score-ade299cf63cd"
+   ],
+   "metadata": {
+    "id": "VjeQNCVsBScm"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "aS5LhPp13Let"
+   },
+   "outputs": [],
+   "source": [
+    "print(classification_report(y_true, y_pred, target_names=class_names))"
+   ]
+  },
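+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an optional cross-check of the formulas above, the cell below recomputes precision, recall and f1 for each class directly from the confusion matrix. This is only an illustrative sketch: it assumes every class appears at least once among the true labels and the predictions, otherwise some of the ratios are undefined."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative cross-check: derive per-class precision, recall and f1 from the confusion matrix.\n",
+    "cm = confusion_matrix(y_true, y_pred, labels=class_names)\n",
+    "for i, name in enumerate(class_names):\n",
+    "    tp = cm[i, i]\n",
+    "    fp = cm[:, i].sum() - tp  # predicted as this class, but the true class was different\n",
+    "    fn = cm[i, :].sum() - tp  # true class was this one, but the prediction was different\n",
+    "    precision = tp / (tp + fp)\n",
+    "    recall = tp / (tp + fn)\n",
+    "    f1 = 2 * precision * recall / (precision + recall)\n",
+    "    print(f\"{name}: precision={precision:.2f}, recall={recall:.2f}, f1={f1:.2f}\")"
+   ]
+  },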
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "rtHJu6P31uIK"
+   },
+   "source": [
+    "## Misclassified images"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "Let's plot the misclassified images and see how our model confused their classes!"
+   ],
+   "metadata": {
+    "id": "iU16Qm19Goai"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "LYtKyhSTtd1f"
+   },
+   "outputs": [],
+   "source": [
+    "# Collect every validation image whose predicted class differs from the true label.\n",
+    "images_to_plot = []\n",
+    "\n",
+    "for result, gt in zip(results, ds):\n",
+    "    img, label = gt\n",
+    "    predicted_class = np.argmax(result)\n",
+    "    if predicted_class != label:\n",
+    "        images_to_plot.append((img, label, predicted_class))\n",
+    "\n",
+    "# Show the misclassified images in a grid, four per row, titled with true and predicted classes.\n",
+    "fig = plt.figure(figsize=(128., 128.))\n",
+    "grid = ImageGrid(fig, 111, nrows_ncols=(math.ceil(len(images_to_plot) / 4), 4), axes_pad=0.6,)\n",
+    "\n",
+    "for ax, im in zip(grid, images_to_plot):\n",
+    "    ax.set_title(f\"True: {class_names[im[1]]}, predicted: {class_names[im[2]]}\", fontdict=None, loc='center', color = \"k\", fontsize=15)\n",
+    "    ax.imshow(im[0] / 255)\n",
+    "\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "name": "places365 - train.ipynb",
+   "provenance": [],
+   "collapsed_sections": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file