diff --git a/8_0_NSYNTH_iterator.ipynb b/8_0_NSYNTH_iterator.ipynb new file mode 100644 index 0000000..a0602c0 --- /dev/null +++ b/8_0_NSYNTH_iterator.ipynb @@ -0,0 +1,1297 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "8.0-NSYNTH-iterator.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "7cdc8e7bc48945d99626f03332d3f223": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_80aed0544eca450fb2bbbf5af130e2b5", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_c18aef7b882b44b78c3f4f394ca68124", + "IPY_MODEL_12444d3318214c9d9f73c84948844cec" + ] + } + }, + "80aed0544eca450fb2bbbf5af130e2b5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + 
"grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "c18aef7b882b44b78c3f4f394ca68124": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_65f6a5e01f854fdaa1ed5b1f9c005f42", + "_dom_classes": [], + "description": "Dl Completed...: 48%", + "_model_name": "FloatProgressModel", + "bar_style": "", + "max": 1069, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 509, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_9594878a78314303a905d82149b1f4a6" + } + }, + "12444d3318214c9d9f73c84948844cec": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_33e7695cba0a4f1a9c40b184177d0036", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 509/1069 [11:52<12:59, 1.39s/ file]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_4577a09fed0a478eb7265636e54c293f" + } + }, + 
"65f6a5e01f854fdaa1ed5b1f9c005f42": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "initial", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "9594878a78314303a905d82149b1f4a6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "33e7695cba0a4f1a9c40b184177d0036": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": 
null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "4577a09fed0a478eb7265636e54c293f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "XsoxGPJE6O2F" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/8.0-NSYNTH-iterator.ipynb)\n", + "\n", + "## Iterator for NSynth \n", + "\n", + "The NSynth dataset is a set of thousands of musical notes saved as waveforms. To input these into a Seq2Seq model as spectrograms, I wrote a small dataset class that converts the waveforms to spectrograms in TensorFlow (using the code from the spectrogramming notebook). 
\n", + "\n", + "![a dataset iterator for tensorflow 2.0](https://github.com/timsainb/tensorflow2-generative-models/blob/master/imgs/nsynth-dataset.png?raw=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "frFs6o7-6O2J" + }, + "source": [ + "### Install packages if in colab" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "UnM8bBmh6O2K", + "outputId": "48b1811e-3521-44a2-a1e5-bde7607aa7ad", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "### install necessary packages if in colab\n", + "def run_subprocess_command(cmd):\n", + " process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)\n", + " for line in process.stdout:\n", + " print(line.decode().strip())\n", + "\n", + "\n", + "import sys, subprocess\n", + "\n", + "IN_COLAB = \"google.colab\" in sys.modules\n", + "colab_requirements = [\n", + " \"pip install tf-nightly-gpu-2.0-preview==2.0.0.dev20190513\",\n", + " \"pip install librosa\",\n", + " \"pip uninstall librosa -y\",\n", + " \"pip install 'librosa==0.7.2'\",\n", + " \"pip install pathlib2\",\n", + " \"pip install numba\"\n", + "]\n", + "if IN_COLAB:\n", + " for i in colab_requirements:\n", + " run_subprocess_command(i)" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Requirement already satisfied: librosa in /usr/local/lib/python3.7/dist-packages (0.7.2)\n", + "Requirement already satisfied: resampy>=0.2.2 in /usr/local/lib/python3.7/dist-packages (from librosa) (0.2.2)\n", + "Requirement already satisfied: soundfile>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from librosa) (0.10.3.post1)\n", + "Requirement already satisfied: audioread>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from librosa) (2.1.9)\n", + "Requirement already satisfied: decorator>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from librosa) (4.4.2)\n", + "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from librosa) 
(1.19.5)\n", + "Requirement already satisfied: scikit-learn!=0.19.0,>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from librosa) (0.22.2.post1)\n", + "Requirement already satisfied: numba>=0.43.0 in /usr/local/lib/python3.7/dist-packages (from librosa) (0.51.2)\n", + "Requirement already satisfied: six>=1.3 in /usr/local/lib/python3.7/dist-packages (from librosa) (1.15.0)\n", + "Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from librosa) (1.4.1)\n", + "Requirement already satisfied: joblib>=0.12 in /usr/local/lib/python3.7/dist-packages (from librosa) (1.0.1)\n", + "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.7/dist-packages (from soundfile>=0.9.0->librosa) (1.14.6)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from numba>=0.43.0->librosa) (57.2.0)\n", + "Requirement already satisfied: llvmlite<0.35,>=0.34.0.dev0 in /usr/local/lib/python3.7/dist-packages (from numba>=0.43.0->librosa) (0.34.0)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/dist-packages (from cffi>=1.0->soundfile>=0.9.0->librosa) (2.20)\n", + "Uninstalling librosa-0.7.2:\n", + "Successfully uninstalled librosa-0.7.2\n", + "Requirement already satisfied: pathlib2 in /usr/local/lib/python3.7/dist-packages (2.3.6)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from pathlib2) (1.15.0)\n", + "Requirement already satisfied: numba in /usr/local/lib/python3.7/dist-packages (0.51.2)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from numba) (57.2.0)\n", + "Requirement already satisfied: llvmlite<0.35,>=0.34.0.dev0 in /usr/local/lib/python3.7/dist-packages (from numba) (0.34.0)\n", + "Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.7/dist-packages (from numba) (1.19.5)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"E3vUYC6W6O2L" + }, + "source": [ + "### load packages" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qW9bAs78BxCw", + "outputId": "4e31a2ae-f0d1-4823-98e7-ede8e847b5f7", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "!apt-get install python-pip python3-pip\n" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Reading package lists... Done\n", + "Building dependency tree \n", + "Reading state information... Done\n", + "The following additional packages will be installed:\n", + " libpython-all-dev python-all python-all-dev python-asn1crypto\n", + " python-cffi-backend python-crypto python-cryptography python-dbus\n", + " python-enum34 python-gi python-idna python-ipaddress python-keyring\n", + " python-keyrings.alt python-pip-whl python-pkg-resources python-secretstorage\n", + " python-setuptools python-six python-wheel python-xdg python3-asn1crypto\n", + " python3-cffi-backend python3-crypto python3-cryptography python3-idna\n", + " python3-keyring python3-keyrings.alt python3-pkg-resources\n", + " python3-secretstorage python3-setuptools python3-six python3-wheel\n", + " python3-xdg\n", + "Suggested packages:\n", + " python-crypto-doc python-cryptography-doc python-cryptography-vectors\n", + " python-dbus-dbg python-dbus-doc python-enum34-doc python-gi-cairo\n", + " gnome-keyring libkf5wallet-bin gir1.2-gnomekeyring-1.0 python-fs\n", + " python-gdata python-keyczar python-secretstorage-doc python-setuptools-doc\n", + " python3-cryptography-vectors\n", + "The following NEW packages will be installed:\n", + " libpython-all-dev python-all python-all-dev python-asn1crypto\n", + " python-cffi-backend python-crypto python-cryptography python-dbus\n", + " python-enum34 python-gi python-idna python-ipaddress python-keyring\n", + " python-keyrings.alt python-pip python-pip-whl python-pkg-resources\n", + " python-secretstorage python-setuptools python-six python-wheel python-xdg\n", + " 
python3-asn1crypto python3-cffi-backend python3-crypto python3-cryptography\n", + " python3-idna python3-keyring python3-keyrings.alt python3-pip\n", + " python3-pkg-resources python3-secretstorage python3-setuptools python3-six\n", + " python3-wheel python3-xdg\n", + "0 upgraded, 36 newly installed, 0 to remove and 39 not upgraded.\n", + "Need to get 4,660 kB of archives.\n", + "After this operation, 17.8 MB of additional disk space will be used.\n", + "Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 libpython-all-dev amd64 2.7.15~rc1-1 [1,092 B]\n", + "Get:2 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-all amd64 2.7.15~rc1-1 [1,076 B]\n", + "Get:3 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-all-dev amd64 2.7.15~rc1-1 [1,100 B]\n", + "Get:4 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-asn1crypto all 0.24.0-1 [72.7 kB]\n", + "Get:5 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-cffi-backend amd64 1.11.5-1 [63.4 kB]\n", + "Get:6 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-crypto amd64 2.6.1-8ubuntu2 [244 kB]\n", + "Get:7 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-enum34 all 1.1.6-2 [34.8 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-idna all 2.6-1 [32.4 kB]\n", + "Get:9 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-ipaddress all 1.0.17-1 [18.2 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-six all 1.11.0-2 [11.3 kB]\n", + "Get:11 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 python-cryptography amd64 2.1.4-1ubuntu1.4 [276 kB]\n", + "Get:12 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-dbus amd64 1.2.6-1 [90.2 kB]\n", + "Get:13 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 python-gi amd64 3.26.1-2ubuntu1 [197 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-secretstorage all 2.3.1-2 [11.8 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu 
bionic/main amd64 python-keyring all 10.6.0-1 [30.6 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-keyrings.alt all 3.0-1 [16.7 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 python-pip-whl all 9.0.1-2.3~ubuntu1.18.04.5 [1,653 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 python-pip all 9.0.1-2.3~ubuntu1.18.04.5 [151 kB]\n", + "Get:19 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-pkg-resources all 39.0.1-2 [128 kB]\n", + "Get:20 http://archive.ubuntu.com/ubuntu bionic/main amd64 python-setuptools all 39.0.1-2 [329 kB]\n", + "Get:21 http://archive.ubuntu.com/ubuntu bionic/universe amd64 python-wheel all 0.30.0-0.2 [36.4 kB]\n", + "Get:22 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 python-xdg all 0.25-4ubuntu1.1 [31.2 kB]\n", + "Get:23 http://archive.ubuntu.com/ubuntu bionic/main amd64 python3-asn1crypto all 0.24.0-1 [72.8 kB]\n", + "Get:24 http://archive.ubuntu.com/ubuntu bionic/main amd64 python3-cffi-backend amd64 1.11.5-1 [64.6 kB]\n", + "Get:25 http://archive.ubuntu.com/ubuntu bionic/main amd64 python3-crypto amd64 2.6.1-8ubuntu2 [244 kB]\n", + "Get:26 http://archive.ubuntu.com/ubuntu bionic/main amd64 python3-idna all 2.6-1 [32.5 kB]\n", + "Get:27 http://archive.ubuntu.com/ubuntu bionic/main amd64 python3-six all 1.11.0-2 [11.4 kB]\n", + "Get:28 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 python3-cryptography amd64 2.1.4-1ubuntu1.4 [220 kB]\n", + "Get:29 http://archive.ubuntu.com/ubuntu bionic/main amd64 python3-secretstorage all 2.3.1-2 [12.1 kB]\n", + "Get:30 http://archive.ubuntu.com/ubuntu bionic/main amd64 python3-keyring all 10.6.0-1 [26.7 kB]\n", + "Get:31 http://archive.ubuntu.com/ubuntu bionic/main amd64 python3-keyrings.alt all 3.0-1 [16.6 kB]\n", + "Get:32 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 python3-pip all 9.0.1-2.3~ubuntu1.18.04.5 [114 kB]\n", + "Get:33 
http://archive.ubuntu.com/ubuntu bionic/main amd64 python3-pkg-resources all 39.0.1-2 [98.8 kB]\n", + "Get:34 http://archive.ubuntu.com/ubuntu bionic/main amd64 python3-setuptools all 39.0.1-2 [248 kB]\n", + "Get:35 http://archive.ubuntu.com/ubuntu bionic/universe amd64 python3-wheel all 0.30.0-0.2 [36.5 kB]\n", + "Get:36 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 python3-xdg all 0.25-4ubuntu1.1 [31.3 kB]\n", + "Fetched 4,660 kB in 3s (1,425 kB/s)\n", + "Extracting templates from packages: 100%\n", + "Selecting previously unselected package libpython-all-dev:amd64.\n", + "(Reading database ... 160837 files and directories currently installed.)\n", + "Preparing to unpack .../00-libpython-all-dev_2.7.15~rc1-1_amd64.deb ...\n", + "Unpacking libpython-all-dev:amd64 (2.7.15~rc1-1) ...\n", + "Selecting previously unselected package python-all.\n", + "Preparing to unpack .../01-python-all_2.7.15~rc1-1_amd64.deb ...\n", + "Unpacking python-all (2.7.15~rc1-1) ...\n", + "Selecting previously unselected package python-all-dev.\n", + "Preparing to unpack .../02-python-all-dev_2.7.15~rc1-1_amd64.deb ...\n", + "Unpacking python-all-dev (2.7.15~rc1-1) ...\n", + "Selecting previously unselected package python-asn1crypto.\n", + "Preparing to unpack .../03-python-asn1crypto_0.24.0-1_all.deb ...\n", + "Unpacking python-asn1crypto (0.24.0-1) ...\n", + "Selecting previously unselected package python-cffi-backend.\n", + "Preparing to unpack .../04-python-cffi-backend_1.11.5-1_amd64.deb ...\n", + "Unpacking python-cffi-backend (1.11.5-1) ...\n", + "Selecting previously unselected package python-crypto.\n", + "Preparing to unpack .../05-python-crypto_2.6.1-8ubuntu2_amd64.deb ...\n", + "Unpacking python-crypto (2.6.1-8ubuntu2) ...\n", + "Selecting previously unselected package python-enum34.\n", + "Preparing to unpack .../06-python-enum34_1.1.6-2_all.deb ...\n", + "Unpacking python-enum34 (1.1.6-2) ...\n", + "Selecting previously unselected package python-idna.\n", + 
"Preparing to unpack .../07-python-idna_2.6-1_all.deb ...\n", + "Unpacking python-idna (2.6-1) ...\n", + "Selecting previously unselected package python-ipaddress.\n", + "Preparing to unpack .../08-python-ipaddress_1.0.17-1_all.deb ...\n", + "Unpacking python-ipaddress (1.0.17-1) ...\n", + "Selecting previously unselected package python-six.\n", + "Preparing to unpack .../09-python-six_1.11.0-2_all.deb ...\n", + "Unpacking python-six (1.11.0-2) ...\n", + "Selecting previously unselected package python-cryptography.\n", + "Preparing to unpack .../10-python-cryptography_2.1.4-1ubuntu1.4_amd64.deb ...\n", + "Unpacking python-cryptography (2.1.4-1ubuntu1.4) ...\n", + "Selecting previously unselected package python-dbus.\n", + "Preparing to unpack .../11-python-dbus_1.2.6-1_amd64.deb ...\n", + "Unpacking python-dbus (1.2.6-1) ...\n", + "Selecting previously unselected package python-gi.\n", + "Preparing to unpack .../12-python-gi_3.26.1-2ubuntu1_amd64.deb ...\n", + "Unpacking python-gi (3.26.1-2ubuntu1) ...\n", + "Selecting previously unselected package python-secretstorage.\n", + "Preparing to unpack .../13-python-secretstorage_2.3.1-2_all.deb ...\n", + "Unpacking python-secretstorage (2.3.1-2) ...\n", + "Selecting previously unselected package python-keyring.\n", + "Preparing to unpack .../14-python-keyring_10.6.0-1_all.deb ...\n", + "Unpacking python-keyring (10.6.0-1) ...\n", + "Selecting previously unselected package python-keyrings.alt.\n", + "Preparing to unpack .../15-python-keyrings.alt_3.0-1_all.deb ...\n", + "Unpacking python-keyrings.alt (3.0-1) ...\n", + "Selecting previously unselected package python-pip-whl.\n", + "Preparing to unpack .../16-python-pip-whl_9.0.1-2.3~ubuntu1.18.04.5_all.deb ...\n", + "Unpacking python-pip-whl (9.0.1-2.3~ubuntu1.18.04.5) ...\n", + "Selecting previously unselected package python-pip.\n", + "Preparing to unpack .../17-python-pip_9.0.1-2.3~ubuntu1.18.04.5_all.deb ...\n", + "Unpacking python-pip (9.0.1-2.3~ubuntu1.18.04.5) 
...\n", + "Selecting previously unselected package python-pkg-resources.\n", + "Preparing to unpack .../18-python-pkg-resources_39.0.1-2_all.deb ...\n", + "Unpacking python-pkg-resources (39.0.1-2) ...\n", + "Selecting previously unselected package python-setuptools.\n", + "Preparing to unpack .../19-python-setuptools_39.0.1-2_all.deb ...\n", + "Unpacking python-setuptools (39.0.1-2) ...\n", + "Selecting previously unselected package python-wheel.\n", + "Preparing to unpack .../20-python-wheel_0.30.0-0.2_all.deb ...\n", + "Unpacking python-wheel (0.30.0-0.2) ...\n", + "Selecting previously unselected package python-xdg.\n", + "Preparing to unpack .../21-python-xdg_0.25-4ubuntu1.1_all.deb ...\n", + "Unpacking python-xdg (0.25-4ubuntu1.1) ...\n", + "Selecting previously unselected package python3-asn1crypto.\n", + "Preparing to unpack .../22-python3-asn1crypto_0.24.0-1_all.deb ...\n", + "Unpacking python3-asn1crypto (0.24.0-1) ...\n", + "Selecting previously unselected package python3-cffi-backend.\n", + "Preparing to unpack .../23-python3-cffi-backend_1.11.5-1_amd64.deb ...\n", + "Unpacking python3-cffi-backend (1.11.5-1) ...\n", + "Selecting previously unselected package python3-crypto.\n", + "Preparing to unpack .../24-python3-crypto_2.6.1-8ubuntu2_amd64.deb ...\n", + "Unpacking python3-crypto (2.6.1-8ubuntu2) ...\n", + "Selecting previously unselected package python3-idna.\n", + "Preparing to unpack .../25-python3-idna_2.6-1_all.deb ...\n", + "Unpacking python3-idna (2.6-1) ...\n", + "Selecting previously unselected package python3-six.\n", + "Preparing to unpack .../26-python3-six_1.11.0-2_all.deb ...\n", + "Unpacking python3-six (1.11.0-2) ...\n", + "Selecting previously unselected package python3-cryptography.\n", + "Preparing to unpack .../27-python3-cryptography_2.1.4-1ubuntu1.4_amd64.deb ...\n", + "Unpacking python3-cryptography (2.1.4-1ubuntu1.4) ...\n", + "Selecting previously unselected package python3-secretstorage.\n", + "Preparing to unpack 
.../28-python3-secretstorage_2.3.1-2_all.deb ...\n", + "Unpacking python3-secretstorage (2.3.1-2) ...\n", + "Selecting previously unselected package python3-keyring.\n", + "Preparing to unpack .../29-python3-keyring_10.6.0-1_all.deb ...\n", + "Unpacking python3-keyring (10.6.0-1) ...\n", + "Selecting previously unselected package python3-keyrings.alt.\n", + "Preparing to unpack .../30-python3-keyrings.alt_3.0-1_all.deb ...\n", + "Unpacking python3-keyrings.alt (3.0-1) ...\n", + "Selecting previously unselected package python3-pip.\n", + "Preparing to unpack .../31-python3-pip_9.0.1-2.3~ubuntu1.18.04.5_all.deb ...\n", + "Unpacking python3-pip (9.0.1-2.3~ubuntu1.18.04.5) ...\n", + "Selecting previously unselected package python3-pkg-resources.\n", + "Preparing to unpack .../32-python3-pkg-resources_39.0.1-2_all.deb ...\n", + "Unpacking python3-pkg-resources (39.0.1-2) ...\n", + "Selecting previously unselected package python3-setuptools.\n", + "Preparing to unpack .../33-python3-setuptools_39.0.1-2_all.deb ...\n", + "Unpacking python3-setuptools (39.0.1-2) ...\n", + "Selecting previously unselected package python3-wheel.\n", + "Preparing to unpack .../34-python3-wheel_0.30.0-0.2_all.deb ...\n", + "Unpacking python3-wheel (0.30.0-0.2) ...\n", + "Selecting previously unselected package python3-xdg.\n", + "Preparing to unpack .../35-python3-xdg_0.25-4ubuntu1.1_all.deb ...\n", + "Unpacking python3-xdg (0.25-4ubuntu1.1) ...\n", + "Setting up python-idna (2.6-1) ...\n", + "Setting up python-pip-whl (9.0.1-2.3~ubuntu1.18.04.5) ...\n", + "Setting up python-asn1crypto (0.24.0-1) ...\n", + "Setting up python-crypto (2.6.1-8ubuntu2) ...\n", + "Setting up python3-cffi-backend (1.11.5-1) ...\n", + "Setting up python3-crypto (2.6.1-8ubuntu2) ...\n", + "Setting up python3-idna (2.6-1) ...\n", + "Setting up python3-xdg (0.25-4ubuntu1.1) ...\n", + "Setting up python-wheel (0.30.0-0.2) ...\n", + "Setting up python3-six (1.11.0-2) ...\n", + "Setting up libpython-all-dev:amd64 
(2.7.15~rc1-1) ...\n", + "Setting up python3-wheel (0.30.0-0.2) ...\n", + "Setting up python3-pkg-resources (39.0.1-2) ...\n", + "Setting up python-pkg-resources (39.0.1-2) ...\n", + "Setting up python3-asn1crypto (0.24.0-1) ...\n", + "Setting up python-cffi-backend (1.11.5-1) ...\n", + "Setting up python-gi (3.26.1-2ubuntu1) ...\n", + "Setting up python-six (1.11.0-2) ...\n", + "Setting up python-enum34 (1.1.6-2) ...\n", + "Setting up python3-pip (9.0.1-2.3~ubuntu1.18.04.5) ...\n", + "Setting up python3-setuptools (39.0.1-2) ...\n", + "Setting up python-dbus (1.2.6-1) ...\n", + "Setting up python-ipaddress (1.0.17-1) ...\n", + "Setting up python-pip (9.0.1-2.3~ubuntu1.18.04.5) ...\n", + "Setting up python-all (2.7.15~rc1-1) ...\n", + "Setting up python-xdg (0.25-4ubuntu1.1) ...\n", + "Setting up python3-cryptography (2.1.4-1ubuntu1.4) ...\n", + "Setting up python-setuptools (39.0.1-2) ...\n", + "Setting up python3-keyrings.alt (3.0-1) ...\n", + "Setting up python-keyrings.alt (3.0-1) ...\n", + "Setting up python-all-dev (2.7.15~rc1-1) ...\n", + "Setting up python-cryptography (2.1.4-1ubuntu1.4) ...\n", + "Setting up python3-secretstorage (2.3.1-2) ...\n", + "Setting up python-secretstorage (2.3.1-2) ...\n", + "Setting up python-keyring (10.6.0-1) ...\n", + "Setting up python3-keyring (10.6.0-1) ...\n", + "Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HyVFs0O26O2M", + "outputId": "363b4837-e98e-401a-95c3-a5b12221a6de", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 762 + } + }, + "source": [ + "!pip uninstall librosa -y\n", + "!pip install \"librosa==0.7.2\" \n", + "!pip3 install pathlib2\n", + "!pip install numba==0.48" + ], + "execution_count": 22, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Uninstalling librosa-0.7.2:\n", + " Successfully uninstalled librosa-0.7.2\n", + "Processing 
/root/.cache/pip/wheels/4c/6e/d7/bb93911540d2d1e44d690a1561871e5b6af82b69e80938abef/librosa-0.7.2-cp37-none-any.whl\n", + "Requirement already satisfied: six>=1.3 in /usr/local/lib/python3.7/dist-packages (from librosa==0.7.2) (1.15.0)\n", + "Requirement already satisfied: scikit-learn!=0.19.0,>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from librosa==0.7.2) (0.22.2.post1)\n", + "Requirement already satisfied: joblib>=0.12 in /usr/local/lib/python3.7/dist-packages (from librosa==0.7.2) (1.0.1)\n", + "Requirement already satisfied: audioread>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from librosa==0.7.2) (2.1.9)\n", + "Requirement already satisfied: decorator>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from librosa==0.7.2) (4.4.2)\n", + "Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from librosa==0.7.2) (1.4.1)\n", + "Requirement already satisfied: resampy>=0.2.2 in /usr/local/lib/python3.7/dist-packages (from librosa==0.7.2) (0.2.2)\n", + "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from librosa==0.7.2) (1.19.5)\n", + "Requirement already satisfied: soundfile>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from librosa==0.7.2) (0.10.3.post1)\n", + "Requirement already satisfied: numba>=0.43.0 in /usr/local/lib/python3.7/dist-packages (from librosa==0.7.2) (0.51.2)\n", + "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.7/dist-packages (from soundfile>=0.9.0->librosa==0.7.2) (1.14.6)\n", + "Requirement already satisfied: llvmlite<0.35,>=0.34.0.dev0 in /usr/local/lib/python3.7/dist-packages (from numba>=0.43.0->librosa==0.7.2) (0.34.0)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from numba>=0.43.0->librosa==0.7.2) (57.2.0)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/dist-packages (from cffi>=1.0->soundfile>=0.9.0->librosa==0.7.2) (2.20)\n", + "Installing collected 
packages: librosa\n", + "Successfully installed librosa-0.7.2\n", + "Requirement already satisfied: pathlib2 in /usr/local/lib/python3.7/dist-packages (2.3.6)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from pathlib2) (1.15.0)\n", + "Collecting numba==0.48\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/39/dc/5ce4a94d98e8a31cab21b150e23ca2f09a7dd354c06a69f71801ecd890db/numba-0.48.0-1-cp37-cp37m-manylinux2014_x86_64.whl (3.5MB)\n", + "\u001b[K |████████████████████████████████| 3.6MB 4.3MB/s \n", + "\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from numba==0.48) (57.2.0)\n", + "Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.7/dist-packages (from numba==0.48) (1.19.5)\n", + "Collecting llvmlite<0.32.0,>=0.31.0dev0\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/a0/10/d02c0ac683fc47ecda3426249509cf771d748b6a2c0e9d5ebbee76a7b80a/llvmlite-0.31.0-cp37-cp37m-manylinux1_x86_64.whl (20.2MB)\n", + "\u001b[K |████████████████████████████████| 20.2MB 1.3MB/s \n", + "\u001b[?25hInstalling collected packages: llvmlite, numba\n", + " Found existing installation: llvmlite 0.34.0\n", + " Uninstalling llvmlite-0.34.0:\n", + " Successfully uninstalled llvmlite-0.34.0\n", + " Found existing installation: numba 0.51.2\n", + " Uninstalling numba-0.51.2:\n", + " Successfully uninstalled numba-0.51.2\n", + "Successfully installed llvmlite-0.31.0 numba-0.48.0\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "llvmlite", + "numba" + ] + } + } + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:55.963169Z", + "start_time": "2019-05-14T04:26:50.333533Z" + }, + "id": "76mZwrEE6O2M" + }, + "source": [ + "import tensorflow as tf\n", + 
"import tensorflow_datasets as tfds\n", + "from tensorflow.io import FixedLenFeature, parse_single_example\n", + "from librosa.core.time_frequency import mel_frequencies\n", + "from pathlib2 import Path\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:56.068693Z", + "start_time": "2019-05-14T04:26:55.984505Z" + }, + "id": "VKk8CslN6O2N", + "outputId": "adddf541-3d70-4244-beee-695e54474395", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "print(tf.__version__)" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "2.5.0\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PgFB4m9B6O2P" + }, + "source": [ + "### Download or load dataset\n", + "Tensorflow datasets will automatically download or load the dataset for you at this location" + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:56.152232Z", + "start_time": "2019-05-14T04:26:56.072195Z" + }, + "id": "lhLGzk4o6O2P", + "outputId": "5b64080f-8e2c-43bb-95ee-28c18ef9efcf", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "DATA_DIR = Path(\"data\").resolve()\n", + "DATA_DIR" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "PosixPath('/content/data')" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:57.433005Z", + "start_time": "2019-05-14T04:26:56.154669Z" + }, + "id": "l77eVYjy6O2Q", + "outputId": "0c04c13e-7f3b-4d70-e291-88b1f763c61d", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 155, + "referenced_widgets": [ + 
"7cdc8e7bc48945d99626f03332d3f223", + "80aed0544eca450fb2bbbf5af130e2b5", + "c18aef7b882b44b78c3f4f394ca68124", + "12444d3318214c9d9f73c84948844cec", + "65f6a5e01f854fdaa1ed5b1f9c005f42", + "9594878a78314303a905d82149b1f4a6", + "33e7695cba0a4f1a9c40b184177d0036", + "4577a09fed0a478eb7265636e54c293f" + ] + } + }, + "source": [ + "ds_train, ds_test = tfds.load(\n", + " name=\"nsynth\", split=[\"train\", \"test\"], data_dir=DATA_DIR\n", + ")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\u001b[1mDownloading and preparing dataset nsynth/full/2.3.3 (download: 73.07 GiB, generated: 73.09 GiB, total: 146.16 GiB) to /content/data/nsynth/full/2.3.3...\u001b[0m\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "WARNING:absl:Dataset nsynth is hosted on GCS. It will automatically be downloaded to your\n", + "local data directory. If you'd instead prefer to read directly from our public\n", + "GCS bucket (recommended if you're running on GCP), you can instead pass\n", + "`try_gcs=True` to `tfds.load` or set `data_dir=gs://tfds-data/datasets`.\n", + "\n" + ], + "name": "stderr" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7cdc8e7bc48945d99626f03332d3f223", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, description='Dl Completed...', max=1069.0, style=ProgressStyle(descrip…" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:57.446559Z", + "start_time": "2019-05-14T04:26:57.436208Z" + }, + "id": "H1Ka9FPF6O2Q" + }, + "source": [ + "ds_train" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tDsqVgIR6O2R" + }, + "source": [ + "### Prepare spectrogramming and parameters" + ] + }, + { + "cell_type": "code", + "metadata": { 
+ "ExecuteTime": { + "end_time": "2019-05-14T04:26:57.559856Z", + "start_time": "2019-05-14T04:26:57.448599Z" + }, + "code_folding": [], + "id": "5v6AkD4j6O2R" + }, + "source": [ + "def _normalize_tensorflow(S, hparams):\n", + " return tf.clip_by_value((S - hparams.min_level_db) / -hparams.min_level_db, 0, 1)\n", + "\n", + "def _tf_log10(x):\n", + " numerator = tf.math.log(x)\n", + " denominator = tf.math.log(tf.constant(10, dtype=numerator.dtype))\n", + " return numerator / denominator\n", + "\n", + "\n", + "def _amp_to_db_tensorflow(x):\n", + " return 20 * _tf_log10(tf.clip_by_value(tf.abs(x), 1e-5, 1e100))\n", + "\n", + "\n", + "def _stft_tensorflow(signals, hparams):\n", + " return tf.signal.stft(\n", + " signals,\n", + " hparams.win_length,\n", + " hparams.hop_length,\n", + " hparams.n_fft,\n", + " pad_end=True,\n", + " window_fn=tf.signal.hann_window,\n", + " )\n", + "\n", + "\n", + "def spectrogram_tensorflow(y, hparams):\n", + " D = _stft_tensorflow(y, hparams)\n", + " S = _amp_to_db_tensorflow(tf.abs(D)) - hparams.ref_level_db\n", + " return _normalize_tensorflow(S, hparams)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:57.651322Z", + "start_time": "2019-05-14T04:26:57.561803Z" + }, + "id": "IHmb8R3t6O2S" + }, + "source": [ + "class HParams(object):\n", + " \"\"\" Hparams was removed from tf 2.0alpha so this is a placeholder\n", + " \"\"\"\n", + " def __init__(self, **kwargs):\n", + " self.__dict__.update(kwargs)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:57.742833Z", + "start_time": "2019-05-14T04:26:57.653238Z" + }, + "id": "S7_zdc636O2S" + }, + "source": [ + "hparams = HParams( \n", + " # network\n", + " batch_size = 32,\n", + " # spectrogramming\n", + " sample_rate = 16000,\n", + " create_spectrogram = True,\n", + " win_length = 1024,\n", + " n_fft 
= 1024,\n", + " hop_length= 400,\n", + " ref_level_db = 50,\n", + " min_level_db = -100,\n", + " # mel scaling\n", + " num_mel_bins = 128,\n", + " mel_lower_edge_hertz = 0,\n", + " mel_upper_edge_hertz = 8000,\n", + " # inversion\n", + " power = 1.5, # for spectral inversion\n", + " griffin_lim_iters = 50,\n", + " pad=True,\n", + " #\n", + ")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-13T19:31:28.258830Z", + "start_time": "2019-05-13T19:31:28.254192Z" + }, + "id": "AZQaBRNa6O2T" + }, + "source": [ + "### Create the dataset class" + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:57.881149Z", + "start_time": "2019-05-14T04:26:57.745008Z" + }, + "id": "2m4m12Vu6O2T" + }, + "source": [ + "class NSynthDataset(object):\n", + " def __init__(\n", + " self,\n", + " tf_records,\n", + " hparams,\n", + " is_training=True,\n", + " test_size = 1000,\n", + " prefetch=1000, # how many spectrograms to prefetch\n", + " num_parallel_calls=10, # how many parallel threads should be preparing data\n", + " n_samples = 305979, # how many items are in the dataset\n", + " shuffle_buffer = 1000,\n", + " ):\n", + " self.is_training = is_training\n", + " self.nsamples = n_samples\n", + " self.test_size = test_size\n", + " self.hparams = hparams\n", + " self.prefetch = prefetch\n", + " self.shuffle_buffer = shuffle_buffer\n", + " # prepare for mel scaling\n", + " if self.hparams.create_spectrogram:\n", + " self.mel_matrix = self._make_mel_matrix()\n", + " # create dataset of tfrecords\n", + " self.raw_dataset = tf.data.TFRecordDataset(tf_records)\n", + " # prepare dataset iterations\n", + " self.dataset = self.raw_dataset.map(\n", + " lambda x: self._parse_function(x), num_parallel_calls=num_parallel_calls\n", + " )\n", + " # make and split train and test datasets\n", + " self.prepare_datasets()\n", + " \n", + " def prepare_datasets(self):\n", 
+ " # Note: there are better ways to do batching and shuffling here:\n", + " # https://www.tensorflow.org/guide/performance/datasets\n", + " \n", + " self.dataset_train = self.dataset.skip(self.test_size)\n", + " self.dataset_train = self.dataset_train.shuffle(self.shuffle_buffer)\n", + " self.dataset_train = self.dataset_train.prefetch(self.prefetch)\n", + " self.dataset_train = self.dataset_train.batch(self.hparams.batch_size)\n", + " \n", + " \n", + " self.dataset_test = self.dataset.take(self.test_size)\n", + " self.dataset_test = self.dataset_test.shuffle(self.shuffle_buffer)\n", + " self.dataset_test = self.dataset_test.prefetch(self.prefetch)\n", + " self.dataset_test = self.dataset_test.batch(self.hparams.batch_size)\n", + " \n", + " \n", + " \n", + " def _make_mel_matrix(self):\n", + " # create mel matrix\n", + " mel_matrix = tf.signal.linear_to_mel_weight_matrix(\n", + " num_mel_bins=self.hparams.num_mel_bins,\n", + " num_spectrogram_bins=int(self.hparams.n_fft / 2) + 1,\n", + " sample_rate=self.hparams.sample_rate,\n", + " lower_edge_hertz=self.hparams.mel_lower_edge_hertz,\n", + " upper_edge_hertz=self.hparams.mel_upper_edge_hertz,\n", + " dtype=tf.dtypes.float32,\n", + " name=None,\n", + " )\n", + " # gets the center frequencies of mel bands\n", + " mel_f = mel_frequencies(\n", + " n_mels=self.hparams.num_mel_bins + 2,\n", + " fmin=self.hparams.mel_lower_edge_hertz,\n", + " fmax=self.hparams.mel_upper_edge_hertz,\n", + " )\n", + " # Slaney-style mel is scaled to be approx constant energy per channel (from librosa)\n", + " enorm = tf.dtypes.cast(\n", + " tf.expand_dims(tf.constant(2.0 / (mel_f[2 : self.hparams.num_mel_bins + 2] - mel_f[:self.hparams.num_mel_bins])), 0),\n", + " tf.float32,\n", + " )\n", + " # normalize matrix\n", + " mel_matrix = tf.multiply(mel_matrix, enorm)\n", + " mel_matrix = tf.divide(mel_matrix, tf.reduce_sum(mel_matrix, axis=0))\n", + " \n", + " return mel_matrix\n", + "\n", + " def print_feature_list(self):\n", + " # get the features\n", + " element = 
list(self.raw_dataset.take(count=1))[0]\n", + " # parse the element in to the example message\n", + " example = tf.train.Example()\n", + " example.ParseFromString(element.numpy())\n", + " print(list(example.features.feature))\n", + "\n", + " def _parse_function(self, example_proto):\n", + " \"\"\" There are a number of features here (listed above). \n", + " I'm only grabbing a few here\n", + " \"\"\"\n", + " features = {\n", + " \"id\": FixedLenFeature([], dtype=tf.string),\n", + " \"pitch\": FixedLenFeature([1], dtype=tf.int64),\n", + " \"velocity\": FixedLenFeature([1], dtype=tf.int64),\n", + " \"audio\": FixedLenFeature([64000], dtype=tf.float32),\n", + " \"instrument/source\": FixedLenFeature([1], dtype=tf.int64),\n", + " \"instrument/family\": FixedLenFeature([1], dtype=tf.int64),\n", + " \"instrument/label\": FixedLenFeature([1], dtype=tf.int64),\n", + " }\n", + " example = parse_single_example(example_proto, features)\n", + "\n", + " if self.hparams.create_spectrogram:\n", + " # create spectrogram\n", + " example[\"spectrogram\"] = spectrogram_tensorflow(\n", + " example[\"audio\"], self.hparams\n", + " )\n", + " # create melspectrogram\n", + " example[\"spectrogram\"] = tf.expand_dims(\n", + " tf.transpose(tf.tensordot(\n", + " example[\"spectrogram\"], self.mel_matrix, 1\n", + " )), axis=2\n", + " )\n", + " \n", + " return example" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dRKkVxrB6O2Z" + }, + "source": [ + "### Produce the dataset from tfrecords" + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T00:45:23.645916Z", + "start_time": "2019-05-14T00:45:23.640539Z" + }, + "id": "PQeZIcGT6O2b" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:57.995096Z", + "start_time": "2019-05-14T04:26:57.883022Z" + }, + "id": "4mhYKPAI6O2b" + 
}, + "source": [ + "training_tfrecords = [str(i) for i in list((DATA_DIR / \"nsynth\").glob('**/*train.tfrecord*'))]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:58.066926Z", + "start_time": "2019-05-14T04:26:57.997209Z" + }, + "id": "LRC4n39J6O2b" + }, + "source": [ + "hparams.batch_size = 32" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:58.822190Z", + "start_time": "2019-05-14T04:26:58.069025Z" + }, + "id": "1YQI11qY6O2c" + }, + "source": [ + "dset = NSynthDataset(training_tfrecords, hparams)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:26:58.834053Z", + "start_time": "2019-05-14T04:26:58.828359Z" + }, + "id": "kWrDpYtD6O2c" + }, + "source": [ + "dset" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PQqiUJ396O2c" + }, + "source": [ + "### Test plot an example from the dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:27:02.046893Z", + "start_time": "2019-05-14T04:26:58.837749Z" + }, + "id": "_fApN3Ra6O2c" + }, + "source": [ + "ex = next(iter(dset.dataset_test))\n", + "np.shape(ex[\"spectrogram\"].numpy())" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:28:35.761169Z", + "start_time": "2019-05-14T04:28:35.355846Z" + }, + "id": "qXD5xiN76O2d" + }, + "source": [ + "fig, ax = plt.subplots(ncols=1, figsize=(15,4))\n", + "cax = ax.matshow(np.squeeze(ex[\"spectrogram\"].numpy()[10]), aspect='auto', origin='lower')\n", + "fig.colorbar(cax)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + 
"end_time": "2019-05-14T04:28:39.985812Z", + "start_time": "2019-05-14T04:28:39.980972Z" + }, + "id": "8fYe750b6O2d" + }, + "source": [ + "### test how fast we can iterate over the dataset\n", + "This value is around 15 iterations/second on my computer locally with 10 parallel threads. It might be slower on colab or other free resources. " + ] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:28:40.757052Z", + "start_time": "2019-05-14T04:28:40.751528Z" + }, + "id": "LjDusuLl6O2d" + }, + "source": [ + "from tqdm.autonotebook import tqdm" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2019-05-14T04:29:59.782683Z", + "start_time": "2019-05-14T04:28:46.857524Z" + }, + "id": "idKaWc3-6O2e" + }, + "source": [ + "for batch, train_x in tqdm(\n", + " zip(range(1000), dset.dataset_train), total=1000):\n", + " continue" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ofR01M7G6O2e" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/Questions/README.md b/Questions/README.md new file mode 100644 index 0000000..30a47af --- /dev/null +++ b/Questions/README.md @@ -0,0 +1,2 @@ +#1- [About solutions for low Disk space on Google Colaboratory free server](https://stackoverflow.com/questions/68428163/about-solutions-for-low-disk-space-on-google-colaboratory-free-server) +![image](https://user-images.githubusercontent.com/6679151/126063817-7036ce0e-ec4e-4cf2-81d8-6dfaa031274b.png) diff --git a/readme.md b/readme.md index 534a9da..87269eb 100644 --- a/readme.md +++ b/readme.md @@ -8,38 +8,38 @@ Generative models in Tensorflow 2 This is a small project to implement a number of generative models in Tensorflow 2. Layers and optimizers use Keras. 
The models are implemented for two datasets: [fashion MNIST](https://github.com/zalandoresearch/fashion-mnist), and [NSYNTH](https://magenta.tensorflow.org/datasets/nsynth). Networks were written with the goal of being as simple and consistent as possible while still being readable. Because each network is self contained within the notebook, they should be easily run in a colab session. ## Included models: -### Autoencoder (AE) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/0.0-Autoencoder-fashion-mnist.ipynb) +### Autoencoder (AE) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Musical-Projects/tensorflow2-generative-models/blob/master/0.0-Autoencoder-fashion-mnist.ipynb) A simple autoencoder network. ![an autoencoder](imgs/ae.png) -### Variational Autoencoder (VAE) ([article](https://arxiv.org/abs/1312.6114)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/1.0-Variational-Autoencoder-fashion-mnist.ipynb) +### Variational Autoencoder (VAE) ([article](https://arxiv.org/abs/1312.6114)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Musical-Projects/tensorflow2-generative-models/blob/master/1.0-Variational-Autoencoder-fashion-mnist.ipynb) The original variational autoencoder network, using [tensorflow_probability](https://github.com/tensorflow/probability) ![variational autoencoder](imgs/vae.png) -### Generative Adversarial Network (GAN) ([article](https://arxiv.org/abs/1406.2661)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/2.0-GAN-fashion-mnist.ipynb) +### 
Generative Adversarial Network (GAN) ([article](https://arxiv.org/abs/1406.2661)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Musical-Projects/tensorflow2-generative-models/blob/master/2.0-GAN-fashion-mnist.ipynb) GANs are a form of neural network in which two sub-networks (the encoder and decoder) are trained on opposing loss functions: an encoder that is trained to produce data which is indiscernable from the true data, and a decoder that is trained to discriminate between the data and generated data. ![gan](imgs/gan.png) -### Wasserstein GAN with Gradient Penalty (WGAN-GP) ([article](https://arxiv.org/abs/1701.07875)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/3.0-WGAN-GP-fashion-mnist.ipynb) +### Wasserstein GAN with Gradient Penalty (WGAN-GP) ([article](https://arxiv.org/abs/1701.07875)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Musical-Projects/tensorflow2-generative-models/blob/master/3.0-WGAN-GP-fashion-mnist.ipynb) WGAN-GP is a GAN that improves over the original loss function to improve training stability. 
![wgan gp](imgs/gan.png) -### VAE-GAN ([article](https://arxiv.org/abs/1512.09300)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/6.0-VAE-GAN-fashion-mnist.ipynb) +### VAE-GAN ([article](https://arxiv.org/abs/1512.09300)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Musical-Projects/tensorflow2-generative-models/blob/master/6.0-VAE-GAN-fashion-mnist.ipynb) VAE-GAN combines the VAE and GAN to autoencode over a latent representation of data in the generator to improve over the pixelwise error function used in autoencoders. ![vae gan](imgs/vaegan.png) -### Generative adversarial interpolative autoencoder (GAIA) ([article](https://arxiv.org/abs/1807.06650)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/5.0-GAIA-fashion-mnist.ipynb) +### Generative adversarial interpolative autoencoder (GAIA) ([article](https://arxiv.org/abs/1807.06650)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Musical-Projects/tensorflow2-generative-models/blob/master/5.0-GAIA-fashion-mnist.ipynb) GAIA is an autoencoder trained to learn convex latent representations by adversarially training on interpolations in latent space projections of real data. This is an experimental modification of the original algorithm. 
For the original algorithm, see here: https://github.com/timsainb/gaia @@ -47,20 +47,20 @@ GAIA is an autoencoder trained to learn convex latent representations by adversa ## Other Notebooks: -### Seq2Seq Autoencoder (without attention) (Fasion MNIST: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/4.0-seq2seq-fashion-mnist.ipynb) | NSYNTH: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/9.0-seq2seq-NSYNTH.ipynb)) +### Seq2Seq Autoencoder (without attention) (Fashion MNIST: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Musical-Projects/tensorflow2-generative-models/blob/master/4.0-seq2seq-fashion-mnist.ipynb) | NSYNTH: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Musical-Projects/tensorflow2-generative-models/blob/master/9.0-seq2seq-NSYNTH.ipynb)) Seq2Seq models use recurrent neural network cells (like LSTMs) to better capture sequential organization in data. This implementation uses Convolutional Layers as input to the LSTM cells, and a single Bidirectional LSTM layer. 
![a seq2seq bidirectional lstm in tensorflow 2.0](imgs/seq2seq.png) -### Spectrogramming, Mel Scaling, MFCCs, and Inversion in Tensorflow [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/7.0-Tensorflow-spectrograms-and-inversion.ipynb) +### Spectrogramming, Mel Scaling, MFCCs, and Inversion in Tensorflow [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Musical-Projects/tensorflow2-generative-models/blob/master/7.0-Tensorflow-spectrograms-and-inversion.ipynb) Tensorflow has a signal processing package that allows us to generate spectrograms from waveforms as part of our dataset iterator, rather than pregenerating a second spectrogram dataset. This notebook can serve as a reference for how this is done. Spectrogram inversion is done using the Griffin-Lim algorithm. ![spectrogram inversion in tensorflow 2.0](imgs/spectrogram-inversion.png) -### Iterator for NSynth [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/timsainb/tensorflow2-generative-models/blob/master/8.0-NSYNTH-iterator.ipynb) +### Iterator for NSynth [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Musical-Projects/tensorflow2-generative-models/blob/master/8.0-NSYNTH-iterator.ipynb) The NSYNTH dataset is a set of thousands of musical notes saved as waveforms. To input these into a Seq2Seq model as spectrograms, I wrote a small dataset class that converts to spectrogram in tensorflow (using the code from the spectrogramming notebook).