{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "01_Word_Encoding.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "su3bfBDiRK9L"
      },
      "source": [
        "# Word Encodings"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "V1vpla79AfJj"
      },
      "source": [
        "## Import libraries and APIs"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dVGySTYgyVgW"
      },
      "source": [
        "## import the tensorflow APIs\n",
        "\n",
        "import tensorflow as tf\n",
        "from tensorflow.keras.preprocessing.text import Tokenizer"
      ],
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ogxDv27ZAiOc"
      },
      "source": [
        "## Define training sentences"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "oC-bEci9Q-EI"
      },
      "source": [
        "##sentences to tokenize\n",
        "train_sentences = [\n",
        "  'It is a sunny day',\n",
        "  'It is also running',\n",
        "  'It is kinda snowy'\n",
        "]"
      ],
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "FPwnd4K1AjXM"
      },
      "source": [
        "## Set up the tokenizer"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "GbEn11WiT5Sp"
      },
      "source": [
        "##instantiate the tokenizer\n",
        "tokenizer = Tokenizer(num_words = 100)\n",
        "\n",
        "##train the tokenizer on training sentences\n",
        "tokenizer.fit_on_texts(train_sentences)\n",
        "\n",
        "##store word index for the words in the sentence\n",
        "word_index = tokenizer.word_index\n"
      ],
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "0zZR31LAUM4p",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "f0d8a87f-f9e7-4c81-8e00-d31abc87a974"
      },
      "source": [
        "print(word_index)"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'it': 1, 'is': 2, 'a': 3, 'sunny': 4, 'day': 5, 'also': 6, 'running': 7, 'kinda': 8, 'snowy': 9}\n"
          ]
        }
      ]
    }
  ]
}