Commit c9af8ed

Deep Learning Foundation - Natural Language Processing with TensorFlow
Former-commit-id: 3fdbb8350f5401c8b45186c7aad74ae721e3f781 Former-commit-id: 893be406695d14599ca08caae63b9674261814be
1 parent 6ec1c06 commit c9af8ed

14 files changed: +15886 −26 lines changed
01_Word_Encoding.ipynb
@@ -0,0 +1,124 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "01_Word_Encoding.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "su3bfBDiRK9L"
+      },
+      "source": [
+        "# Word Encodings"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "V1vpla79AfJj"
+      },
+      "source": [
+        "## Import libraries and APIs"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "dVGySTYgyVgW"
+      },
+      "source": [
+        "## import the tensorflow APIs\n",
+        "\n",
+        "import tensorflow as tf\n",
+        "from tensorflow.keras.preprocessing.text import Tokenizer"
+      ],
+      "execution_count": 1,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ogxDv27ZAiOc"
+      },
+      "source": [
+        "## Define training sentences"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "oC-bEci9Q-EI"
+      },
+      "source": [
+        "##sentences to tokenize\n",
+        "train_sentences = [\n",
+        " 'It is a sunny day',\n",
+        " 'It is also running',\n",
+        " 'It is kinda snowy'\n",
+        "]"
+      ],
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "FPwnd4K1AjXM"
+      },
+      "source": [
+        "## Set up the tokenizer"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "GbEn11WiT5Sp"
+      },
+      "source": [
+        "##instantiate the tokenizer\n",
+        "tokenizer = Tokenizer(num_words = 100)\n",
+        "\n",
+        "##train the tokenizer on training sentences\n",
+        "tokenizer.fit_on_texts(train_sentences)\n",
+        "\n",
+        "##store word index for the words in the sentence\n",
+        "word_index = tokenizer.word_index\n"
+      ],
+      "execution_count": 3,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "0zZR31LAUM4p",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "f0d8a87f-f9e7-4c81-8e00-d31abc87a974"
+      },
+      "source": [
+        "print(word_index)"
+      ],
+      "execution_count": 4,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "{'it': 1, 'is': 2, 'a': 3, 'sunny': 4, 'day': 5, 'also': 6, 'running': 7, 'kinda': 8, 'snowy': 9}\n"
+          ]
+        }
+      ]
+    }
+  ]
+}
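
The committed notebook stops after printing the word index. A natural next step, sketched below but not part of this commit, is to call texts_to_sequences on the same fitted tokenizer so each sentence becomes a list of word indices; the 'cloudy' test sentence is an illustrative assumption. The expected outputs in the comments follow directly from the word index printed above.

## Sketch (not in this commit): encode sentences with the fitted tokenizer
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer

## same training sentences and tokenizer as in the notebook above
train_sentences = [
    'It is a sunny day',
    'It is also running',
    'It is kinda snowy'
]
tokenizer = Tokenizer(num_words = 100)
tokenizer.fit_on_texts(train_sentences)

## each word is replaced by its index from word_index
sequences = tokenizer.texts_to_sequences(train_sentences)
print(sequences)
## [[1, 2, 3, 4, 5], [1, 2, 6, 7], [1, 2, 8, 9]]

## 'cloudy' (hypothetical test word) was never seen during fitting, so it is dropped
print(tokenizer.texts_to_sequences(['It is a cloudy day']))
## [[1, 2, 3, 5]]

Words absent from the fitted vocabulary are silently dropped by default; passing oov_token='<OOV>' to the Tokenizer constructor instead reserves index 1 for unknown words (which shifts all other indices up by one).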
