In [5]:
import numpy as np
from emo_utils import *
import emoji
import matplotlib.pyplot as plt

%matplotlib inline

In [6]:
# Load the training data from the CSV file. read_csv is provided by the
# project's emo_utils module; presumably it returns an array of sentences and
# an array of integer emoji labels -- confirm against emo_utils.
X_train, Y_train = read_csv('emojify_data.csv')

In [8]:
# Largest number of words in any training sentence; used below as the fixed
# sequence length of the model input. The word count is taken per sentence:
# selecting the longest sentence by character count first (and only then
# counting its words) can underestimate the true maximum, because a sentence
# with many short words may have fewer characters than one with few long words.
maxLen = max(len(sentence.split()) for sentence in X_train)

In [12]:
# Spot-check a single training example: print the sentence next to the emoji
# that label_to_emoji (from emo_utils) produces for its label.
index = 99
print(X_train[index], label_to_emoji(Y_train[index]))

her smile is so charming 😄


In [17]:
# Load pre-trained GloVe word vectors (the file name indicates the 6B-token,
# 100-dimensional set). read_glove_vecs comes from emo_utils; judging by the
# target names it returns word->index, index->word and word->vector lookups
# -- confirm against emo_utils.
word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('glove.6B/glove.6B.100d.txt')

In [14]:
import numpy as np
np.random.seed(0)
from keras.models import Model
from keras.layers import Dense, Input, Dropout, LSTM, Activation
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.initializers import glorot_uniform
np.random.seed(1)

In [15]:
def sentences_to_indices(X, word_to_index, max_len):
    """Convert sentences into a zero-padded matrix of vocabulary indices.

    Each sentence is split on whitespace and lower-cased word by word. Words
    found in ``word_to_index`` are written, in order, into the sentence's row.
    Words missing from the vocabulary are skipped, and sentences with more
    than ``max_len`` known words are truncated, so arbitrary user text does
    not raise ``KeyError`` / ``IndexError``.

    Args:
        X: sequence of sentence strings (1-D numpy array or list).
        word_to_index: mapping from lower-case word to its integer index.
        max_len: number of columns of the result.

    Returns:
        numpy array of shape ``(len(X), max_len)`` (default float dtype of
        ``np.zeros``); unused trailing positions of each row stay 0.
    """
    m = len(X)
    X_indices = np.zeros((m, max_len))
    for i in range(m):
        sentence_words = (w.lower() for w in X[i].split())
        # Keep only in-vocabulary words so positions stay contiguous.
        known_indices = [word_to_index[w] for w in sentence_words if w in word_to_index]
        # Slicing caps the row at max_len entries instead of overflowing it.
        for j, word_index in enumerate(known_indices[:max_len]):
            X_indices[i, j] = word_index
    return X_indices

In [52]:
# Quick sanity check of sentences_to_indices on three short sentences, padded
# to a width of 5 columns.
X1 = np.array(["haha", "lets play cricket", "tonight we are eating pizza"])
X1_indices = sentences_to_indices(X1,word_to_index, max_len = 5)
print("X1 =", X1)
print("X1_indices =", X1_indices)

X1 = ['haha' 'lets play cricket' 'tonight we are eating pizza']
X1_indices = [[171102.      0.      0.      0.      0.]
 [220930. 286375. 111805.      0.      0.]
 [361859. 384374.  58997. 132714. 285806.]]


In [19]:
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """Build a frozen Keras ``Embedding`` layer holding pre-trained vectors.

    Args:
        word_to_vec_map: mapping word -> 1-D numpy vector. All vectors are
            assumed to share the same length.
        word_to_index: mapping word -> integer row of the embedding matrix.

    Returns:
        A non-trainable ``Embedding`` layer whose weight matrix has
        ``len(word_to_index) + 1`` rows, with row ``word_to_index[w]`` set to
        ``word_to_vec_map[w]``.

    Raises:
        ValueError: if ``word_to_vec_map`` is empty.
        KeyError: if a word in ``word_to_index`` has no vector.
    """
    if not word_to_vec_map:
        raise ValueError("word_to_vec_map is empty; cannot infer the embedding dimension")

    # One more row than there are words (presumably word indices start at 1,
    # leaving row 0 as an all-zero vector for padding -- confirm in emo_utils).
    vocab_len = len(word_to_index) + 1
    # Infer the dimension from any vector rather than from one specific word,
    # so vocabularies that lack that word still work.
    emb_dim = next(iter(word_to_vec_map.values())).shape[0]

    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    # The layer must be built before its weights can be replaced.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer

In [26]:
def emoji_converter(input_shape, word_to_vec_map, word_to_index,
                    num_classes=7, lstm_units=128, dropout_rate=0.4):
    """Build the sentence -> emoji classifier (two stacked LSTMs over embeddings).

    Args:
        input_shape: shape of one input sample, e.g. ``(max_len,)``.
        word_to_vec_map: mapping word -> pre-trained vector, forwarded to
            ``pretrained_embedding_layer``.
        word_to_index: mapping word -> integer index, forwarded likewise.
        num_classes: size of the softmax output (default 7, as before).
        lstm_units: hidden size of both LSTM layers (default 128, as before).
        dropout_rate: rate of the dropout after each LSTM (default 0.4).

    Returns:
        An uncompiled Keras ``Model`` mapping int32 index sequences to
        class probabilities.
    """
    sentence_indices = Input(input_shape, dtype='int32')
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
    embeddings = embedding_layer(sentence_indices)
    # First LSTM emits the whole sequence so the second LSTM can consume it.
    X = LSTM(lstm_units, return_sequences=True)(embeddings)
    X = Dropout(dropout_rate)(X)
    # Second LSTM emits only its final state, one vector per sentence.
    X = LSTM(lstm_units, return_sequences=False)(X)
    X = Dropout(dropout_rate)(X)
    X = Dense(num_classes)(X)
    X = Activation('softmax')(X)
    model = Model(inputs=sentence_indices, outputs=X)
    return model

In [27]:
# Build the classifier for inputs of maxLen word indices and print its
# layer-by-layer summary.
model = emoji_converter((maxLen,), word_to_vec_map, word_to_index)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 10)                0         
_________________________________________________________________
embedding_3 (Embedding)      (None, 10, 100)           40000100  
_________________________________________________________________
lstm_5 (LSTM)                (None, 10, 128)           117248    
_________________________________________________________________
dropout_5 (Dropout)          (None, 10, 128)           0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_6 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 7)                 903       
__________

In [32]:
# Categorical cross-entropy pairs with the softmax output and the one-hot
# labels built in the next cell; accuracy is reported during training.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [33]:
# Turn the training sentences into padded index rows and the labels into
# one-hot vectors. convert_to_one_hot is from emo_utils; C = 7 matches the
# 7-unit output layer of the model.
X_train_indices = sentences_to_indices(X_train, word_to_index, maxLen)
Y_train_one_hot = convert_to_one_hot(Y_train, C = 7)

In [34]:
# Train for 50 epochs in shuffled mini-batches of 32 sentences.
model.fit(X_train_indices, Y_train_one_hot, epochs = 50, batch_size = 32, shuffle=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7ff57af44828>

In [63]:
# Hand-written sentences for a qualitative check of the trained model. Each
# sentence sits in its own one-element row, giving an array of shape (8, 1).
x_test = np.array([
    ["I do not like movies"],
    ["I feel lonely"],
    ["Let us go and watch football world cup tonight"],
    ["Honey lets go out for a date"],
    ["She is the most amazing girl"],
    ["Happy birthday Raj"],
    ["This is the best day of my life"],
    ["My mom is the best"],
])

In [64]:
# Classify each test sentence on its own: x is a one-element row, so
# sentences_to_indices yields a single padded row, and np.argmax over the
# prediction selects the class index that label_to_emoji renders.
for x in x_test:
    X_test_indices = sentences_to_indices(x, word_to_index, maxLen)
    print(x[0] +' '+  label_to_emoji(np.argmax(model.predict(X_test_indices))))

I do not like movies 😞
I feel lonely 😞
Let us go and watch football world cup tonight ⚾
Honey lets go out for a date 🍴
She is the most amazing girl ❤️
Happy birthday Raj 😄
This is the best day of my life 😄
My mom is the best ❤️
