mlp_full = SentimentNetwork(reviews[:-1000],labels[:-1000],min_count=0,polarity_cutoff=0,learning_rate=0.01)
mlp_full.train(reviews[:-1000],labels[:-1000])
def get_most_similar_words(focus = "horrible"):
most_similar = Counter()
for word in mlp_full.word2index.keys():
most_similar[word] = np.dot(mlp_full.weights_0_1[mlp_full.word2index[word]],mlp_full.weights_0_1[mlp_full.word2index[focus]])
return most_similar.most_common()
get_most_similar_words("excellent")
get_most_similar_words("terrible")
import matplotlib.colors as colors
words_to_visualize = list()
for word, ratio in pos_neg_ratios.most_common(500):
if(word in mlp_full.word2index.keys()):
words_to_visualize.append(word)
for word, ratio in list(reversed(pos_neg_ratios.most_common()))[0:500]:
if(word in mlp_full.word2index.keys()):
words_to_visualize.append(word)
pos = 0
neg = 0
colors_list = list()
vectors_list = list()
for word in words_to_visualize:
if word in pos_neg_ratios.keys():
vectors_list.append(mlp_full.weights_0_1[mlp_full.word2index[word]])
if(pos_neg_ratios[word] > 0):
pos+=1
colors_list.append("#00ff00")
else:
neg+=1
colors_list.append("#000000")
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, random_state=0)
words_top_ted_tsne = tsne.fit_transform(vectors_list)
p = figure(tools="pan,wheel_zoom,reset,save",
toolbar_location="above",
title="vector T-SNE for most polarized words")
source = ColumnDataSource(data=dict(x1=words_top_ted_tsne[:,0],
x2=words_top_ted_tsne[:,1],
names=words_to_visualize,
color=colors_list))
p.scatter(x="x1", y="x2", size=8, source=source, fill_color="color")
word_labels = LabelSet(x="x1", y="x2", text="names", y_offset=6,
text_font_size="8pt", text_color="#555555",
source=source, text_align='center')
p.add_layout(word_labels)
show(p)
