-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSenti_analysis.py
More file actions
113 lines (94 loc) · 3.59 KB
/
Senti_analysis.py
File metadata and controls
113 lines (94 loc) · 3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 3 14:43:30 2018
@author: yangl
"""
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 13 16:17:02 2018
@author: yangl
This is a script for sentiment analysis with either VADER or SentiWordNet.
"""
import pandas as pd
import nltk
from nltk import tokenize
from nltk.corpus import sentiwordnet as swn
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
# Sample input: seven short review sentences joined into a single string.
text1 = (
    "It was one of the worst movies I've seen, despite good reviews. "
    "Unbelievably Good acting!! Poor direction. VERY poor production. "
    "The movie was perfect. Very bad movie. VERY bad movie."
)
# sort_by_sentiment ranks the sentences in text by their VADER compound score.
def sort_by_sentiment(text, desc=True):
    """Score each sentence of *text* with VADER and sort by compound score.

    Args:
        text: Raw text; it is split into sentences with
            ``tokenize.sent_tokenize``.
        desc: When true (the default) rows are ordered from the highest
            compound score to the lowest; when false the order is reversed.

    Returns:
        A ``pandas.DataFrame`` with one row per sentence: the entries of
        ``polarity_scores`` for that sentence plus a ``'headline'`` column
        holding the sentence text. When *text* contains no sentences an
        empty frame with the columns ``neg``, ``neu``, ``pos``,
        ``compound`` and ``headline`` is returned.
    """
    sia = SIA()
    results = []
    for sentence in tokenize.sent_tokenize(text):
        pol_score = sia.polarity_scores(sentence)
        pol_score['headline'] = sentence
        results.append(pol_score)

    if not results:
        # Without any records the frame would have no 'compound' column and
        # sort_values would raise KeyError, so return an empty, well-formed
        # frame instead.
        return pd.DataFrame(
            columns=['neg', 'neu', 'pos', 'compound', 'headline']
        )

    df = pd.DataFrame.from_records(results)
    return df.sort_values('compound', ascending=not desc)
# Rank the sample sentences in text1 by compound score (default order: highest first).
tt=sort_by_sentiment(text1)
# NOTE(review): the value of head() is discarded here, so a plain script run
# shows nothing -- presumably intended for an interactive console; confirm.
tt.head()
# Sample review text (seven sentences) analysed by wordnet_sentAnalyzer below.
# Every fragment except the last ends with a space: without it the sentences
# run together ("staff.You", "price!This") and the sentence tokenizer cannot
# separate them.
doc = (
    "Nice and friendly place with excellent food and friendly and helpful staff. "
    "You need a car though. "
    "I love this house very much. "
    "Playground and animals entertained them and they felt like at home. "
    "I also recommend the dinner! "
    "Great value for the price! "
    "This movies is bad, I will not recommend it to my friends"
)
def _penn_to_wordnet_pos(penn_tag):
    """Map a POS tag from nltk.pos_tag to a WordNet POS letter ('' if unscored)."""
    if penn_tag.startswith('NN'):
        return 'n'  # noun
    if penn_tag.startswith('JJ'):
        return 'a'  # adjective
    if penn_tag.startswith('V'):
        return 'v'  # verb
    if penn_tag.startswith('R'):
        return 'r'  # adverb
    return ''


def _word_scores(lemmatizer, word, penn_tag):
    """Return [objective, positive, negative] averaged over the word's synsets, or None."""
    wn_pos = _penn_to_wordnet_pos(penn_tag)
    if not wn_pos:
        return None
    # Lemmatize with the mapped POS: the lemmatizer's default POS is noun,
    # which mangles verbs (e.g. "was" -> "wa") and loses their synsets.
    lemma = lemmatizer.lemmatize(word, wn_pos)
    synsets = list(swn.senti_synsets(lemma, wn_pos))
    if not synsets:
        return None
    count = len(synsets)
    return [
        sum(syn.obj_score() for syn in synsets) / count,
        sum(syn.pos_score() for syn in synsets) / count,
        sum(syn.neg_score() for syn in synsets) / count,
    ]


def wordnet_sentAnalyzer(doc):
    """Compute per-sentence SentiWordNet scores for *doc*.

    Each sentence is lower-cased, word-tokenized and POS-tagged. Every noun,
    adjective, verb and adverb that has SentiWordNet synsets contributes the
    average objective/positive/negative score of those synsets, and the
    sentence score is the average over its contributing words.

    Args:
        doc: Raw text containing one or more sentences.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Sentences``, ``Objective``,
        ``Positive`` and ``Negative``, one row per sentence. A sentence in
        which no word could be scored gets NaN in the three score columns.
    """
    # Split into sentences once, on the original text, so that each score row
    # is labelled with exactly the sentence it was computed from. Splitting
    # the lower-cased and the original text separately can yield different
    # boundaries.
    sentences = nltk.sent_tokenize(doc)
    wnl = nltk.WordNetLemmatizer()

    sent_sentiment = []
    for sentence in sentences:
        tagged = nltk.pos_tag(nltk.word_tokenize(sentence.lower()))
        candidates = (_word_scores(wnl, word, tag) for word, tag in tagged)
        word_scores = [scores for scores in candidates if scores is not None]

        if word_scores:
            count = len(word_scores)
            obj, pos, neg = (sum(column) / count for column in zip(*word_scores))
        else:
            # No scorable word: the average is undefined, so report NaN
            # rather than dividing by zero.
            obj = pos = neg = float('nan')
        sent_sentiment.append([sentence, obj, pos, neg])

    # Build a dataframe in which each row holds a sentence and its sentiment scores.
    return pd.DataFrame.from_records(
        sent_sentiment,
        columns=['Sentences', 'Objective', 'Positive', 'Negative'],
    )
# Score each sentence of the sample text in doc with SentiWordNet and print the resulting table.
df=wordnet_sentAnalyzer(doc)
print(df)