-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathblogme.py
More file actions
142 lines (101 loc) · 3.1 KB
/
blogme.py
File metadata and controls
142 lines (101 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 31 17:42:51 2023
@author: Mandar
"""
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Load the article export from the Excel workbook.
data = pd.read_excel('articles.xlsx')

# Summary statistics of the data. The result is not stored or printed, so it
# is only visible when this line is run in an interactive session.
data.describe()

# Summary of the columns; info() prints its report itself.
data.info()

# Number of articles per source.
# Pattern: df.groupby('column_to_group')['column_to_count'].count()
data.groupby('source_id')['article_id'].count()

# Total number of reactions per publisher.
data.groupby('source_id')['engagement_reaction_count'].sum()

# Drop the comment-plugin count column.
data = data.drop(columns='engagement_comment_plugin_count')
#functions in Python
def thisFunction():
    """Print a fixed one-line message; takes no arguments, returns None."""
    message = 'This is my first function'
    print(message)


thisFunction()
#This is a function with variables
def aboutMe(name, surname, location):
    """Print a one-line introduction and hand the three values back.

    Args:
        name: First name (string).
        surname: Family name (string).
        location: Place the person is from (string).

    Returns:
        The tuple ``(name, surname, location)``, unchanged.
    """
    # join() only accepts strings, so a non-string argument still raises
    # TypeError before anything is printed.
    pieces = ('This is ', name, ' My surname is ', surname, ' I am from ', location)
    print(''.join(pieces))
    return name, surname, location


a = aboutMe('Mandar', 'Nadkarni', 'India')
#Using for loops in functions
def favfood(food):
    """Print one 'Top food is ...' line for each entry of *food*, in order.

    Args:
        food: Iterable of strings.

    Returns:
        None.
    """
    prefix = 'Top food is '
    for item in food:
        print(prefix + item)


fastfood = ['salad', 'water', 'fruit']
favfood(fastfood)
# Keyword to look for in the article titles.
# NOTE(review): no later statement in this file reads this variable;
# keywordflag() below takes the keyword as its own argument.
keyword = 'crash'
# First draft: a plain loop that isolates each title row and flags it.
# Left commented out; the keywordflag() function below wraps the same loop
# and additionally guards the substring test with try/except.
# length = len(data)
# keyword_flag = []
# for x in range(0, length):
#     heading = data['title'][x]
#     if keyword in heading:
#         flag = 1
#     else:
#         flag = 0
#     keyword_flag.append(flag)
#creating a function
def keywordflag(keyword, titles=None):
    """Flag which article titles contain *keyword*.

    Args:
        keyword: Substring to look for. The match is case-sensitive.
        titles: Optional iterable of titles to scan. When omitted, the
            ``title`` column of the module-level ``data`` DataFrame is
            used, which is what the one-argument call has always done.

    Returns:
        A list with one int per title, in iteration order: 1 when
        ``keyword in title`` is true, otherwise 0. A title that does not
        support the ``in`` test at all (it raises ``TypeError``, as a float
        or ``None`` does) is flagged 0.
    """
    if titles is None:
        titles = data['title']

    flags = []
    # Walk the values themselves rather than looking rows up by the labels
    # 0..n-1, so the result does not depend on the DataFrame's index.
    for heading in titles:
        try:
            found = keyword in heading
        except TypeError:
            # Only the "cannot do a containment test on this value" case is
            # treated as "no match"; anything else is a real error and
            # should surface instead of being silently swallowed.
            found = False
        flags.append(1 if found else 0)
    return flags
# Keep the flags under their own name. Assigning the result back to
# `keywordflag` would replace the function with a list, so it could not be
# called again for another keyword.
murder_flags = keywordflag('murder')
#creating a new column in data dataframe
data['keyword_flag'] = pd.Series(murder_flags)
#SentimentIntensityAnalyzer
# Build the analyzer once and reuse it for every title below, instead of
# constructing a fresh one on each pass of the loop.
sent_int = SentimentIntensityAnalyzer()

# Worked example on a single title (a row labelled 16 must exist).
text = data['title'][16]
sent = sent_int.polarity_scores(text)
neg = sent['neg']
pos = sent['pos']
neu = sent['neu']

#adding a for loop to extract sentiment per title
title_neg_sentiment = []
title_pos_sentiment = []
title_neu_sentiment = []

# Iterate the title values directly; no lookup by row label is needed.
for text in data['title']:
    try:
        sent = sent_int.polarity_scores(text)
    except Exception:
        # Best effort, as before: a title the analyzer cannot score
        # (presumably a blank cell read in as NaN) gets 0 for all three
        # scores. Catching Exception instead of using a bare `except:`
        # means Ctrl-C and SystemExit are no longer swallowed mid-loop.
        neg = pos = neu = 0
    else:
        neg = sent['neg']
        pos = sent['pos']
        neu = sent['neu']
    title_neg_sentiment.append(neg)
    title_pos_sentiment.append(pos)
    title_neu_sentiment.append(neu)

# Give each Series the DataFrame's own index so every score is assigned to
# the row it was computed from, whatever the index looks like.
title_neg_sentiment = pd.Series(title_neg_sentiment, index=data.index)
title_pos_sentiment = pd.Series(title_pos_sentiment, index=data.index)
title_neu_sentiment = pd.Series(title_neu_sentiment, index=data.index)

data['title_neg_sentiment'] = title_neg_sentiment
data['title_pos_sentiment'] = title_pos_sentiment
data['title_neu_sentiment'] = title_neu_sentiment
# Write the DataFrame to 'blogme_clean.xlsx' on a sheet named 'blogmedata';
# index=False keeps the row index out of the output file.
data.to_excel('blogme_clean.xlsx', sheet_name='blogmedata', index=False)