forked from piaskowyk/twitter-posts-analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdatabase.py
More file actions
78 lines (69 loc) · 3.23 KB
/
Copy pathdatabase.py
File metadata and controls
78 lines (69 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import psycopg2
import nltk
import re
from nltk.sentiment.vader import SentimentIntensityAnalyzer
class Database:
    """PostgreSQL-backed store for Twitter users and sentiment-scored tweets.

    Opens one psycopg2 connection and cursor per instance and scores tweet
    text with NLTK's VADER ``SentimentIntensityAnalyzer``.
    """

    # Class-level defaults so ``close()``/``__del__`` stay safe even when
    # ``__init__`` fails before the real objects are assigned.
    connection = None
    db = None
    sentiment_analyzer = None

    # Matches "@mention" tokens, which are removed before sentiment scoring.
    _MENTION_RE = re.compile(r"\@[\w]*", re.MULTILINE)

    def __init__(self, database="postgres", user="postgres", password="password",
                 host="127.0.0.1", port="5432"):
        """Connect to PostgreSQL and prepare the sentiment analyzer.

        Args:
            database: Name of the database to connect to.
            user: Database user name.
            password: Password for ``user``.
            host: Database server host.
            port: Database server port.

        The defaults reproduce the previously hard-coded local settings, so
        ``Database()`` behaves as before.
        """
        self.connection = psycopg2.connect(
            database=database,
            user=user,
            password=password,
            host=host,
            port=port,
        )
        self.db = self.connection.cursor()
        nltk.download('vader_lexicon')
        self.sentiment_analyzer = SentimentIntensityAnalyzer()

    def __del__(self):
        """Release the cursor and connection when the object is collected."""
        self.close()

    def close(self):
        """Close the cursor and the connection; safe to call more than once."""
        cursor, connection = self.db, self.connection
        # Clear the references first so a repeated call becomes a no-op.
        self.db = None
        self.connection = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if connection is not None:
                connection.close()

    @staticmethod
    def _pick(record, primary, fallback):
        """Return ``record[primary]`` if that key exists, else ``record[fallback]``.

        Unlike ``record.get(primary, record[fallback])``, the fallback key is
        only looked up when it is actually needed.

        Raises:
            KeyError: If neither key is present.
        """
        if primary in record:
            return record[primary]
        return record[fallback]

    def _insert_user(self, user):
        """Insert one user dict into the "user" table, ignoring conflicts."""
        self.db.execute(
            'INSERT INTO "user" '
            '(id, name, location, description, followers_count) '
            'VALUES(%s, %s, %s, %s, %s) ON CONFLICT DO NOTHING',
            (self._pick(user, 'id', 'id_str'), user['name'], user['location'],
             user['description'], user['followers_count'])
        )

    def _insert_tweet(self, tweet):
        """Score one tweet dict and insert it into the tweet table."""
        hashtags = tweet['entities'].get('hashtags')
        # Comma-separated hashtag texts, or None when the tweet has none.
        tags = ','.join(tag['text'] for tag in hashtags) if hashtags else None

        # Score the text without @mentions; the stored content stays intact.
        clear_tweet = self._MENTION_RE.sub('', tweet['full_text'])
        sentiment = self.sentiment_analyzer.polarity_scores(clear_tweet)

        reply_to = tweet.get('in_reply_to_status_id_str')
        # Replies are stored as already fetched, so the NULL-based query in
        # get_tweets_to_comment_fetch() never returns them.
        fetched_comments = True if reply_to else None

        self.db.execute(
            'INSERT INTO tweet '
            '(id, content, user_id, created_at, tags, sentiment_neg, sentiment_neu, sentiment_pos, '
            'sentiment_compound, retweet_count, favorite_count, reply_count, quote_count, reply_to, '
            'fetched_comments) '
            'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT DO NOTHING',
            (self._pick(tweet, 'id', 'id_str'), tweet['full_text'],
             self._pick(tweet, 'user_id', 'user_id_str'), tweet['created_at'], tags,
             sentiment['neg'], sentiment['neu'],
             sentiment['pos'], sentiment['compound'],
             tweet['retweet_count'], tweet['favorite_count'],
             tweet['reply_count'], tweet['quote_count'],
             reply_to, fetched_comments)
        )

    def insert_twitter_batch(self, json_data):
        """Store every user and tweet found in a Twitter response payload.

        Args:
            json_data: Mapping with ``json_data['globalObjects']['users']`` and
                ``json_data['globalObjects']['tweets']``, each a dict whose
                values are the user / tweet dicts to insert.

        Users are committed before tweets are inserted. Rows that conflict
        with existing ones are skipped (``ON CONFLICT DO NOTHING``).

        Raises:
            KeyError: If a required field is missing from the payload.
            Exception: Any database error; the open transaction is rolled
                back before the error is re-raised.
        """
        global_objects = json_data['globalObjects']
        try:
            for user in global_objects['users'].values():
                self._insert_user(user)
            self.connection.commit()

            for tweet in global_objects['tweets'].values():
                self._insert_tweet(tweet)
            self.connection.commit()
        except Exception:
            # Leave the connection usable instead of stuck in a failed
            # transaction, then let the caller see the original error.
            self.connection.rollback()
            raise

    def exists_tweet(self, tweet_id):
        """Return True if a tweet with ``tweet_id`` is already stored."""
        self.db.execute("SELECT 1 FROM tweet WHERE id = %s LIMIT 1", (tweet_id,))
        return self.db.fetchone() is not None

    def get_tweets_to_comment_fetch(self):
        """Return all tweet rows whose ``fetched_comments`` column is NULL."""
        self.db.execute("SELECT * FROM tweet WHERE fetched_comments is NULL")
        return self.db.fetchall()

    def set_as_fetched_comments(self, tweet):
        """Mark a tweet as having had its comments fetched, then commit.

        Args:
            tweet: Row-like sequence whose first element is the tweet id
                (presumably a row from ``get_tweets_to_comment_fetch()``;
                verify against the caller).

        Only rows with ``reply_to`` NULL are updated.
        """
        # Pass the id as a bound parameter rather than formatting it into SQL.
        self.db.execute(
            "update tweet set fetched_comments = true where id = %s and reply_to is NULL",
            (tweet[0],)
        )
        self.connection.commit()