-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
48 lines (38 loc) · 1.79 KB
/
app.py
File metadata and controls
48 lines (38 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import tweepy
import csv
import pandas as pd
import snscrape.modules.twitter as sntwitter
from secrets import consumer_key, consumer_secret
# get tweets with tweepy from the last 7 days
def get_tweets_tweepy(keyword, date):
    """Collect tweets matching *keyword* (retweets excluded) and write
    them to '<keyword>_no_rt_scrape.csv'.

    keyword: search query string handed to the Twitter search API.
    date:    upper bound ('until') for tweet creation dates, YYYY-MM-DD.
    """
    # Authenticate; wait_on_rate_limit makes the client sleep through
    # Twitter's rate limiting instead of raising.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)

    collected = []
    # Walk the paginated search results via a Cursor.
    for tweet in tweepy.Cursor(api.search, until=date, q=keyword).items():
        # Skip retweets, whether flagged by the API or embedded as 'RT @'.
        if tweet.retweeted or 'RT @' in tweet.text:
            continue
        collected.append((
            tweet.created_at,
            tweet.id,
            tweet.geo,
            tweet.text,
            tweet.user._json['screen_name'],
        ))

    # Persist the scrape as CSV without the index column.
    frame = pd.DataFrame(collected, columns=['datetime', 'tweet_id', 'geo', 'text', 'username'])
    frame.to_csv(keyword + '_no_rt_scrape' + '.csv', index=False)
# get tweets with snscrape
def get_tweets_sn(query='rollerblades since:2020-01-01 until:2020-12-01',
                  max_tweets=100000,
                  filename='2020_rollerbladessnscrape_tweets.csv'):
    """Scrape tweets with snscrape and save them to a CSV file.

    Parameters (all optional; defaults reproduce the original hard-coded run):
        query:      snscrape search string (supports since:/until: filters).
        max_tweets: stop once the enumeration index exceeds this value.
                    NOTE(review): `i > max_tweets` means max_tweets + 1 rows
                    are collected — kept as-is for backward compatibility.
        filename:   destination CSV path.
    """
    snScrapedTweets = []
    # iterate through tweets using TwitterSearchScraper and save them to array
    for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
        if i > max_tweets:
            break
        snScrapedTweets.append([tweet.date, tweet.id, tweet.content, tweet.username])
    # export tweets to dataframe, then write without the index column
    df = pd.DataFrame(snScrapedTweets, columns=['datetime', 'tweet_id', 'text', 'username'])
    df.to_csv(filename, index=False)
# Script entry point: guarded so importing this module for its functions
# does not kick off a (slow, network-bound) scrape as a side effect.
if __name__ == "__main__":
    # get tweets using Tweepy (disabled; requires API credentials)
    # get_tweets_tweepy("rollerskating", '2020-01-01')
    # get tweets using snscrape
    get_tweets_sn()