Commit ab6446a0 authored by Alessandro Elias

Initial commit

parent b2dc7e56
import sys
import jsonpickle
import os
import tweepy
import csv
import json
import logging
logging.basicConfig(level=logging.INFO)
#### input your credentials here
consumer_key = 'pcn6szyFLVnzxDclKazZ3tQPI'
consumer_secret = 'OGKptAjYnoh33mvPEkBPQgcApNXFysbCUx2CjQwjtHhr7Z9unO'
access_token = '1127995977963646977-vqZMlKE8vJLSFMWmL6J9ouEZBncPi1'
access_token_secret = 'xT6WMGkUZJULIMAwpG0AUWtOFYDdGMBYAFSHUZIo9NLYh'
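# os is already imported, so the keys could instead come from the environment rather than
# being committed in plain text, e.g. consumer_key = os.environ['TWITTER_CONSUMER_KEY']
# (the TWITTER_* variable names are only an illustration, not something this repo defines)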
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
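# wait_on_rate_limit=True makes tweepy sleep through rate-limit windows instead of raising;
# note that wait_on_rate_limit_notify exists only in tweepy 3.x (it was removed in tweepy 4)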
# the part below is the one that really matters...
searchQuery = '((cota OR cotas OR universidade OR universidades) AND (racial OR raciais)) OR ((universidade OR universidades) AND (cota OR cotas)) '
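# the standard search API treats a space between terms as an implicit AND, so the grouping
# above matches quota terms together with racial terms, or university terms with quota terms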
# I suggest dropping sincedate; the code gets less ugly that way. I also remember it crashing a few
# times; and in any case the API only goes back about a week anyway
sincedate = "2018-11-20"
untildate = "2018-11-21"
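# since/until take YYYY-MM-DD dates and until is exclusive, so this window covers 2018-11-20 only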
maxTweets = 10000000
# Try setting this limit. Just so they don't wreck the key
tweetsPerQry = 100 # this is the max the API permits
# better to parameterize these output file names
csvFile = open('cotas.csv', 'a', newline='', encoding='utf-8')
jsonFile = open('cotas.json', 'a', encoding='utf-8')
csvWriter = csv.writer(csvFile)
# If results from a specific ID onwards are required, set since_id to that ID;
# else default to no lower limit and go as far back as the API allows.
sinceId = None
# If only results below a specific ID are required, set max_id to that ID;
# else default to no upper limit and start from the most recent tweet matching the search query.
max_id = -1  # -1L was Python 2 long-literal syntax; a plain int works in Python 3
#max_id = 1045463072670920704
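# pagination example: if a page ends at tweet id 1045463072670920704, the next request is sent
# with max_id=1045463072670920703, so each page continues strictly below the previous one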
tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets))
while tweetCount < maxTweets:
    try:
        if max_id <= 0:
            if not sinceId:
                new_tweets = api.search(q=searchQuery, since=sincedate, until=untildate, count=tweetsPerQry)
            else:
                new_tweets = api.search(q=searchQuery, until=untildate, count=tweetsPerQry, since_id=sinceId)
        else:
            if not sinceId:
                new_tweets = api.search(q=searchQuery, since=sincedate, until=untildate, count=tweetsPerQry, max_id=str(max_id - 1))
            else:
                new_tweets = api.search(q=searchQuery, until=untildate, count=tweetsPerQry, max_id=str(max_id - 1), since_id=sinceId)
        if not new_tweets:
            print("No more tweets found")
            break
        for tweet in new_tweets:
            # dump the raw tweet as one JSON object per line
            json.dump(tweet._json, jsonFile)
            jsonFile.write('\n')
            # I didn't put every field in the csv
            csvWriter.writerow([
                tweet.created_at,
                tweet.id,
                tweet.in_reply_to_status_id,
                tweet.in_reply_to_user_id,
                tweet.in_reply_to_screen_name,
                tweet.user.id,
                tweet.user.screen_name,
                tweet.user.followers_count,
                tweet.is_quote_status,
                tweet.retweet_count,
                tweet.favorite_count,
                tweet.lang,
                tweet.text])  # no .encode() needed: the csv file is opened with encoding='utf-8'
        tweetCount += len(new_tweets)
        print("Downloaded {0} tweets".format(tweetCount))
        max_id = new_tweets[-1].id
    except tweepy.TweepError as e:
        # retry with the same max_id; beware this loops forever on a persistent error
        print("some error : " + str(e))
        continue

print("Downloaded {0} tweets".format(tweetCount))
csvFile.close()
jsonFile.close()