Skip to content
Snippets Groups Projects
Commit 7930f8f1 authored by Alessandro's avatar Alessandro
Browse files

Close files

parent 9ceba722
No related branches found
No related tags found
No related merge requests found
...@@ -9,32 +9,14 @@ import json ...@@ -9,32 +9,14 @@ import json
import logging import logging
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
####input your credentials here def search(api, sincedate, untildate, csvFile, jsonFile):
consumer_key = 'pcn6szyFLVnzxDclKazZ3tQPI'
consumer_secret = 'OGKptAjYnoh33mvPEkBPQgcApNXFysbCUx2CjQwjtHhr7Z9unO'
access_token = '1127995977963646977-vqZMlKE8vJLSFMWmL6J9ouEZBncPi1'
access_token_secret = 'xT6WMGkUZJULIMAwpG0AUWtOFYDdGMBYAFSHUZIo9NLYh'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
# a parte de baixo que é a realmente importante... # a parte de baixo que é a realmente importante...
searchQuery = '((cota OR cotas OR universidade OR universidades) AND (racial OR raciais)) OR ((universidade OR universidades) AND (cota OR cotas)) ' searchQuery = '((cota OR cotas OR universidade OR universidades) AND (racial OR raciais)) OR ((universidade OR universidades) AND (cota OR cotas)) '
# sugiro tirar o sincedate; assim fica menos feito o código. Eu também lembro de dar uns paus; e de qq forma maxTweets = 1000000
# a API só volta cerca de uma semana no passado mesmo
sincedate = "2019-05-15"
untildate = "2019-05-16"
maxTweets = 10000000
# Testar colocar esse limite. Só para eles não destruirem a cahve
tweetsPerQry = 100 # this is the max the API permits tweetsPerQry = 100 # this is the max the API permits
# melhor parametrizar esse arquivos de saida
csvFile = open('cotas.csv', 'a')
jsonFile = open('cotas.json', 'a')
csvWriter = csv.writer(csvFile) csvWriter = csv.writer(csvFile)
# If results from a specific ID onwards are reqd, set since_id to that ID. # If results from a specific ID onwards are reqd, set since_id to that ID.
...@@ -46,8 +28,6 @@ sinceId = None ...@@ -46,8 +28,6 @@ sinceId = None
# Python3 has 9223372036854775807 as max number # Python3 has 9223372036854775807 as max number
max_id = sys.maxsize max_id = sys.maxsize
#max_id = 1045463072670920704
tweetCount = 0 tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets)) print("Downloading max {0} tweets".format(maxTweets))
while tweetCount < maxTweets: while tweetCount < maxTweets:
...@@ -91,3 +71,35 @@ while tweetCount < maxTweets: ...@@ -91,3 +71,35 @@ while tweetCount < maxTweets:
print ("Downloaded {0} tweets".format(tweetCount)) print ("Downloaded {0} tweets".format(tweetCount))
def parse_cli_args(argv):
    """Validate the command line and return the four positional arguments.

    Args:
        argv: Full argument vector, i.e. the script name followed by
            ``<sincedate> <untildate> <csvFile> <jsonFile>``.

    Returns:
        A ``(sincedate, untildate, csv_path, json_path)`` tuple of strings.

    Raises:
        SystemExit: With exit code 1, after printing the usage line, when the
            number of arguments is wrong.
    """
    # argv[0] is the script name, so four positional arguments mean five
    # entries. The previous check compared against 4, which rejected the
    # correct invocation and let argv[4] raise IndexError on the accepted one.
    if len(argv) != 5:
        print("Usage "+argv[0]+" <sincedate> <untildate> <csvFile> <jsonFile>")
        sys.exit(1)
    return argv[1], argv[2], argv[3], argv[4]


def load_credentials(environ):
    """Read the four Twitter API credentials from an environment mapping.

    Args:
        environ: Mapping of environment variable names to values
            (normally ``os.environ``).

    Returns:
        A ``(consumer_key, consumer_secret, access_token,
        access_token_secret)`` tuple of strings.

    Raises:
        SystemExit: With exit code 1 when any variable is missing or empty.
    """
    names = (
        "TWITTER_CONSUMER_KEY",
        "TWITTER_CONSUMER_SECRET",
        "TWITTER_ACCESS_TOKEN",
        "TWITTER_ACCESS_TOKEN_SECRET",
    )
    missing = [name for name in names if not environ.get(name)]
    if missing:
        print("Missing environment variables: " + ", ".join(missing))
        sys.exit(1)
    return tuple(environ[name] for name in names)


if __name__ == "__main__":
    import os

    # Check the command line first so a bad invocation always shows usage.
    sincedate, untildate, csv_path, json_path = parse_cli_args(sys.argv)

    # Credentials come from the environment instead of being committed to
    # the repository. Keys that were previously hard-coded here should be
    # treated as compromised and revoked.
    (consumer_key, consumer_secret,
     access_token, access_token_secret) = load_credentials(os.environ)

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

    # Reviewer suggestion kept from the original: consider dropping
    # sincedate. It reportedly caused failures, and the API only reaches
    # back about a week anyway.

    # Output files are opened in append mode; the with-statement closes both
    # even if search() raises.
    with open(csv_path, 'a') as csvFile, open(json_path, 'a') as jsonFile:
        search(api, sincedate, untildate, csvFile, jsonFile)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment