Skip to content
Snippets Groups Projects
Commit 7930f8f1 authored by Alessandro's avatar Alessandro
Browse files

Close files

parent 9ceba722
Branches
No related tags found
No related merge requests found
...@@ -9,32 +9,14 @@ import json ...@@ -9,32 +9,14 @@ import json
import logging import logging
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
####input your credentials here def search(api, sincedate, untildate, csvFile, jsonFile):
consumer_key = 'pcn6szyFLVnzxDclKazZ3tQPI'
consumer_secret = 'OGKptAjYnoh33mvPEkBPQgcApNXFysbCUx2CjQwjtHhr7Z9unO'
access_token = '1127995977963646977-vqZMlKE8vJLSFMWmL6J9ouEZBncPi1'
access_token_secret = 'xT6WMGkUZJULIMAwpG0AUWtOFYDdGMBYAFSHUZIo9NLYh'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
# a parte de baixo que é a realmente importante... # a parte de baixo que é a realmente importante...
searchQuery = '((cota OR cotas OR universidade OR universidades) AND (racial OR raciais)) OR ((universidade OR universidades) AND (cota OR cotas)) ' searchQuery = '((cota OR cotas OR universidade OR universidades) AND (racial OR raciais)) OR ((universidade OR universidades) AND (cota OR cotas)) '
# sugiro tirar o sincedate; assim fica menos feito o código. Eu também lembro de dar uns paus; e de qq forma maxTweets = 1000000
# a API só volta cerca de uma semana no passado mesmo
sincedate = "2019-05-15"
untildate = "2019-05-16"
maxTweets = 10000000
# Testar colocar esse limite. Só para eles não destruirem a cahve
tweetsPerQry = 100 # this is the max the API permits tweetsPerQry = 100 # this is the max the API permits
# melhor parametrizar esse arquivos de saida
csvFile = open('cotas.csv', 'a')
jsonFile = open('cotas.json', 'a')
csvWriter = csv.writer(csvFile) csvWriter = csv.writer(csvFile)
# If results from a specific ID onwards are reqd, set since_id to that ID. # If results from a specific ID onwards are reqd, set since_id to that ID.
...@@ -46,8 +28,6 @@ sinceId = None ...@@ -46,8 +28,6 @@ sinceId = None
# Python3 has 9223372036854775807 as max number # Python3 has 9223372036854775807 as max number
max_id = sys.maxsize max_id = sys.maxsize
#max_id = 1045463072670920704
tweetCount = 0 tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets)) print("Downloading max {0} tweets".format(maxTweets))
while tweetCount < maxTweets: while tweetCount < maxTweets:
...@@ -91,3 +71,35 @@ while tweetCount < maxTweets: ...@@ -91,3 +71,35 @@ while tweetCount < maxTweets:
print ("Downloaded {0} tweets".format(tweetCount)) print ("Downloaded {0} tweets".format(tweetCount))
if __name__ == "__main__":
    # Script entry point: authenticate against the Twitter API, read the date
    # window and the two output paths from the command line, and run search().
    #
    # Usage: <script> <sincedate> <untildate> <csvFile> <jsonFile>

    # Validate the command line before doing anything else. Four positional
    # arguments plus the program name means len(sys.argv) must be 5; the
    # previous check against 4 rejected every correct invocation and let a
    # three-argument call through to an IndexError on sys.argv[4].
    if len(sys.argv) != 5:
        print("Usage "+sys.argv[0]+" <sincedate> <untildate> <csvFile> <jsonFile>")
        sys.exit(1)

    # NOTE(review): these credentials are committed in plain text. They should
    # be revoked/rotated and loaded from the environment or an untracked
    # config file instead of living in the source.
    consumer_key = 'pcn6szyFLVnzxDclKazZ3tQPI'
    consumer_secret = 'OGKptAjYnoh33mvPEkBPQgcApNXFysbCUx2CjQwjtHhr7Z9unO'
    access_token = '1127995977963646977-vqZMlKE8vJLSFMWmL6J9ouEZBncPi1'
    access_token_secret = 'xT6WMGkUZJULIMAwpG0AUWtOFYDdGMBYAFSHUZIo9NLYh'

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

    # Earlier review suggestion (translated): consider dropping sincedate; it
    # was remembered as causing failures, and the API reportedly only reaches
    # about one week into the past anyway.
    sincedate = sys.argv[1]
    untildate = sys.argv[2]

    # Both outputs are opened in append mode so repeated runs accumulate
    # results. The context manager closes them even if search() raises.
    # newline='' is what the csv module requires for files handed to
    # csv.writer; without it, rows gain an extra '\r' on platforms that
    # translate line endings.
    with open(sys.argv[3], 'a', newline='') as csvFile, \
            open(sys.argv[4], 'a') as jsonFile:
        search(api, sincedate, untildate, csvFile, jsonFile)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment