Commit ab6446a0 authored by Alessandro Elias

Initial commit

parent b2dc7e56
import sys
import jsonpickle
import os
import tweepy
import csv
import json
import logging
logging.basicConfig(level=logging.INFO)
#### input your credentials here
consumer_key = 'pcn6szyFLVnzxDclKazZ3tQPI'
consumer_secret = 'OGKptAjYnoh33mvPEkBPQgcApNXFysbCUx2CjQwjtHhr7Z9unO'
access_token = '1127995977963646977-vqZMlKE8vJLSFMWmL6J9ouEZBncPi1'
access_token_secret = 'xT6WMGkUZJULIMAwpG0AUWtOFYDdGMBYAFSHUZIo9NLYh'
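# os is already imported, so the keys could instead come from the environment rather than
# being committed in plain text, e.g. consumer_key = os.environ['TWITTER_CONSUMER_KEY']
# (the TWITTER_* variable names are only an illustration, not something this repo defines)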
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
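# wait_on_rate_limit=True makes tweepy sleep through rate-limit windows instead of raising;
# note that wait_on_rate_limit_notify exists only in tweepy 3.x (it was removed in tweepy 4)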
# the part below is the one that really matters...
searchQuery = '((cota OR cotas OR universidade OR universidades) AND (racial OR raciais)) OR ((universidade OR universidades) AND (cota OR cotas)) '
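# the standard search API treats a space between terms as an implicit AND, so the grouping
# above matches quota terms together with racial terms, or university terms with quota terms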
# I suggest dropping sincedate; the code gets less ugly that way. I also remember it crashing a few
# times; and in any case the API only goes back about a week anyway
sincedate = "2018-11-20"
untildate = "2018-11-21"
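# since/until take YYYY-MM-DD dates and until is exclusive, so this window covers 2018-11-20 only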
maxTweets = 10000000
# Try setting this limit. Just so they don't wreck the key
tweetsPerQry = 100 # this is the max the API permits
# better to parameterize these output file names
csvFile = open('cotas.csv', 'a', newline='', encoding='utf-8')
jsonFile = open('cotas.json', 'a', encoding='utf-8')
csvWriter = csv.writer(csvFile)
# If results from a specific ID onwards are required, set since_id to that ID;
# else default to no lower limit and go as far back as the API allows.
sinceId = None
# If only results below a specific ID are required, set max_id to that ID;
# else default to no upper limit and start from the most recent tweet matching the search query.
max_id = -1  # -1L was Python 2 long-literal syntax; a plain int works in Python 3
#max_id = 1045463072670920704
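# pagination example: if a page ends at tweet id 1045463072670920704, the next request is sent
# with max_id=1045463072670920703, so each page continues strictly below the previous one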
tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets))
while tweetCount < maxTweets:
    try:
        if max_id <= 0:
            if not sinceId:
                new_tweets = api.search(q=searchQuery, since=sincedate, until=untildate, count=tweetsPerQry)
            else:
                new_tweets = api.search(q=searchQuery, until=untildate, count=tweetsPerQry, since_id=sinceId)
        else:
            if not sinceId:
                new_tweets = api.search(q=searchQuery, since=sincedate, until=untildate, count=tweetsPerQry, max_id=str(max_id - 1))
            else:
                new_tweets = api.search(q=searchQuery, until=untildate, count=tweetsPerQry, max_id=str(max_id - 1), since_id=sinceId)
        if not new_tweets:
            print("No more tweets found")
            break
        for tweet in new_tweets:
            # dump the raw tweet as one JSON object per line
            json.dump(tweet._json, jsonFile)
            jsonFile.write('\n')
            # I didn't put every field in the csv
            csvWriter.writerow([
                tweet.created_at,
                tweet.id,
                tweet.in_reply_to_status_id,
                tweet.in_reply_to_user_id,
                tweet.in_reply_to_screen_name,
                tweet.user.id,
                tweet.user.screen_name,
                tweet.user.followers_count,
                tweet.is_quote_status,
                tweet.retweet_count,
                tweet.favorite_count,
                tweet.lang,
                tweet.text])  # no .encode() needed: the csv file is opened with encoding='utf-8'
        tweetCount += len(new_tweets)
        print("Downloaded {0} tweets".format(tweetCount))
        max_id = new_tweets[-1].id
    except tweepy.TweepError as e:
        # retry with the same max_id; beware this loops forever on a persistent error
        print("some error : " + str(e))
        continue

print("Downloaded {0} tweets".format(tweetCount))
csvFile.close()
jsonFile.close()