#!/usr/bin/env python
"""
@brief Collect Tweets
@author Remy Decoupes
@copyright CeCILL-B

Connect to Twitter stream using Twitter API and filter tweets which have to be retrieved with
    - Account to follow : accountsFollowed.csv
    - Hashtag to follow : keywordsFilter.csv

NOTE(review): the stream started below filters only on followed accounts
(`follow=`); keywordsFilter.csv is not read in this script - confirm whether
keyword filtering is still expected.

To install and run this script : please follow instructions from README.md
"""
import tweepy
import sys
import logging
from logging.handlers import RotatingFileHandler
import pandas as pd
import time


def exitscript(logger, message):
    """
    Log an error and exit the script
    :param logger: a logger object
    :param message: description of the error, written to the log
    :return: never returns; raises SystemExit with exit status 1
    """
    logger.error("The program encountered an error")
    # Log the argument itself, not a global that happens to be called `msg`.
    logger.error(message)
    logger.error("End of execution.")
    sys.exit(1)


def logsetup():
    """
    Initiate a logger object :
        - Log in file : log/collectweets.log (rotating, DEBUG and above)
        - also print on screen (INFO and above)
    :return: logger object (the root logger)
    """
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s :: %(levelname)s :: %(message)s')

    # Rotate at 1,000,000 bytes and keep one backup file.
    file_handler = RotatingFileHandler('log/collectweets.log', 'a', 1000000, 1)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    stream_handler = logging.StreamHandler()
    # Only display on screen INFO
    stream_handler.setLevel(logging.INFO)
    logger.addHandler(stream_handler)
    return logger


class Listener(tweepy.StreamListener):
    """
    Stream listener writing every received status to a file-like object.
    """

    def __init__(self, output_file=sys.stdout, logger=None):
        """
        :param output_file: writable file-like object receiving one status per line
        :param logger: a logger object; the root logger is used when omitted
        """
        super(Listener, self).__init__()
        self.output_file = output_file
        # sys.stdout has no .info()/.error(), so fall back to a real logger.
        self.logger = logger if logger is not None else logging.getLogger()
        self.logger.info("initiate stream listener")

    def on_status(self, status):
        """
        Write the raw payload of a received status to the output file
        :param status: status object delivered by tweepy
        :return:
        """
        try:
            # NOTE(review): this prints the dict repr of the payload, which is
            # not strict JSON - confirm what downstream readers of the .jsonl
            # file expect before switching to json.dumps.
            print(status._json, file=self.output_file)
        except Exception:
            # Exception (not a bare except) lets Ctrl-C reach the main handler.
            msg = "can not save tweets in file"+str(self.output_file)
            exitscript(self.logger, msg)

    def on_error(self, status_code):
        """
        Log a stream error reported by Twitter
        :param status_code: status code reported for the stream error
        :return: False (in Tweepy 3.x this tells the stream to disconnect)
        """
        self.logger.error("Error on stream twitter: "+str(status_code))
        return False


if __name__ == '__main__':
    # initialize a logger :
    logger = logsetup()
    logger.info("Collect tweets : start")

    # try import credentials of MOODTwitter account
    try:
        from params import credentials
    except ImportError:
        msg = 'it seems there is no file named :"credentials.py"'
        exitscript(logger, msg)

    # Access and authorize on MOOD twitter Account
    try:
        auth = tweepy.OAuthHandler(credentials.consumer_key, credentials.consumer_secret)
        auth.set_access_token(credentials.access_token, credentials.access_token_secret)
        api = tweepy.API(auth)
        # Get the User object for twitter...
        accountused = api.me()
        logger.info("Log with: " + accountused.name)
    except tweepy.TweepError:
        msg = "Wrong credentials: please check credentials.py"
        exitscript(logger, msg)
    except Exception as e:
        # str() is required: concatenating str and an exception raises TypeError.
        msg = "Please double check credentials.py :" + str(e)
        exitscript(logger, msg)

    # Get twitter ID of account
    accounttofollowed = pd.read_csv("params/accountsFollowed.csv")
    accounttofollowedlist = accounttofollowed['twitterID'].tolist()
    # The IDs are passed to the stream filter as strings.
    accounttofollowedlist = list(map(str, accounttofollowedlist))

    # Start a Twitter stream
    timestr = time.strftime("%Y%m%d-%H%M%S")
    tweetouputfilename = "output/tweetoutput"+timestr+".jsonl"
    # The context manager closes the file even if the stream set-up fails;
    # UTF-8 keeps non-ASCII tweet text writable whatever the locale is.
    with open(tweetouputfilename, 'w', encoding="utf-8") as tweetoutput:
        myStreamListener = Listener(tweetoutput, logger)
        stream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
        try:
            logger.info("Start streaming")
            stream.filter(follow=accounttofollowedlist)
        except KeyboardInterrupt:
            logger.info("Stream Keyboard Interrupt")
        finally:
            stream.disconnect()

    logger.info("Collect tweets : proceeded normally")