collectTweets.py 3.62 KB
Newer Older
rdecoupe's avatar
rdecoupe committed
1
2
3
#!/usr/bin/env python

"""
@brief Collect Tweets
@author Remy Decoupes
@copyright

Connect to the Twitter stream using the Twitter API and filter the tweets to retrieve by:
    - Account to follow : accountsFollowed.csv
    - Hashtag to follow : keywordsFilter.csv

To install and run this script : please follow instructions from README.md
"""
rdecoupe's avatar
rdecoupe committed
14
15
import json
import logging
import os
import sys
from logging.handlers import RotatingFileHandler

import pandas as pd
import tweepy
rdecoupe's avatar
rdecoupe committed
19
20


rdecoupe's avatar
rdecoupe committed
21
22
23
24
25
26
27
28
29
30
def exitscript(logger, message):
    """
    Log an error report and terminate the script with exit status 1.

    :param logger: a logger object (only its ``error`` method is called)
    :param message: description of the problem, logged between the header and footer lines
    :return: never returns, sys.exit(1) raises SystemExit
    """
    logger.error("The program encountered an error")
    # Log the argument itself: the previous code read the name ``msg``, which is not
    # defined in this function and only resolved when the caller had a global ``msg``.
    logger.error(message)
    logger.error("End of execution.")
    sys.exit(1)


rdecoupe's avatar
rdecoupe committed
34
35
36
37
38
39
40
41
42
43
def logsetup():
    """
    Configure the root logger :
        - DEBUG and above are written to a rotating file : log/collectweets.log
        - INFO and above are also printed on screen
    The ``log`` directory is created when it does not exist yet.
    :return: logger object (the root logger)
    """
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s :: %(levelname)s :: %(message)s')
    # RotatingFileHandler opens its file right away and raises when the directory is missing
    os.makedirs('log', exist_ok=True)
    file_handler = RotatingFileHandler(
        'log/collectweets.log', mode='a', maxBytes=1000000, backupCount=1
    )
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    # No formatter is set on the screen handler, so it keeps the logging default output
    stream_handler = logging.StreamHandler()
    # Only display on screen INFO
    stream_handler.setLevel(logging.INFO)
    logger.addHandler(stream_handler)
    return logger
rdecoupe's avatar
rdecoupe committed
53

Rémy Decoupes's avatar
Rémy Decoupes committed
54
55
56
57
58
59
60
61
62

class Listener(tweepy.StreamListener):
    """
    Stream listener writing every received status to a file, one JSON document per line.
    """

    def __init__(self, output_file=sys.stdout, logger=sys.stdout):
        """
        :param output_file: writable text file object receiving the statuses
        :param logger: logger object; a value without ``info``/``error`` methods (such as
            the ``sys.stdout`` default) is replaced by the root logger
        """
        super().__init__()
        self.output_file = output_file
        # The default value (sys.stdout) is not a logger: calling .info() on it would crash
        if hasattr(logger, "info") and hasattr(logger, "error"):
            self.logger = logger
        else:
            self.logger = logging.getLogger()
        self.logger.info("initiate stream listener")

    def on_status(self, status):
        """
        Write the raw payload of a status as a single JSON line.
        :param status: status object received from the stream (its ``_json`` payload is written)
        """
        # Serialize explicitly: printing the dict emits its Python repr
        # (single quotes, True/None), which is not valid JSON for a .jsonl file.
        print(json.dumps(status._json), file=self.output_file)

    def on_error(self, status_code):
        """
        Log the error code reported by the stream.
        :param status_code: error code given by the stream
        :return: False
        """
        # Use the logger given to this instance, not a module-level global
        self.logger.error("Error on stream twiter: "+str(status_code))
        return False


rdecoupe's avatar
rdecoupe committed
70
if __name__ == '__main__':
    # initialize a logger :
    logger = logsetup()
    logger.info("Collect tweets : start")

    # try import credentials of MOODTwitter account
    try:
        from params import credentials
    except ImportError:
        msg = 'it seems there is no file named :"credentials.py"'
        exitscript(logger, msg)

    # Access and authorize on MOOD twitter Account
    try:
        auth = tweepy.OAuthHandler(credentials.consumer_key, credentials.consumer_secret)
        auth.set_access_token(credentials.access_token, credentials.access_token_secret)
        api = tweepy.API(auth)
        # Get the User object for twitter...
        accountused = api.me()
        logger.info("Log with: " + accountused.name)
    except tweepy.TweepError as twe:
        # Keep the cause reported by the API in the log file
        logger.debug("Twitter API error: %s", twe)
        msg = "Wrong credentials: please check credentials.py"
        exitscript(logger, msg)
    except Exception as e:
        # str() is required: concatenating a str with an exception object raises TypeError
        msg = "Please double check credentials.py :" + str(e)
        exitscript(logger, msg)

    # Get twitter ID of account (the stream filter is given them as strings)
    accounttofollowed = pd.read_csv("params/accountsFollowed.csv")
    accounttofollowedlist = [
        str(twitter_id) for twitter_id in accounttofollowed['twitterID'].tolist()
    ]

    # Start a Twitter stream
    tweetouputfilename = "output/tweetoutput.jsonl"
    # The context manager closes the file even when the listener or stream cannot be created
    with open(tweetouputfilename, 'w', encoding='utf-8') as tweetoutput:
        myStreamListener = Listener(tweetoutput, logger)
        stream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
        try:
            logger.info("Start streaming")
            stream.filter(follow=accounttofollowedlist)
        except KeyboardInterrupt:
            logger.info("Stream Keyboard Interrupt")
        finally:
            stream.disconnect()

    logger.info("Collect tweets : proceeded normally")