#!/usr/bin/env python

"""
@brief Collect Tweets
@author Remy Decoupes
@copyright CeCILL-B

Connect to the Twitter stream through the Twitter API and retrieve only the tweets matching these filters:
    - Accounts to follow : accountsFollowed.csv
    - Hashtags to follow : keywordsFilter.csv

To install and run this script, please follow the instructions in README.md
"""
rdecoupe's avatar
rdecoupe committed
14
15
import logging
import os
import sys
import time
from logging.handlers import RotatingFileHandler

import pandas as pd
import tweepy
rdecoupe's avatar
rdecoupe committed
20
21


rdecoupe's avatar
rdecoupe committed
22
23
24
25
26
27
28
29
30
31
def exitscript(logger, message):
    """
    Log an error and terminate the script.

    :param logger: a logger object exposing an ``error`` method
    :param message: description of the failure, written to the log
    :return: never returns; raises SystemExit with exit status 1
    """
    logger.error("The program encountered an error")
    # Log the caller-supplied parameter rather than relying on a global
    # variable that may be undefined or hold a stale value.
    logger.error(message)
    logger.error("End of execution.")
    sys.exit(1)


rdecoupe's avatar
rdecoupe committed
35
36
37
38
39
40
41
42
43
44
def logsetup():
    """
    Configure and return the root logger.

    Two handlers are attached to it:
        - a rotating file handler writing to log/collectweets.log
          (DEBUG and above, formatted with time, level and message,
          rotated at 1000000 bytes with 1 backup file)
        - a stream handler that only emits INFO and above

    The log directory is created when it does not exist yet, so the script
    can start from a fresh checkout.

    :return: logger object
    """
    log_path = 'log/collectweets.log'
    # RotatingFileHandler opens the file immediately and fails with
    # FileNotFoundError if the parent directory is missing.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s :: %(levelname)s :: %(message)s')

    file_handler = RotatingFileHandler(log_path, 'a', 1000000, 1)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    stream_handler = logging.StreamHandler()
    # Only display on screen INFO
    stream_handler.setLevel(logging.INFO)
    logger.addHandler(stream_handler)
    return logger
rdecoupe's avatar
rdecoupe committed
54

Rémy Decoupes's avatar
Rémy Decoupes committed
55
56
57
58
59
60
61
62
63

class Listener(tweepy.StreamListener):
    """
    Stream listener that writes every received status to an output file
    and reports stream errors through a logger.
    """

    def __init__(self, output_file=sys.stdout, logger=None):
        """
        :param output_file: writable file-like object receiving one line per status
        :param logger: logger object used for messages; when omitted, the
            logger named after this module is used
        """
        super(Listener, self).__init__()
        self.output_file = output_file
        # The former default (sys.stdout) has no info()/error() methods, so
        # constructing a Listener without a logger crashed right here.
        self.logger = logger if logger is not None else logging.getLogger(__name__)
        self.logger.info("initiate stream listener")

    def on_status(self, status):
        """
        Write the raw payload of a status to the output file.

        :param status: status object exposing a ``_json`` attribute
        :return:
        """
        try:
            # NOTE(review): print() writes str(status._json); if _json is a
            # dict this is a Python repr, not strict JSON - confirm that the
            # consumers of the output file expect that format.
            print(status._json, file=self.output_file)
        except Exception:
            # "except Exception" (not a bare except) so that KeyboardInterrupt
            # and SystemExit are not reported as a file-writing failure.
            msg = "can not save tweets in file"+str(self.output_file)
            exitscript(self.logger, msg)

    def on_error(self, status_code):
        """
        Log the error code reported by the stream.

        :param status_code: error code received from the stream
        :return: always False (presumably tells tweepy to stop the stream -
            confirm against the installed tweepy version)
        """
        # Use the logger given to this instance, not a module-level global
        # that only exists when the file is run as a script.
        self.logger.error("Error on stream twitter: "+str(status_code))
        return False


rdecoupe's avatar
rdecoupe committed
75
if __name__ == '__main__':
    # initialize a logger :
    logger = logsetup()
    logger.info("Collect tweets : start")

    # try import credentials of MOODTwitter account
    try:
        from params import credentials
    except ImportError:
        msg = 'it seems there is no file named :"credentials.py"'
        exitscript(logger, msg)

    # Access and authorize on MOOD twitter Account
    try:
        auth = tweepy.OAuthHandler(credentials.consumer_key, credentials.consumer_secret)
        auth.set_access_token(credentials.access_token, credentials.access_token_secret)
        api = tweepy.API(auth)
        # Get the User object for twitter...
        accountused = api.me()
        logger.info("Log with: " + accountused.name)
    except tweepy.TweepError as twe:
        # Keep the underlying API error in the log so that failures other
        # than bad credentials can still be diagnosed.
        logger.debug("tweepy error detail: %s", twe)
        msg = "Wrong credentials: please check credentials.py"
        exitscript(logger, msg)
    except Exception as e:
        # str() is required: concatenating a str with an exception object
        # raises TypeError and would hide the original problem.
        msg = "Please double check credentials.py :" + str(e)
        exitscript(logger, msg)

    # Get twitter ID of account
    accounttofollowed = pd.read_csv("params/accountsFollowed.csv")
    accounttofollowedlist = accounttofollowed['twitterID'].tolist()
    # the IDs are handed to the stream filter as strings
    accounttofollowedlist = list(map(str, accounttofollowedlist))

    # Start a Twitter stream
    timestr = time.strftime("%Y%m%d-%H%M%S")
    tweetouputfilename = "output/tweetoutput"+timestr+".jsonl"
    # open() fails if the output directory does not exist yet
    os.makedirs(os.path.dirname(tweetouputfilename), exist_ok=True)
    # The with-block closes the file even if building the listener or the
    # stream fails; UTF-8 is set explicitly because tweets contain arbitrary
    # Unicode that the platform default encoding may not be able to encode.
    with open(tweetouputfilename, 'w', encoding='utf-8') as tweetoutput:
        myStreamListener = Listener(tweetoutput, logger)
        stream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
        try:
            logger.info("Start streaming")
            stream.filter(follow=accounttofollowedlist)
        except KeyboardInterrupt:
            logger.info("Stream Keyboard Interrupt")
        finally:
            stream.disconnect()

    logger.info("Collect tweets : proceeded normally")