#!/usr/bin/env python

"""
@brief Collect Tweets
@author Remy Decoupes
@copyright

Connect to the Twitter stream using the Twitter API and filter the tweets to retrieve by:
    - Accounts to follow : accountsFollowed.csv
    - Hashtags to follow : keywordsFilter.csv

To install and run this script, please follow the instructions in README.md
"""
import json
import logging
import os
import sys
import time
from logging.handlers import RotatingFileHandler

import pandas as pd
import tweepy


rdecoupe's avatar
rdecoupe committed
22
23
24
25
26
27
28
29
30
31
def exitscript(logger, message):
    """
    Log an error and terminate the script with exit status 1.

    :param logger: a logger object exposing an ``error`` method
    :param message: description of the error, written to the log
    :return: never returns normally; ``sys.exit(1)`` raises SystemExit
    """
    logger.error("The program encountered an error")
    # Log the argument that was passed in; do not rely on a global named ``msg``.
    logger.error(message)
    logger.error("End of execution.")
    sys.exit(1)


rdecoupe's avatar
rdecoupe committed
35
36
37
38
39
40
41
42
43
44
def logsetup():
    """
    Configure and return the root logger :
        - Log in file : log/collectweets.log (rotating, DEBUG and above)
        - also print on screen (INFO and above, without the timestamp format)

    The ``log`` directory is created when missing. Calling this function
    again does not attach a second pair of handlers.

    :return: logger object (the root logger)
    """
    log_path = 'log/collectweets.log'
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    # RotatingFileHandler stores its target as an absolute path in
    # ``baseFilename``; use it to detect that setup already happened.
    target = os.path.abspath(log_path)
    already_configured = any(
        isinstance(handler, RotatingFileHandler) and handler.baseFilename == target
        for handler in logger.handlers
    )
    if already_configured:
        return logger

    # Opening the log file fails if its parent directory does not exist.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)

    formatter = logging.Formatter('%(asctime)s :: %(levelname)s :: %(message)s')
    file_handler = RotatingFileHandler(log_path, mode='a', maxBytes=1000000, backupCount=1)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    stream_handler = logging.StreamHandler()
    # Only display on screen INFO
    stream_handler.setLevel(logging.INFO)
    logger.addHandler(stream_handler)
    return logger
rdecoupe's avatar
rdecoupe committed
54

Rémy Decoupes's avatar
Rémy Decoupes committed
55
56
57
58
59
60
61
62
63

class Listener(tweepy.StreamListener):
    """
    Stream listener that writes each received status to a file-like object,
    one JSON document per line.
    """

    def __init__(self, output_file=sys.stdout, logger=None):
        """
        :param output_file: writable text file-like object receiving the tweets
        :param logger: logger object; when omitted a module-level logger is used
                       (the previous default, sys.stdout, has no ``info`` method)
        """
        super(Listener, self).__init__()
        self.output_file = output_file
        self.logger = logger if logger is not None else logging.getLogger(__name__)
        self.logger.info("initiate stream listener")

    def on_status(self, status):
        """
        Write the raw payload of a status as one line of JSON.

        :param status: status object exposing its raw payload as ``_json``
        """
        # Serialize with json.dumps: printing the dict directly emits a Python
        # repr (single quotes, True/None), which is not valid JSON.
        print(json.dumps(status._json), file=self.output_file)

    def on_error(self, status_code):
        """
        Log the error code reported by the stream.

        :param status_code: error code passed in by the stream
        :return: False
        """
        # Use the logger given to this instance, not a module-level global.
        self.logger.error("Error on stream twiter: "+str(status_code))
        return False


rdecoupe's avatar
rdecoupe committed
71
if __name__ == '__main__':
    # initialize a logger :
    logger = logsetup()
    logger.info("Collect tweets : start")

    # try import credentials of MOODTwitter account
    try:
        from params import credentials
    except ImportError:
        msg = 'it seems there is no file named :"credentials.py"'
        exitscript(logger, msg)

    # Access and authorize on MOOD twitter Account
    try:
        auth = tweepy.OAuthHandler(credentials.consumer_key, credentials.consumer_secret)
        auth.set_access_token(credentials.access_token, credentials.access_token_secret)
        api = tweepy.API(auth)
        # Get the User object for twitter...
        accountused = api.me()
        logger.info("Log with: " + accountused.name)
    except tweepy.TweepError as twe:
        # Keep the API's own explanation in the log file for troubleshooting.
        logger.debug("Twitter API error: %s", twe)
        msg = "Wrong credentials: please check credentials.py"
        exitscript(logger, msg)
    except Exception as e:
        # str() is required: concatenating str with an exception raises TypeError.
        msg = "Please double check credentials.py :" + str(e)
        exitscript(logger, msg)

    # Get twitter ID of account
    # Read the IDs as text so they are never altered by numeric parsing, and
    # drop empty cells so no "nan" entry is sent to the API.
    accounttofollowed = pd.read_csv("params/accountsFollowed.csv", dtype={"twitterID": str})
    accounttofollowedlist = accounttofollowed["twitterID"].dropna().str.strip().tolist()

    # Start a Twitter stream
    timestr = time.strftime("%Y%m%d-%H%M%S")
    tweetouputfilename = "output/tweetoutput"+timestr+".jsonl"
    # Opening the output file fails if its directory does not exist.
    os.makedirs("output", exist_ok=True)
    # The with-block closes the file even if listener or stream creation fails.
    with open(tweetouputfilename, 'w', encoding='utf-8') as tweetoutput:
        myStreamListener = Listener(tweetoutput, logger)
        stream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
        try:
            logger.info("Start streaming")
            stream.filter(follow=accounttofollowedlist)
        except KeyboardInterrupt:
            logger.info("Stream Keyboard Interrupt")
        finally:
            stream.disconnect()

    logger.info("Collect tweets : proceeded normally")