diff --git a/twitterPredictor/twitterCollect/__pycache__/dataframe.cpython-36.pyc b/twitterPredictor/twitterCollect/__pycache__/dataframe.cpython-36.pyc
index 1e169cdb07df7a6edb1e6ec52e443e55e2253a02..c9abb0d78d329c2b6221a9c902a1054ffa06b314 100644
Binary files a/twitterPredictor/twitterCollect/__pycache__/dataframe.cpython-36.pyc and b/twitterPredictor/twitterCollect/__pycache__/dataframe.cpython-36.pyc differ
diff --git a/twitterPredictor/twitterCollect/dataframe.py b/twitterPredictor/twitterCollect/dataframe.py
index 1c0ad01896f00f6f296d957f195585098f4918d6..4cd8243d949162c6f18a676f45d6731c9042a3dc 100644
--- a/twitterPredictor/twitterCollect/dataframe.py
+++ b/twitterPredictor/twitterCollect/dataframe.py
@@ -37,8 +37,7 @@ def convert_2_dataframe(data):
                 hash_list.append("#"+hash.get("text"))
             hashtags.append(hash_list)
 
-
-        print(tweet.retweet_count)
+
         retweets.append(tweet.retweet_count)
         likes.append(tweet.favorite_count)
 
diff --git a/twitterPredictor/twitterCollect/opinion.py b/twitterPredictor/twitterCollect/opinion.py
new file mode 100644
index 0000000000000000000000000000000000000000..77714dfddfa9df6c63e84735c6b117ff4f97a576
--- /dev/null
+++ b/twitterPredictor/twitterCollect/opinion.py
@@ -0,0 +1,55 @@
+"""Classify tweets as positive, neutral or negative using TextBlob polarity."""
+from collect_candidate_tweet_activity import *
+from dataframe import *
+from textblob import *
+
+
+def categorize_tweets(data, neutral_line):
+    """Split the tweets in ``data["text"]`` by sentiment polarity.
+
+    A tweet is neutral when its polarity lies within
+    ``[-neutral_line, neutral_line]``, positive when above that band and
+    negative otherwise.
+
+    Returns a ``(pos_tweets, neu_tweets, neg_tweets)`` tuple of lists that
+    hold the original (untranslated) tweet texts.
+    """
+    pos_tweets = []
+    neu_tweets = []
+    neg_tweets = []
+
+    for item in data["text"]:
+        blob = TextBlob(item)
+        try:
+            blob = blob.translate(to='en')
+        except Exception:
+            # Best effort: if translation fails for any reason, score the
+            # original text rather than aborting the whole run.
+            pass
+
+        polarity = blob.sentiment.polarity
+        if -neutral_line <= polarity <= neutral_line:
+            neu_tweets.append(item)
+        elif polarity > neutral_line:
+            pos_tweets.append(item)
+        else:
+            neg_tweets.append(item)
+
+    return pos_tweets, neu_tweets, neg_tweets
+
+
+if __name__ == "__main__":
+    # Guarded so that importing this module does not trigger the run below.
+    tweets = get_replies_to_candidate("EmmanuelMacron")
+    data = convert_2_dataframe(tweets)
+
+    pos_tweets, neu_tweets, neg_tweets = categorize_tweets(data, 0.1)
+
+    total = len(data['text'])
+    if total == 0:
+        # Avoid ZeroDivisionError when no tweets were collected.
+        print("No tweets to analyse.")
+    else:
+        print("Percentage of positive tweets: {}%".format(len(pos_tweets)*100/total))
+        print("Percentage of neutral tweets: {}%".format(len(neu_tweets)*100/total))
+        print("Percentage of negative tweets: {}%".format(len(neg_tweets)*100/total))