# News API Sentiment Analysis

In [1]:
import os
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Import News API and Natural Language Toolkit
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load APIs

In [3]:
# Read key/value pairs from a .env file into the process environment so the
# os.environ lookup used to build the News API client can find the API key
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
# Build the News API client; the key is read from the "news_api" environment
# variable, so a missing variable raises KeyError on this line
newsapi = NewsApiClient(api_key=os.environ["news_api"])

## Create Headline and Sentiment Analyzer Functions

In [5]:
# Query window as ISO-format strings: the most recent day to fetch
# (current_date) and the boundary one month earlier (past_date).
# NOTE: both dates are hard-coded rather than derived from today's date;
# get_headlines() only uses the first 10 characters (YYYY-MM-DD) of each.
current_date = pd.Timestamp("2021-01-18", tz="America/New_York").isoformat()
past_date = pd.Timestamp("2020-12-18", tz="America/New_York").isoformat()

# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    """Fetch one page of relevancy-sorted English headlines per day for ``keyword``.

    Walks backwards one day at a time from the module-level ``current_date``
    (inclusive) down to ``past_date`` (exclusive) and issues one
    ``newsapi.get_everything`` request per day, printing progress as it goes.
    Only ``page=1`` is requested and no page size is set, so the number of
    headlines per day is whatever the API returns for its first page.

    Parameters
    ----------
    keyword : str
        Search term passed to the API as ``q``.

    Returns
    -------
    tuple of (list of list, list of datetime)
        ``all_headlines[i]`` holds the article titles returned for
        ``all_dates[i]``. Titles are stored exactly as returned; the
        sentiment summarizer in this notebook skips ``None`` entries.
    """
    all_headlines = []
    all_dates = []
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        # Send a plain YYYY-MM-DD string. str(datetime) produces
        # "YYYY-MM-DD HH:MM:SS" (space separated), which is not one of the
        # date formats the API documents for the from/to parameters.
        day = date.strftime("%Y-%m-%d")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=day,
            to=day,
            language="en",
            sort_by="relevancy",
            page=1,
        )
        all_headlines.append([article["title"] for article in articles["articles"]])
        all_dates.append(date)
        date -= timedelta(days=1)
    return all_headlines, all_dates

In [6]:
# Instantiate the VADER sentiment analyzer once; headline_sentiment_summarizer_avg()
# reads this module-level object to score each headline
analyzer = SentimentIntensityAnalyzer()

In [7]:
# Fetch Roku headlines and their matching dates (one API request per day in the window)
roku_headlines, roku_dates = get_headlines("roku")

Fetching news about 'roku'
******************************
retrieving news from: 2021-01-18 00:00:00
retrieving news from: 2021-01-17 00:00:00
retrieving news from: 2021-01-16 00:00:00
retrieving news from: 2021-01-15 00:00:00
retrieving news from: 2021-01-14 00:00:00
retrieving news from: 2021-01-13 00:00:00
retrieving news from: 2021-01-12 00:00:00
retrieving news from: 2021-01-11 00:00:00
retrieving news from: 2021-01-10 00:00:00
retrieving news from: 2021-01-09 00:00:00
retrieving news from: 2021-01-08 00:00:00
retrieving news from: 2021-01-07 00:00:00
retrieving news from: 2021-01-06 00:00:00
retrieving news from: 2021-01-05 00:00:00
retrieving news from: 2021-01-04 00:00:00
retrieving news from: 2021-01-03 00:00:00
retrieving news from: 2021-01-02 00:00:00
retrieving news from: 2021-01-01 00:00:00
retrieving news from: 2020-12-31 00:00:00
retrieving news from: 2020-12-30 00:00:00
retrieving news from: 2020-12-29 00:00:00
retrieving news from: 2020-12-28 00:00:00
retrieving news fr

In [8]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    """Return the mean VADER compound score of the headlines for each day.

    Parameters
    ----------
    headlines : list of list
        One inner list of headline strings per day. ``None`` entries are
        skipped.

    Returns
    -------
    list of float
        One average per inner list, in the same order as ``headlines``.
        A day with no scorable headlines (empty list, or only ``None``
        entries) yields ``nan`` instead of raising ``ZeroDivisionError``,
        so the result stays aligned with the list of dates.
    """
    sentiment = []
    for day in headlines:
        # Uses the module-level `analyzer` to score every non-missing title.
        day_scores = [
            analyzer.polarity_scores(title)["compound"]
            for title in day
            if title is not None
        ]
        if day_scores:
            sentiment.append(sum(day_scores) / len(day_scores))
        else:
            # Keep one entry per day even when there is nothing to average.
            sentiment.append(float("nan"))
    return sentiment

In [9]:
# Calculate Roku's average compound sentiment score for each day
# (one value per entry in roku_headlines, in the same order)
roku_avg = headline_sentiment_summarizer_avg(roku_headlines)

In [10]:
# Build the daily sentiment table indexed by date, then preview the first rows
roku_sentiment_df = pd.DataFrame(
    {"Avg_Score": roku_avg},
    index=pd.to_datetime(roku_dates),
)
roku_sentiment_df.head()

Unnamed: 0,Avg_Score
2021-01-18,0.08493
2021-01-17,0.150815
2021-01-16,-0.17151
2021-01-15,0.02927
2021-01-14,0.21285


In [11]:
# Write the daily sentiment table (date index plus Avg_Score column) to CSV;
# the path is relative to the notebook's working directory
roku_sentiment_df.to_csv("../Data/roku_newsapi_date.csv")