# News API Sentiment Analysis

In [1]:
import os
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Import News API and Natural Language Toolkit
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load APIs

In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
newsapi = NewsApiClient(api_key=os.environ["news_api"])

## Create Headline and Sentiment Analyzer Functions

In [5]:
# Set current date and the date from one month ago using the ISO format
current_date = pd.Timestamp("2021-01-18", tz="America/New_York").isoformat()
past_date = pd.Timestamp("2020-12-18", tz="America/New_York").isoformat()

# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    all_headlines = []
    all_dates = []    
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    end_date = datetime.strptime(past_date[:10], "%Y-%m-%d")
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while date > end_date:
        print(f"retrieving news from: {date}")
        articles = newsapi.get_everything(
            q=keyword,
            from_param=str(date),
            to=str(date),
            language="en",
            sort_by="relevancy",
            page=1,
        )
        headlines = []
        for i in range(0, len(articles["articles"])):
            headlines.append(articles["articles"][i]["title"])
        all_headlines.append(headlines)
        all_dates.append(date)
        date = date - timedelta(days=1)
    return all_headlines, all_dates

In [6]:
# Instantiate Sentiment Analyzer
analyzer = SentimentIntensityAnalyzer()

In [32]:
# Sentiment calculation based on compound score
def get_sentiment(score):
    """
    Calculates the sentiment based on the compound score.
    """
    result = 0  # Neutral by default
    if score >= 0.05:  # Positive
        result = 1
    elif score <= -0.05:  # Negative
        result = -1

    return result

In [37]:
roku_headlines, roku_dates = get_headlines("roku")

Fetching news about 'roku'
******************************
retrieving news from: 2021-01-15 00:00:00
retrieving news from: 2021-01-14 00:00:00
retrieving news from: 2021-01-13 00:00:00
retrieving news from: 2021-01-12 00:00:00
retrieving news from: 2021-01-11 00:00:00
retrieving news from: 2021-01-10 00:00:00
retrieving news from: 2021-01-09 00:00:00
retrieving news from: 2021-01-08 00:00:00
retrieving news from: 2021-01-07 00:00:00
retrieving news from: 2021-01-06 00:00:00
retrieving news from: 2021-01-05 00:00:00
retrieving news from: 2021-01-04 00:00:00
retrieving news from: 2021-01-03 00:00:00
retrieving news from: 2021-01-02 00:00:00
retrieving news from: 2021-01-01 00:00:00
retrieving news from: 2020-12-31 00:00:00
retrieving news from: 2020-12-30 00:00:00
retrieving news from: 2020-12-29 00:00:00
retrieving news from: 2020-12-28 00:00:00
retrieving news from: 2020-12-27 00:00:00
retrieving news from: 2020-12-26 00:00:00
retrieving news from: 2020-12-25 00:00:00
retrieving news fr

In [38]:
# Converting the roku_headlines from list of list to just list
roku_list = [headline for sublist in roku_headlines for headline in sublist]

In [39]:
# roku_list
# Convert the new list into a dataframe
roku_df = pd.DataFrame(roku_list)
roku_df.rename(columns={0: "title"}, inplace = True)
roku_df.head()

Unnamed: 0,title
0,"How to cancel your account at Netflix, Amazon ..."
1,"At CES 2021, TCL put all other TV makers on no..."
2,Everything to know about Discovery+
3,How the Xbox’s default “instant on” feature co...
4,What Must Roku Do To Justify Its Lofty Share P...


In [41]:
roku_df.shape

(620, 1)

In [43]:
# Sentiment scores dictionaries
roku_sent = {
    "roku_compound": [],
    "roku_sentiment": [],
}

# Get sentiment for the tweets
for index, row in roku_df.iterrows():
    try:
        # Sentiment scoring with VADER
        roku_sentiment = analyzer.polarity_scores(row["title"])
        roku_sent["roku_compound"].append(roku_sentiment["compound"])
        roku_sent["roku_sentiment"].append(get_sentiment(roku_sentiment["compound"]))

    except AttributeError:
        pass

# Attaching sentiment columns to the News DataFrame
roku_sentiment_df = pd.DataFrame(roku_sent)
roku_df = roku_df.join(roku_sentiment_df)

In [46]:
roku_df.tail()

Unnamed: 0,title,roku_compound,roku_sentiment
615,Roku and WarnerMedia have reached an agreement...,-0.3818,-1.0
616,Finally: HBO Max Coming to Roku After Lengthy ...,0.0,0.0
617,The HBO Max Roku app is launching just in time...,0.0,0.0
618,What is Acorn TV? Everything you need to know ...,0.0,0.0
619,HBO Max Finally Coming to Roku as ‘Wonder Woma...,,


In [49]:
roku_df.dropna(inplace=True)

In [50]:
roku_df.to_csv("../Data/roku_newsapi.csv", index=False)