# %% [markdown]
# # Sentiment Analysis using Article Headlines from FinViz.com

# %%
# All imports live in one cell: standard library first, then third-party.
from urllib.request import urlopen, Request

from bs4 import BeautifulSoup
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# %%
# Base quote URL and the tickers whose headline tables are downloaded.
finviz_url = "https://www.finviz.com/quote.ashx?t="
tickers = ["ZLAB"]

# Maps each ticker to the element with id "news-table" from its quote page.
news_tables = {}

for ticker in tickers:
    # The ticker symbol is appended to the base URL to form the page URL.
    url = finviz_url + ticker

    # The request carries an explicit user-agent header.
    request = Request(url=url, headers={'user-agent': 'my-app'})

    # The context manager closes the HTTP response once the page is parsed,
    # instead of leaving the connection open for the life of the kernel.
    with urlopen(request) as response:
        # Name the parser explicitly: the bare 'html' feature lets
        # BeautifulSoup pick whichever HTML parser is installed, so the
        # resulting tree could differ between environments.
        html = BeautifulSoup(response, 'html.parser')

    news_table = html.find(id='news-table')
    if news_table is None:
        # Fail here with a clear message rather than with an AttributeError
        # on None in the parsing cell below.
        raise ValueError(f"No element with id 'news-table' found at {url}")
    news_tables[ticker] = news_table

# %%
# One [ticker, date, time, title] record per headline row.
parsed_data = []

for ticker, news_table in news_tables.items():
    # Rows that carry only a time reuse the date of the closest preceding
    # row. Resetting per ticker keeps a date from leaking across tickers and
    # guarantees the name is bound even if the first row has no date.
    date = None

    for row in news_table.find_all('tr'):
        # Skip rows that have no headline link or no leading cell; indexing
        # into them would raise AttributeError on None.
        if row.a is None or row.td is None:
            continue

        title = row.a.text

        # Split on any run of whitespace so padding around the timestamp
        # does not end up inside the date/time values.
        date_data = row.td.text.split()
        if not date_data:
            continue

        # One token means time only; two or more mean date followed by time.
        if len(date_data) == 1:
            time = date_data[0]
        else:
            date, time = date_data[0], date_data[1]

        parsed_data.append([ticker, date, time, title])
# %%
# Tabulate the scraped records: one row per headline.
df = pd.DataFrame(parsed_data, columns=['ticker', 'date', 'time', 'title'])
df.head()

# %%
# SentimentIntensityAnalyzer loads the "vader_lexicon" NLTK resource and
# raises LookupError when it has never been downloaded. Fetch it only when it
# is missing so the notebook also runs on a fresh environment, and stays
# offline-friendly once the resource is installed.
try:
    nltk.data.find("sentiment/vader_lexicon.zip")
except LookupError:
    nltk.download("vader_lexicon")

# Initialize the VADER sentiment analyzer used by the scoring cells below.
analyzer = SentimentIntensityAnalyzer()
# %%
def f(title):
    """Return the VADER compound polarity score for one headline."""
    return analyzer.polarity_scores(title)['compound']


# Score every headline, then reduce the parsed timestamps to plain dates.
df['compound'] = df['title'].apply(f)
df['date'] = pd.to_datetime(df['date']).dt.date
df.head()

# %%
# Persist the headline-level compound scores.
df.to_csv("../Data/zlab_sentiment_finviz.csv", index=False)

# %%
def get_sentiment(score):
    """
    Map a compound score onto a discrete sentiment label.

    Returns 1 when the score is at least 0.05 (positive), -1 when it is at
    most -0.05 (negative), and 0 otherwise (neutral).
    """
    if score >= 0.05:
        return 1
    if score <= -0.05:
        return -1
    return 0
# %%
# Remove the earlier compound column without mutating in place.
# errors="ignore" makes the cell safe to re-run: the in-place drop raised
# KeyError the second time because the column was already gone.
df = df.drop(columns=["compound"], errors="ignore")
df.head()

# %%
def _compound_or_nan(title):
    """Return the VADER compound score of a headline, or NaN if it cannot be scored.

    An AttributeError from the analyzer is turned into NaN instead of being
    swallowed, so every headline keeps a slot in the result.
    """
    try:
        return analyzer.polarity_scores(title)["compound"]
    except AttributeError:
        return float("nan")


# Score each headline on df's own index. The previous loop skipped rows that
# raised AttributeError, which made the score lists shorter than df and
# caused the index-based join below to attach scores to the wrong headlines.
zlab_compound = df["title"].map(_compound_or_nan)

# Sentiment scores dictionary; both entries are aligned with df.index.
zlab_sent = {
    "zlab_compound": zlab_compound,
    # na_action="ignore" leaves unscored headlines as missing rather than
    # labelling them neutral.
    "zlab_sentiment": zlab_compound.map(get_sentiment, na_action="ignore"),
}

# Attach the sentiment columns to the headlines DataFrame.
zlab_sentiment_df = pd.DataFrame(zlab_sent)
zlab_df = df.join(zlab_sentiment_df)

# Every headline must keep exactly one row after the join.
assert len(zlab_df) == len(df), "sentiment columns are misaligned with headlines"
# %%
# Preview the first five rows of the combined headline/sentiment table.
zlab_df.head(n=5)

# %%
# Write the combined table to CSV, leaving out the index column.
zlab_df.to_csv(path_or_buf="../Data/zlab_combined_sentiment.csv", index=False)