First beta version

author: darroyolpz <darroyolpz@users.noreply.github.com> 2019-09-07 17:59:00 +0200
committer: GitHub <noreply@github.com> 2019-09-07 17:59:00 +0200
commit: 37ce1116c68a136171f8eb0599f2baecf38227ed (patch)
tree: 715fa574eb855b61c4846431797cb1faf7decde0
parent: 1bab5054c54c706469fe414b29074f20bbdabacb (diff)
1 files changed, 214 insertions, 0 deletions
diff --git a/Data Scraping for Binance Announcements.ipynb b/Data Scraping for Binance Announcements.ipynb
new file mode 100644
index 0000000..d45aaa1
--- /dev/null
+++ b/Data Scraping for Binance Announcements.ipynb
@@ -0,0 +1,214 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Data Scraping for Binance Announcements\n",
+    "Beta version. Modified on 07-09-2019"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Import all the needed packages:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import bs4 as bs\n",
+    "import urllib.request\n",
+    "import tweepy, os"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Set up the Twitter app: read the API credentials from environment variables and authenticate with tweepy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Credentials are read from the environment so that no secret is stored in the notebook;\n",
+    "# a variable that is not set comes back as None.\n",
+    "consumer_key, consumer_secret, access_token, access_token_secret = (\n",
+    "    os.environ.get(name) for name in ('TW_CONSUMER_KEY', 'TW_CONSUMER_SECRET',\n",
+    "                                      'TW_ACCESS_TOKEN', 'TW_ACCESS_TOKEN_SECRET')\n",
+    ")\n",
+    "auth = tweepy.OAuthHandler(consumer_key, consumer_secret)  # consumer key and secret\n",
+    "auth.set_access_token(access_token, access_token_secret)  # access token and secret\n",
+    "api = tweepy.API(auth)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create two empty lists for storing the news URLs. A message should be sent whenever new_urls contains an item that wasn't in old_urls"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "old_urls, new_urls = [], []  # first-pass baseline and latest scrape; both are filled in further down"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a bag of key words to match against. Don't use plurals, otherwise you will get duplicates"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "key_words = ['List', 'list', 'Token Sale', 'Open Trading', 'open trading']  # matched as case-sensitive substrings, hence both capitalisations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create the function that extracts the information from the webpage and returns the matches"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def extract_binance(main_webpage, key_words):\n",
+    "    \"\"\"Fetch main_webpage and return one [text, url] pair per article item whose text contains any of key_words.\"\"\"\n",
+    "    with urllib.request.urlopen(main_webpage) as response:  # closes the connection once the page is read\n",
+    "        soup = bs.BeautifulSoup(response.read(), 'lxml')\n",
+    "    final_list = []\n",
+    "    for article in soup.find_all('li', class_='article-list-item'):\n",
+    "        article_text = article.get_text().replace('\\n', '')\n",
+    "        link = article.find('a', href=True)\n",
+    "        if link is None:\n",
+    "            continue  # an item without a linked anchor has no URL to report\n",
+    "        # any() stops at the first hit, so text matching several key words is stored only once\n",
+    "        if any(item in article_text for item in key_words):\n",
+    "            final_list.append([article_text, 'https://www.binance.com' + link['href']])\n",
+    "    return final_list"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Run the first pass to build the baseline list (old_urls)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 38.2 ms, sys: 3.74 ms, total: 41.9 ms\n",
+      "Wall time: 147 ms\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "main_webpage = 'https://www.binance.com/en/support/categories/115000056351'  # page whose article list is scanned\n",
+    "old_urls = extract_binance(main_webpage, key_words)  # baseline that the later scrape is compared against"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[['Binance Lists Chiliz (CHZ)',\n",
+       "  'https://www.binance.com/en/support/articles/360033377831'],\n",
+       " ['Binance Completes Perlin Lottery Draw and Will Open Trading For PERL',\n",
+       "  'https://www.binance.com/en/support/articles/360032900851'],\n",
+       " ['Binance Lists Second BEP2 Community Listing Project - TomoChain (TOMO)',\n",
+       "  'https://www.binance.com/en/support/articles/360032514812'],\n",
+       " ['Introducing the Band Protocol (BAND) Token Sale on Binance Launchpad',\n",
+       "  'https://www.binance.com/en/support/articles/360033102832'],\n",
+       " ['Pepito', 'Fulanito']]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "new_urls = extract_binance(main_webpage, key_words)\n",
+    "a = ['Pepito', 'Fulanito']  # made-up entry so that the comparison has something new to report\n",
+    "new_urls += [a]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Pepito\n"
+     ]
+    }
+   ],
+   "source": [
+    "for item in new_urls:\n",
+    "    if item in old_urls:\n",
+    "        continue  # already present in the first pass, nothing to announce\n",
+    "    msg = item[0]  # element 0 is the article text, element 1 its URL\n",
+    "    print(msg)  # printed only for now; the api.update_status call is still disabled while testing"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
author	darroyolpz <darroyolpz@users.noreply.github.com>	2019-09-07 17:59:00 +0200
committer	GitHub <noreply@github.com>	2019-09-07 17:59:00 +0200
commit	37ce1116c68a136171f8eb0599f2baecf38227ed (patch)
tree	715fa574eb855b61c4846431797cb1faf7decde0
parent	1bab5054c54c706469fe414b29074f20bbdabacb (diff)