aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordarroyolpz <darroyolpz@users.noreply.github.com>2019-09-09 23:32:42 +0200
committerGitHub <noreply@github.com>2019-09-09 23:32:42 +0200
commitaaab960e4fcacb9a9c9d24349bf9e7f1ec661d43 (patch)
tree43f55d561e84371f5325b69c1e2a1a7fa6cfc906
parent9fb9e4d6c2586cab1c2f948ae19e4de805d80002 (diff)
Delete the jupyter notebook and upload a .py file
-rw-r--r--Data Scraping for Binance Announcements.ipynb195
1 files changed, 0 insertions, 195 deletions
diff --git a/Data Scraping for Binance Announcements.ipynb b/Data Scraping for Binance Announcements.ipynb
deleted file mode 100644
index d7093b0..0000000
--- a/Data Scraping for Binance Announcements.ipynb
+++ /dev/null
@@ -1,195 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Data Scraping for Binance Announcements\n",
- "Beta version. Modified on 07-09-2019"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Import all the needed packages:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import bs4 as bs\n",
- "import urllib.request\n",
- "import tweepy, os, time"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Set up the Twitter API client. The credentials are read from environment variables."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Read the four Twitter credentials from the environment (None when a variable is unset)\n",
- "consumer_key, consumer_secret, access_token, access_token_secret = (\n",
- "    os.environ.get(var_name)\n",
- "    for var_name in (\n",
- "        'TW_CONSUMER_KEY',\n",
- "        'TW_CONSUMER_SECRET',\n",
- "        'TW_ACCESS_TOKEN',\n",
- "        'TW_ACCESS_TOKEN_SECRET',\n",
- "    )\n",
- ")\n",
- "# Build the OAuth handler from the consumer pair, then attach the access token pair\n",
- "auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
- "auth.set_access_token(access_token, access_token_secret)\n",
- "# API client built on the authenticated handler\n",
- "api = tweepy.API(auth)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Create two empty lists, `old_urls` and `news_urls`, for storing news URLs. A message should be sent whenever a newly scraped item is not already in `old_urls`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "old_urls, news_urls = [], []"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Create a bag of keywords to match against. Don't include plural forms; otherwise the same announcement will be matched more than once and produce duplicates."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "key_words = ['List', 'list', 'Token Sale', 'Open Trading', 'open trading']"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Define the function that extracts the announcements from the webpage and returns those that match a keyword."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "def extract_binance(main_webpage, key_words):\n",
- "    \"\"\"Scrape the announcement list and return the entries matching a keyword.\n",
- "\n",
- "    Args:\n",
- "        main_webpage: URL of the page that lists the announcements.\n",
- "        key_words: Iterable of substrings; an announcement is kept when its\n",
- "            text contains at least one of them (case-sensitive).\n",
- "\n",
- "    Returns:\n",
- "        A list of ``[text, url]`` pairs, one per matching announcement, in\n",
- "        the order they appear on the page.\n",
- "    \"\"\"\n",
- "    # Close the HTTP response as soon as the body has been read\n",
- "    with urllib.request.urlopen(main_webpage) as response:\n",
- "        sauce = response.read()\n",
- "    soup = bs.BeautifulSoup(sauce, 'lxml')\n",
- "    final_list = []\n",
- "    for article in soup.find_all('li', class_='article-list-item'):\n",
- "        article_text = article.get_text().replace('\\n', '')\n",
- "        # any() stops at the first hit, so an announcement that matches\n",
- "        # several keywords is added only once\n",
- "        if not any(word in article_text for word in key_words):\n",
- "            continue\n",
- "        link = article.find('a')\n",
- "        href = link.get('href') if link is not None else None\n",
- "        if not href:\n",
- "            continue  # Nothing to link to, so skip the entry instead of crashing\n",
- "        final_list.append([article_text, 'https://www.binance.com' + href])\n",
- "    return final_list"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Run a first pass to record the announcements that are already published."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 30.9 ms, sys: 349 µs, total: 31.2 ms\n",
- "Wall time: 77.1 ms\n"
- ]
- }
- ],
- "source": [
- "%%time\n",
- "main_webpage = 'https://www.binance.com/en/support/categories/115000056351'\n",
- "old_urls = extract_binance(main_webpage, key_words)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Loop continuously and post any new announcements."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Done for now. Time to go to sleep mate!\n"
- ]
- }
- ],
- "source": [
- "# Loop pass - Watchdog mode\n",
- "# Each cycle re-scrapes the page and tweets the entries that have not been seen before.\n",
- "while True:\n",
- "    new_urls = extract_binance(main_webpage, key_words)\n",
- "    for item in new_urls:\n",
- "        if item not in old_urls:\n",
- "            msg = item[0] + '\\n' + item[1]  # Announcement text, then its link\n",
- "            api.update_status(msg)\n",
- "            # Remember the entry so it is not tweeted again on the next cycle\n",
- "            old_urls.append(item)\n",
- "    print('Done for now. Time to go to sleep mate!')\n",
- "    time.sleep(900)  # Check every 15 min"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}