diff options
author | darroyolpz <darroyolpz@users.noreply.github.com> | 2019-09-09 23:32:42 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-09-09 23:32:42 +0200 |
commit | aaab960e4fcacb9a9c9d24349bf9e7f1ec661d43 (patch) | |
tree | 43f55d561e84371f5325b69c1e2a1a7fa6cfc906 | |
parent | 9fb9e4d6c2586cab1c2f948ae19e4de805d80002 (diff) |
Delete the jupyter notebook and upload a .py file
-rw-r--r-- | Data Scraping for Binance Announcements.ipynb | 195 |
1 files changed, 0 insertions, 195 deletions
# Source: "Data Scraping for Binance Announcements.ipynb"
# (notebook blob d7093b0, removed by this commit in favour of a .py file).
#
# Data Scraping for Binance Announcements
# Beta version. Modified on 07-09-2019

# Import all the needed packages:
import bs4 as bs
import urllib.request
import tweepy, os, time

# --- Twitter app -----------------------------------------------------------
# The four OAuth credentials are read from environment variables so that no
# secret is stored in the notebook itself.
consumer_key = os.environ.get('TW_CONSUMER_KEY')
consumer_secret = os.environ.get('TW_CONSUMER_SECRET')
access_token = os.environ.get('TW_ACCESS_TOKEN')
access_token_secret = os.environ.get('TW_ACCESS_TOKEN_SECRET')

# Fail fast with a readable message when a credential is missing.
# os.environ.get() returns None for an unset variable; without this check the
# problem would presumably only surface later, at the first attempt to post.
_missing_credentials = [
    name
    for name, value in (
        ('TW_CONSUMER_KEY', consumer_key),
        ('TW_CONSUMER_SECRET', consumer_secret),
        ('TW_ACCESS_TOKEN', access_token),
        ('TW_ACCESS_TOKEN_SECRET', access_token_secret),
    )
    if not value
]
if _missing_credentials:
    raise RuntimeError(
        'Missing Twitter credentials in the environment: '
        + ', '.join(_missing_credentials)
    )

# authentication of consumer key and secret
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# authentication of access token and secret
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# --- State -------------------------------------------------------------------
# Two lists for storing announcement entries. A message should be sent when a
# freshly scraped entry is not yet present in old_urls.
# NOTE(review): `news_urls` is never read again; the watchdog loop binds its
# own `new_urls`. It is kept only so existing references keep working.
old_urls, news_urls = [], []

# Create a bag of key words for getting matches.
# Key words used to select announcements.
# Don't use plurals: matching is by substring, so the singular form already
# covers the plural one.
key_words = ['List', 'list', 'Token Sale', 'Open Trading', 'open trading']


# Extract the information from the webpage and collect the matches.
def extract_binance(main_webpage, key_words):
    """Scrape an announcements page and return the entries matching a key word.

    The page is downloaded, parsed with BeautifulSoup (lxml parser) and every
    ``<li class="article-list-item">`` element is inspected. An element is
    kept when its text (newlines removed) contains at least one of
    ``key_words`` as a case-sensitive substring.

    Parameters
    ----------
    main_webpage : str
        URL of the page to download.
    key_words : iterable of str
        Substrings to look for in each article's text.

    Returns
    -------
    list of [str, str]
        One ``[article_text, article_url]`` pair per matching article, in page
        order. ``article_url`` is 'https://www.binance.com' followed by the
        ``href`` of the first ``<a>`` inside the list item.

    Raises
    ------
    urllib.error.URLError
        Propagated from ``urllib.request.urlopen`` when the download fails.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(main_webpage) as response:
        sauce = response.read()
    soup = bs.BeautifulSoup(sauce, 'lxml')

    final_list = []
    for article in soup.find_all('li', class_='article-list-item'):
        article_text = article.get_text().replace('\n', '')

        # Keep each article at most once, even when several key words match
        # (e.g. both 'List' and 'list'); one entry per matching key word would
        # later be posted as duplicate messages.
        if not any(word in article_text for word in key_words):
            continue

        # A list item without a usable link cannot be announced with a URL;
        # skip it rather than crash the whole scrape.
        link = article.find('a')
        href = link.get('href') if link is not None else None
        if not href:
            continue

        final_list.append([article_text, 'https://www.binance.com' + href])
    return final_list


# Get the first pass: everything already on the page counts as "old".
# (In the notebook this cell was timed with the %%time cell magic; the
# recorded wall time was 77.1 ms.)
main_webpage = 'https://www.binance.com/en/support/categories/115000056351'
old_urls = extract_binance(main_webpage, key_words)

# Loop pass and get the new announcements
# Loop pass - Watchdog mode
#
# Every 15 minutes the announcements page is scraped again and every entry not
# yet present in old_urls is posted as a status update ("text\nurl").
while True:
    try:
        new_urls = extract_binance(main_webpage, key_words)
    except OSError as exc:
        # urllib's URLError/HTTPError derive from OSError. A transient network
        # problem should not stop the watchdog; retry on the next cycle.
        print('Could not fetch the announcements page: ' + str(exc))
        new_urls = []

    for item in new_urls:
        if item not in old_urls:
            msg = item[0] + '\n' + item[1]
            api.update_status(msg)
            # Remember the entry only after it was posted. Without this,
            # old_urls never changes and the same announcement is posted
            # again on every cycle.
            old_urls.append(item)

    print('Done for now. Time to go to sleep mate!')
    time.sleep(900)  # Check every 15 min

# Notebook metadata (from the deleted .ipynb): kernel "Python 3" (python3),
# language version 3.7.3, nbformat 4, nbformat_minor 2.