about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Arroyo <darroyolpz@users.noreply.github.com> 2020-07-26 10:54:19 +0100
committer GitHub <noreply@github.com> 2020-07-26 10:54:19 +0100
commit 0d9f24ab5fa642fe898f586aa72f372274243c4c (patch)
tree 5d102f6a1f419afab35b13ebba96381280000c5e
parent 5171cac79c360e3261f47595816f9205e73f49c8 (diff)
Old database checking and updated list developed
-rw-r--r-- binance-scraping-bot.py 19
1 files changed, 17 insertions, 2 deletions
diff --git a/binance-scraping-bot.py b/binance-scraping-bot.py
index 5ee9053..bf350c3 100644
--- a/binance-scraping-bot.py
+++ b/binance-scraping-bot.py
@@ -1,4 +1,5 @@
import os, requests
+import pandas as pd
from bs4 import BeautifulSoup
from requests import get
from discord_webhook import DiscordWebhook
@@ -15,15 +16,29 @@ news_list = soup.find_all(class_ = 'css-sbrje5')
# Create a bag of key words for getting matches
key_words = ['list', 'token sale', 'open trading', 'opens trading', 'perpetual']
+# Open old database file
+path = "C:/Users/d645daar/Documents/Codes/Binance Announcements/db.xlsx"
+df = pd.read_excel(path)
+
+# Empty list
+updated_list = []
+
for news in news_list:
article_text = news.text
# Check for matchings
for item in key_words:
- if item in article_text.lower():
+ if (item in article_text.lower()) and (article_text not in df.values):
article_link = 'https://www.binance.com' + news.get('href')
msg = article_text + '\n' + article_link
+ updated_list.append([article_text, article_link])
+ print(article_text)
# Send message to Discord server
webhook = DiscordWebhook(url=url_wb, content=msg)
- response = webhook.execute()
\ No newline at end of file
+ response = webhook.execute()
+
+# Export updated news to Excel
+cols = ['Text', 'Link']
+df = df.append(pd.DataFrame(updated_list, columns=cols), ignore_index = True)
+df.to_excel(path, index = False)
\ No newline at end of file