From 0d9f24ab5fa642fe898f586aa72f372274243c4c Mon Sep 17 00:00:00 2001
From: David Arroyo
Date: Sun, 26 Jul 2020 10:54:19 +0100
Subject: Old database checking and updated list developed

---
Review notes (commentary only; text between the "---" line and the diff is
ignored when the patch is applied):

* Layout restored from a whitespace-collapsed copy of this patch. Python
  indentation and the exact position of blank lines are inferred; they are
  consistent with the hunk headers (-1,4 +1,5 and -15,15 +16,29) and with
  the diffstat (17 insertions, 2 deletions).
* The keyword loop has no `break` and `df` is not updated inside it, so a
  headline that contains several key words is appended to `updated_list`
  and posted to Discord once per matching key word.
* `article_text not in df.values` tests membership against every cell of
  the sheet, not only against the 'Text' column.
* `path` is an absolute path inside one user's Windows profile, and
  `pd.read_excel(path)` is called unconditionally, so the workbook must
  already exist before the first run.
* `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
  `pd.concat([df, new_rows], ignore_index=True)` is the replacement when
  this script is run against a current pandas.

 binance-scraping-bot.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'binance-scraping-bot.py')

diff --git a/binance-scraping-bot.py b/binance-scraping-bot.py
index 5ee9053..bf350c3 100644
--- a/binance-scraping-bot.py
+++ b/binance-scraping-bot.py
@@ -1,4 +1,5 @@
 import os, requests
+import pandas as pd
 from bs4 import BeautifulSoup
 from requests import get
 from discord_webhook import DiscordWebhook
@@ -15,15 +16,29 @@ news_list = soup.find_all(class_ = 'css-sbrje5')
 # Create a bag of key words for getting matches
 key_words = ['list', 'token sale', 'open trading', 'opens trading', 'perpetual']
 
+# Open old database file
+path = "C:/Users/d645daar/Documents/Codes/Binance Announcements/db.xlsx"
+df = pd.read_excel(path)
+
+# Empty list
+updated_list = []
+
 for news in news_list:
     article_text = news.text
 
     # Check for matchings
     for item in key_words:
-        if item in article_text.lower():
+        if (item in article_text.lower()) and (article_text not in df.values):
             article_link = 'https://www.binance.com' + news.get('href')
             msg = article_text + '\n' + article_link
 
+            updated_list.append([article_text, article_link])
+            print(article_text)
             # Send message to Discord server
             webhook = DiscordWebhook(url=url_wb, content=msg)
-            response = webhook.execute()
\ No newline at end of file
+            response = webhook.execute()
+
+# Export updated news to Excel
+cols = ['Text', 'Link']
+df = df.append(pd.DataFrame(updated_list, columns=cols), ignore_index = True)
+df.to_excel(path, index = False)
\ No newline at end of file
-- 
cgit v1.2.3