Check announcements against the old database and record the new ones

author: David Arroyo <darroyolpz@users.noreply.github.com> 2020-07-26 10:54:19 +0100
committer: GitHub <noreply@github.com> 2020-07-26 10:54:19 +0100
commit: 0d9f24ab5fa642fe898f586aa72f372274243c4c (patch)
tree: 5d102f6a1f419afab35b13ebba96381280000c5e
parent: 5171cac79c360e3261f47595816f9205e73f49c8 (diff)
1 files changed, 17 insertions, 2 deletions
diff --git a/binance-scraping-bot.py b/binance-scraping-bot.py
index 5ee9053..bf350c3 100644
--- a/binance-scraping-bot.py
+++ b/binance-scraping-bot.py
@@ -1,4 +1,5 @@
 import os, requests
+import pandas as pd
 from bs4 import BeautifulSoup
 from requests import get
 from discord_webhook import DiscordWebhook
@@ -15,15 +16,29 @@ news_list = soup.find_all(class_ = 'css-sbrje5')
 # Create a bag of key words for getting matches
 key_words = ['list', 'token sale', 'open trading', 'opens trading', 'perpetual']
 
+# Load previously recorded announcements so they are not sent again
+path = "C:/Users/d645daar/Documents/Codes/Binance Announcements/db.xlsx"
+df = pd.read_excel(path)
+
+# Newly matched announcements, collected as [text, link] rows
+updated_list = []
+
 for news in news_list:
 	article_text = news.text
 
 	# Check for matchings
 	for item in key_words:
-		if item in article_text.lower():
+		if (item in article_text.lower()) and (article_text not in df.values):
 			article_link = 'https://www.binance.com' + news.get('href')
 			msg = article_text + '\n' + article_link
+			updated_list.append([article_text, article_link])
+			print(article_text)
 
 			# Send message to Discord server
 			webhook = DiscordWebhook(url=url_wb, content=msg)
-			response = webhook.execute()
\ No newline at end of file
+			response = webhook.execute()
+
+# Export updated news to Excel
+cols = ['Text', 'Link']
+df = df.append(pd.DataFrame(updated_list, columns=cols), ignore_index = True)
+df.to_excel(path, index = False)
\ No newline at end of file
author	David Arroyo <darroyolpz@users.noreply.github.com>	2020-07-26 10:54:19 +0100
committer	GitHub <noreply@github.com>	2020-07-26 10:54:19 +0100
commit	0d9f24ab5fa642fe898f586aa72f372274243c4c (patch)
tree	5d102f6a1f419afab35b13ebba96381280000c5e
parent	5171cac79c360e3261f47595816f9205e73f49c8 (diff)