fix parsing, add telegram notifications

author: Evgeny Zinoviev <me@ch1p.io> 2021-09-16 17:01:10 +0300
committer: Evgeny Zinoviev <me@ch1p.io> 2021-09-16 17:01:10 +0300
commit: 38f3737e7213701e4be01668dff181b7db3f8f7a (patch)
tree: 71a438af9797c29062a1c2a6cc337f3adf0b397e
parent: c2dbff2e2af459ce189b31fa4871bb0023c6bb21 (diff)
5 files changed, 81 insertions, 48 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..485dee6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea
diff --git a/README.md b/README.md
index 5e96ad8..5d40702 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,11 @@
 # Binance-Announcements
-Web scraping for getting notifications on new announcements
 
-Like it or not, everything Binance touches turns to gold. New listings on their platform are a great opportunity for traders, so they'd better be well-informed.
+Fork of https://github.com/darroyolpz/Binance-Announcements.
 
-CHZ coin was listed on their platform yesterday, but unfortunately I missed the announcement on their webpage.
+- Fixed HTML layout parsing
+- Removed Discord notifications
+- Added Telegram notifications
 
-In order to not happen again, I've created a short script to get the notifications automatically from Binance page. It will run indefinitely in my computer, so everytime they release a new announcement, I'll receive a tweet and a telegram message.
+## License
+
+MIT
+\ No newline at end of file
diff --git a/binance-announcement-scraping-bot.py b/binance-announcement-scraping-bot.py
new file mode 100755
index 0000000..f908857
--- /dev/null
+++ b/binance-announcement-scraping-bot.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+import sys, traceback
+from requests import get
+from bs4 import BeautifulSoup
+from ch1p import State, telegram_notify
+from html import escape
+
+
+def scrap_announcements():
+    """Scrape the Binance announcements page, grouped by category.
+
+    Returns a list of ``{'title': str, 'news': [{'text', 'link'}, ...]}``
+    dicts, one per category header found on the page.
+
+    Raises RuntimeError when no article is found in any category (the
+    CSS class names are presumably build-generated, so a layout change
+    would otherwise go unnoticed); HTTP failures raise from requests.
+    """
+    url = "https://www.binance.com/en/support/announcement"
+    # Without a timeout requests can block forever on a stalled socket.
+    response = get(url, timeout=30)
+    # Surface HTTP errors as such, not as a misleading "no articles".
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    data = []
+    total_news = 0
+    for c in soup.find_all(class_='css-wmvdm0'):
+        category_title = c.select('h2[data-bn-type="text"]')[0].text
+        news = []
+        # find_next yields the first <div> after this node in document order.
+        for link in c.find_next('div').select('a[data-bn-type="link"]'):
+            # Only anchors whose id starts with "supportList" are articles.
+            link_id = link.get('id')
+            if link_id is None or not link_id.startswith('supportList'):
+                continue
+            news.append({'text': link.text, 'link': link.get('href')})
+        total_news += len(news)
+        data.append({'title': category_title, 'news': news})
+
+    if not total_news:
+        raise RuntimeError('failed to find any articles')
+    return data
+
+
+if __name__ == '__main__':
+    # Notify Telegram about announcements not yet recorded in the state.
+    from urllib.parse import urljoin
+
+    def _format_link(item):
+        # Escape for Telegram HTML; resolve a relative href against the site.
+        href = urljoin('https://www.binance.com', item['link'])
+        return f'<a href="{escape(href)}">{escape(item["text"])}</a>'
+
+    state = State(default=dict(urls=[]))
+    try:
+        blocks = ['<b>Binance Announcements</b>']  # message header
+        for category in scrap_announcements():
+            updates = []
+            for item in category['news']:
+                if item['link'] not in state['urls']:  # not announced yet
+                    updates.append(item)
+                    state['urls'].append(item['link'])
+            if updates:
+                title = f"<i>{escape(category['title'])}</i>"
+                blocks.append('\n'.join([title, *map(_format_link, updates)]))
+        if len(blocks) > 1:  # at least one category had news
+            telegram_notify(text='\n\n'.join(blocks), parse_mode='HTML')
+    except Exception:  # let KeyboardInterrupt/SystemExit propagate
+        telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
+\ No newline at end of file
diff --git a/binance-scraping-bot.py b/binance-scraping-bot.py
deleted file mode 100644
index 79fae56..0000000
--- a/binance-scraping-bot.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import os, requests
-import pandas as pd
-from bs4 import BeautifulSoup
-from requests import get
-from discord_webhook import DiscordWebhook
-
-# Webhook settings
-url_wb = os.environ.get('DISCORD_WH')
-
-# Data for the scrap
-url = "https://www.binance.com/en/support/announcement"
-response = get(url)
-soup = BeautifulSoup(response.text, 'html.parser')
-news_list = soup.find_all(class_ = 'css-sbrje5')
-
-# Create a bag of key words for getting matches
-key_words = ['list', 'token sale', 'open trading', 'opens trading', 'perpetual', 'defi', 'uniswap', 'airdrop']
-
-# Open old database file
-path = "/home/pi/OpenAlpha/db.xlsx"
-df = pd.read_excel(path)
-
-# Empty list
-updated_list = []
-
-for news in news_list:
-	article_text = news.text
-
-	# Check for matchings
-	for item in key_words:
-		if (item in article_text.lower()) and (article_text not in df.values):
-			article_link = 'https://www.binance.com' + news.get('href')
-			msg = article_text + '\n' + article_link
-			updated_list.append([article_text, article_link])
-			print(article_text)
-
-			# Send message to Discord server
-			webhook = DiscordWebhook(url=url_wb, content=msg)
-			response = webhook.execute()
-
-# Export updated news to Excel
-cols = ['Text', 'Link']
-df = df.append(pd.DataFrame(updated_list, columns=cols), ignore_index = True)
-df.to_excel(path, index = False)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e2b8800
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+requests~=2.26.0
+beautifulsoup4~=4.10.0
+ch1p~=0.0.5
+\ No newline at end of file
author	Evgeny Zinoviev <me@ch1p.io>	2021-09-16 17:01:10 +0300
committer	Evgeny Zinoviev <me@ch1p.io>	2021-09-16 17:01:10 +0300
commit	38f3737e7213701e4be01668dff181b7db3f8f7a (patch)
tree	71a438af9797c29062a1c2a6cc337f3adf0b397e
parent	c2dbff2e2af459ce189b31fa4871bb0023c6bb21 (diff)