fix (HEAD, master)

author: Evgeny Zinoviev <me@ch1p.io> 2021-12-10 19:13:17 +0300
committer: Evgeny Zinoviev <me@ch1p.io> 2021-12-10 19:13:17 +0300
commit: 6c33b96ba61dbebebcac1e39e11c97051300fe37 (patch)
tree: 737bccca8ffa5b72836c93615fc833532d719691
parent: 7d011538f64a09dad7e89cce448975fbff23fb52 (diff)
2 files changed, 51 insertions, 45 deletions
diff --git a/binance-announcements-scraping-bot.py b/binance-announcements-scraping-bot.py
index de8f2af..23dcd0d 100755
--- a/binance-announcements-scraping-bot.py
+++ b/binance-announcements-scraping-bot.py
@@ -1,80 +1,86 @@
 #!/usr/bin/env python3
 import traceback
+import json
+import sys
+
 from requests import get
-from bs4 import BeautifulSoup
 from ch1p import State, telegram_notify
 from html import escape
 from argparse import ArgumentParser
 
 
 def scrap_announcements():
-    url = "https://www.binance.com/en/support/announcement"
-    response = get(url)
-    soup = BeautifulSoup(response.text, 'html.parser')
-
-    data = []
-    total_news = 0
-
-    categories_list = soup.find_all(class_='css-wmvdm0')
-    for c in categories_list:
-        category_title = c.select('h2[data-bn-type="text"]')[0].text
-        category_data = {
-            'title': category_title,
-            'news': []
-        }
+    response = get('https://www.binance.com/bapi/composite/v1/public/cms/article/list/query?type=1&pageNo=1&pageSize=50')
 
-        for link in c.find_next('div').select('a[data-bn-type="link"]'):
-            if link.text.strip().lower() == 'view more':
-                continue
+    data = json.loads(response.text)
+    categories = []
+    count = 0
 
-            href = link.get('href')
-            if href.startswith('/'):
-                href = f'https://www.binance.com{href}'
-            category_data['news'].append({
-                'text': link.text,
-                'link': href
+    for catalog in data['data']['catalogs']:
+        category = {
+            'name': catalog['catalogName'],
+            'articles': []
+        }
+
+        for article in catalog['articles']:
+            category['articles'].append({
+                'url': f'https://www.binance.com/en/support/announcement/{article["code"]}',
+                'rel_date': article['releaseDate'],
+                'title': article['title']
             })
-            total_news += 1
+            count += 1
 
-        data.append(category_data)
+        categories.append(category)
 
-    if not total_news:
+    if not count:
         raise RuntimeError('failed to find any articles')
 
-    return data
+    return categories
 
 
-if __name__ == '__main__':
-    parser = ArgumentParser()
-    parser.add_argument('--stdout', action='store_true')
-    args = parser.parse_args()
+def main(print_to_stdout: bool):
+    last_rel_date = 0
+    state = State(default={'urls': [], 'last_rel_date': last_rel_date})
+    if 'last_rel_date' in state:
+        last_rel_date = state['last_rel_date']
 
-    state = State(default=dict(urls=[]))
     try:
         blocks = []
         data = scrap_announcements()
         for category in data:
             updates = []
-            for item in category['news']:
-                if item['link'] not in state['urls']:
-                    updates.append(item)
-                    state['urls'].append(item['link'])
+            for item in category['articles']:
+                if item['rel_date'] <= last_rel_date or item['url'] in state['urls']:
+                    continue
+
+                updates.append(item)
+                if item['rel_date'] > last_rel_date:
+                    last_rel_date = item['rel_date']
 
             if updates:
-                buf = f"<b>Binance: {category['title']}</b>\n"
-                buf += '\n'.join(list(map(lambda item: f"<a href='{item['link']}'>{item['text']}</a>", updates)))
+                buf = f"<b>Binance: {category['name']}</b>\n"
+                buf += '\n'.join(list(map(lambda a: f"<a href='{a['url']}'>{a['title']}</a>", updates)))
                 blocks.append(buf)
 
+        state['last_rel_date'] = last_rel_date
+
         if blocks:
             message = '\n\n'.join(blocks)
-
-            if args.stdout:
+            if print_to_stdout:
                 print(message)
             else:
                 telegram_notify(text=message, parse_mode='HTML', disable_web_page_preview=True)
-
     except:
-        if args.stdout:
+        if print_to_stdout:
             traceback.print_exc()
         else:
-            telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
-\ No newline at end of file
+            telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
+
+
+if __name__ == '__main__':
+    parser = ArgumentParser()
+    parser.add_argument('--stdout', action='store_true')
+    args = parser.parse_args()
+
+    main(args.stdout)
+
diff --git a/requirements.txt b/requirements.txt
index d9826d9..03cb679 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 requests~=2.26.0
 beautifulsoup4~=4.10.0
-ch1p~=0.0.6
-\ No newline at end of file
+ch1p~=0.0.7
+\ No newline at end of file
author	Evgeny Zinoviev <me@ch1p.io>	2021-12-10 19:13:17 +0300
committer	Evgeny Zinoviev <me@ch1p.io>	2021-12-10 19:13:17 +0300
commit	6c33b96ba61dbebebcac1e39e11c97051300fe37 (patch)
tree	737bccca8ffa5b72836c93615fc833532d719691
parent	7d011538f64a09dad7e89cce448975fbff23fb52 (diff)