From 6c33b96ba61dbebebcac1e39e11c97051300fe37 Mon Sep 17 00:00:00 2001 From: Evgeny Zinoviev Date: Fri, 10 Dec 2021 19:13:17 +0300 Subject: fix --- binance-announcements-scraping-bot.py | 94 +++++++++++++++++++---------------- requirements.txt | 2 +- 2 files changed, 51 insertions(+), 45 deletions(-) diff --git a/binance-announcements-scraping-bot.py b/binance-announcements-scraping-bot.py index de8f2af..23dcd0d 100755 --- a/binance-announcements-scraping-bot.py +++ b/binance-announcements-scraping-bot.py @@ -1,80 +1,86 @@ #!/usr/bin/env python3 import traceback +import json +import sys + from requests import get -from bs4 import BeautifulSoup from ch1p import State, telegram_notify from html import escape from argparse import ArgumentParser def scrap_announcements(): - url = "https://www.binance.com/en/support/announcement" - response = get(url) - soup = BeautifulSoup(response.text, 'html.parser') - - data = [] - total_news = 0 - - categories_list = soup.find_all(class_='css-wmvdm0') - for c in categories_list: - category_title = c.select('h2[data-bn-type="text"]')[0].text - category_data = { - 'title': category_title, - 'news': [] - } + response = get('https://www.binance.com/bapi/composite/v1/public/cms/article/list/query?type=1&pageNo=1&pageSize=50') - for link in c.find_next('div').select('a[data-bn-type="link"]'): - if link.text.strip().lower() == 'view more': - continue + data = json.loads(response.text) + categories = [] + count = 0 - href = link.get('href') - if href.startswith('/'): - href = f'https://www.binance.com{href}' - category_data['news'].append({ - 'text': link.text, - 'link': href + for catalog in data['data']['catalogs']: + category = { + 'name': catalog['catalogName'], + 'articles': [] + } + + for article in catalog['articles']: + category['articles'].append({ + 'url': f'https://www.binance.com/en/support/announcement/{article["code"]}', + 'rel_date': article['releaseDate'], + 'title': article['title'] }) - total_news += 1 + count += 1 - data.append(category_data) + categories.append(category) - if not total_news: + if not count: raise RuntimeError('failed to find any articles') - return data + return categories -if __name__ == '__main__': - parser = ArgumentParser() - parser.add_argument('--stdout', action='store_true') - args = parser.parse_args() +def main(print_to_stdout: bool): + last_rel_date = 0 + state = State(default={'urls': [], 'last_rel_date': last_rel_date}) + if 'last_rel_date' in state: + last_rel_date = state['last_rel_date'] - state = State(default=dict(urls=[])) try: blocks = [] data = scrap_announcements() for category in data: updates = [] - for item in category['news']: - if item['link'] not in state['urls']: - updates.append(item) - state['urls'].append(item['link']) + for item in category['articles']: + if item['rel_date'] <= last_rel_date or item['url'] in state['urls']: + continue + + updates.append(item) + if item['rel_date'] > last_rel_date: + last_rel_date = item['rel_date'] if updates: - buf = f"Binance: {category['title']}\n" - buf += '\n'.join(list(map(lambda item: f"{item['text']}", updates))) + buf = f"Binance: {category['name']}\n" + buf += '\n'.join(list(map(lambda a: f"{a['title']}", updates))) blocks.append(buf) + state['last_rel_date'] = last_rel_date + if blocks: message = '\n\n'.join(blocks) - - if args.stdout: + if print_to_stdout: print(message) else: telegram_notify(text=message, parse_mode='HTML', disable_web_page_preview=True) - except: - if args.stdout: + if print_to_stdout: traceback.print_exc() else: - telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML') \ No newline at end of file + telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML') + + +if __name__ == '__main__': + parser = ArgumentParser() + parser.add_argument('--stdout', action='store_true') + args = parser.parse_args() + + main(args.stdout) + diff --git a/requirements.txt b/requirements.txt index d9826d9..03cb679 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ requests~=2.26.0 beautifulsoup4~=4.10.0 -ch1p~=0.0.6 \ No newline at end of file +ch1p~=0.0.7 \ No newline at end of file -- cgit v1.2.3