diff options
author | Evgeny Zinoviev <me@ch1p.io> | 2021-12-10 19:13:17 +0300 |
---|---|---|
committer | Evgeny Zinoviev <me@ch1p.io> | 2021-12-10 19:13:17 +0300 |
commit | 6c33b96ba61dbebebcac1e39e11c97051300fe37 (patch) | |
tree | 737bccca8ffa5b72836c93615fc833532d719691 | |
parent | 7d011538f64a09dad7e89cce448975fbff23fb52 (diff) |
-rwxr-xr-x | binance-announcements-scraping-bot.py | 94 | ||||
-rw-r--r-- | requirements.txt | 2 |
2 files changed, 51 insertions, 45 deletions
diff --git a/binance-announcements-scraping-bot.py b/binance-announcements-scraping-bot.py index de8f2af..23dcd0d 100755 --- a/binance-announcements-scraping-bot.py +++ b/binance-announcements-scraping-bot.py @@ -1,80 +1,86 @@ #!/usr/bin/env python3 import traceback +import json +import sys + from requests import get -from bs4 import BeautifulSoup from ch1p import State, telegram_notify from html import escape from argparse import ArgumentParser def scrap_announcements(): - url = "https://www.binance.com/en/support/announcement" - response = get(url) - soup = BeautifulSoup(response.text, 'html.parser') - - data = [] - total_news = 0 - - categories_list = soup.find_all(class_='css-wmvdm0') - for c in categories_list: - category_title = c.select('h2[data-bn-type="text"]')[0].text - category_data = { - 'title': category_title, - 'news': [] - } + response = get('https://www.binance.com/bapi/composite/v1/public/cms/article/list/query?type=1&pageNo=1&pageSize=50') - for link in c.find_next('div').select('a[data-bn-type="link"]'): - if link.text.strip().lower() == 'view more': - continue + data = json.loads(response.text) + categories = [] + count = 0 - href = link.get('href') - if href.startswith('/'): - href = f'https://www.binance.com{href}' - category_data['news'].append({ - 'text': link.text, - 'link': href + for catalog in data['data']['catalogs']: + category = { + 'name': catalog['catalogName'], + 'articles': [] + } + + for article in catalog['articles']: + category['articles'].append({ + 'url': f'https://www.binance.com/en/support/announcement/{article["code"]}', + 'rel_date': article['releaseDate'], + 'title': article['title'] }) - total_news += 1 + count += 1 - data.append(category_data) + categories.append(category) - if not total_news: + if not count: raise RuntimeError('failed to find any articles') - return data + return categories -if __name__ == '__main__': - parser = ArgumentParser() - parser.add_argument('--stdout', action='store_true') - args = parser.parse_args() +def main(print_to_stdout: bool): + last_rel_date = 0 + state = State(default={'urls': [], 'last_rel_date': last_rel_date}) + if 'last_rel_date' in state: + last_rel_date = state['last_rel_date'] - state = State(default=dict(urls=[])) try: blocks = [] data = scrap_announcements() for category in data: updates = [] - for item in category['news']: - if item['link'] not in state['urls']: - updates.append(item) - state['urls'].append(item['link']) + for item in category['articles']: + if item['rel_date'] <= last_rel_date or item['url'] in state['urls']: + continue + + updates.append(item) + if item['rel_date'] > last_rel_date: + last_rel_date = item['rel_date'] if updates: - buf = f"<b>Binance: {category['title']}</b>\n" - buf += '\n'.join(list(map(lambda item: f"<a href='{item['link']}'>{item['text']}</a>", updates))) + buf = f"<b>Binance: {category['name']}</b>\n" + buf += '\n'.join(list(map(lambda a: f"<a href='{a['url']}'>{a['title']}</a>", updates))) blocks.append(buf) + state['last_rel_date'] = last_rel_date + if blocks: message = '\n\n'.join(blocks) - - if args.stdout: + if print_to_stdout: print(message) else: telegram_notify(text=message, parse_mode='HTML', disable_web_page_preview=True) - except: - if args.stdout: + if print_to_stdout: traceback.print_exc() else: - telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
\ No newline at end of file + telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML') + + +if __name__ == '__main__': + parser = ArgumentParser() + parser.add_argument('--stdout', action='store_true') + args = parser.parse_args() + + main(args.stdout) + diff --git a/requirements.txt b/requirements.txt index d9826d9..03cb679 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ requests~=2.26.0 beautifulsoup4~=4.10.0 -ch1p~=0.0.6
\ No newline at end of file +ch1p~=0.0.7
\ No newline at end of file |