about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Evgeny Zinoviev <me@ch1p.io> 2021-12-10 19:13:17 +0300
committer Evgeny Zinoviev <me@ch1p.io> 2021-12-10 19:13:17 +0300
commit 6c33b96ba61dbebebcac1e39e11c97051300fe37 (patch)
tree 737bccca8ffa5b72836c93615fc833532d719691
parent 7d011538f64a09dad7e89cce448975fbff23fb52 (diff)
-rwxr-xr-x binance-announcements-scraping-bot.py 94
-rw-r--r-- requirements.txt 2
2 files changed, 51 insertions, 45 deletions
diff --git a/binance-announcements-scraping-bot.py b/binance-announcements-scraping-bot.py
index de8f2af..23dcd0d 100755
--- a/binance-announcements-scraping-bot.py
+++ b/binance-announcements-scraping-bot.py
@@ -1,80 +1,86 @@
#!/usr/bin/env python3
import traceback
+import json
+import sys
+
from requests import get
-from bs4 import BeautifulSoup
from ch1p import State, telegram_notify
from html import escape
from argparse import ArgumentParser
def scrap_announcements():
- url = "https://www.binance.com/en/support/announcement"
- response = get(url)
- soup = BeautifulSoup(response.text, 'html.parser')
-
- data = []
- total_news = 0
-
- categories_list = soup.find_all(class_='css-wmvdm0')
- for c in categories_list:
- category_title = c.select('h2[data-bn-type="text"]')[0].text
- category_data = {
- 'title': category_title,
- 'news': []
- }
+ response = get('https://www.binance.com/bapi/composite/v1/public/cms/article/list/query?type=1&pageNo=1&pageSize=50')
- for link in c.find_next('div').select('a[data-bn-type="link"]'):
- if link.text.strip().lower() == 'view more':
- continue
+ data = json.loads(response.text)
+ categories = []
+ count = 0
- href = link.get('href')
- if href.startswith('/'):
- href = f'https://www.binance.com{href}'
- category_data['news'].append({
- 'text': link.text,
- 'link': href
+ for catalog in data['data']['catalogs']:
+ category = {
+ 'name': catalog['catalogName'],
+ 'articles': []
+ }
+
+ for article in catalog['articles']:
+ category['articles'].append({
+ 'url': f'https://www.binance.com/en/support/announcement/{article["code"]}',
+ 'rel_date': article['releaseDate'],
+ 'title': article['title']
})
- total_news += 1
+ count += 1
- data.append(category_data)
+ categories.append(category)
- if not total_news:
+ if not count:
raise RuntimeError('failed to find any articles')
- return data
+ return categories
-if __name__ == '__main__':
- parser = ArgumentParser()
- parser.add_argument('--stdout', action='store_true')
- args = parser.parse_args()
+def main(print_to_stdout: bool):
+ last_rel_date = 0
+ state = State(default={'urls': [], 'last_rel_date': last_rel_date})
+ if 'last_rel_date' in state:
+ last_rel_date = state['last_rel_date']
- state = State(default=dict(urls=[]))
try:
blocks = []
data = scrap_announcements()
for category in data:
updates = []
- for item in category['news']:
- if item['link'] not in state['urls']:
- updates.append(item)
- state['urls'].append(item['link'])
+ for item in category['articles']:
+ if item['rel_date'] <= last_rel_date or item['url'] in state['urls']:
+ continue
+
+ updates.append(item)
+ if item['rel_date'] > last_rel_date:
+ last_rel_date = item['rel_date']
if updates:
- buf = f"<b>Binance: {category['title']}</b>\n"
- buf += '\n'.join(list(map(lambda item: f"<a href='{item['link']}'>{item['text']}</a>", updates)))
+ buf = f"<b>Binance: {category['name']}</b>\n"
+ buf += '\n'.join(list(map(lambda a: f"<a href='{a['url']}'>{a['title']}</a>", updates)))
blocks.append(buf)
+ state['last_rel_date'] = last_rel_date
+
if blocks:
message = '\n\n'.join(blocks)
-
- if args.stdout:
+ if print_to_stdout:
print(message)
else:
telegram_notify(text=message, parse_mode='HTML', disable_web_page_preview=True)
-
except:
- if args.stdout:
+ if print_to_stdout:
traceback.print_exc()
else:
- telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
\ No newline at end of file
+ telegram_notify(text='error: ' + escape(traceback.format_exc()), parse_mode='HTML')
+
+
+if __name__ == '__main__':
+ parser = ArgumentParser()
+ parser.add_argument('--stdout', action='store_true')
+ args = parser.parse_args()
+
+ main(args.stdout)
+
diff --git a/requirements.txt b/requirements.txt
index d9826d9..03cb679 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
requests~=2.26.0
beautifulsoup4~=4.10.0
-ch1p~=0.0.6
\ No newline at end of file
+ch1p~=0.0.7
\ No newline at end of file