From 4732bbbbd4d14a45d166781fde435e8b509e5a3e Mon Sep 17 00:00:00 2001
From: rusinthread
Date: Fri, 6 Jan 2017 19:56:51 +0300
Subject: test data.md generation

---
 find_2ch_archive.py | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

(limited to 'find_2ch_archive.py')

diff --git a/find_2ch_archive.py b/find_2ch_archive.py
index 0358307..2ddb211 100755
--- a/find_2ch_archive.py
+++ b/find_2ch_archive.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python3
 import requests, re
-import sys
+#import sys
 
 def test_link_text(text):
     text = text.upper()
@@ -31,15 +31,19 @@ def test_link_text(text):
 def full_url(url):
     return 'https://2ch.hk' + url
 
-page = 500
-board = "b"
-while page <= 600:
-    print("fetching page %d" % page)
-    url = "https://2ch.hk/%s/arch/%d.html" % (board, page)
-    r = requests.get(url)
-    for a in re.finditer(r'(.*?)', r.text, flags=re.I|re.M):
-        link_href = a.group(1)
-        link_text = a.group(2)
-        if test_link_text(link_text):
-            print("[%d] %s => %s" % (page, full_url(link_href), link_text))
-    page += 1
+def find_triumfalno():
+    page = 500
+    board = "b"
+    while page <= 600:
+        #print("fetching page %d" % page)
+        url = "https://2ch.hk/%s/arch/%d.html" % (board, page)
+        r = requests.get(url)
+        for a in re.finditer(r'(.*?)', r.text, flags=re.I|re.M):
+            link_href = a.group(1)
+            link_text = a.group(2)
+            if test_link_text(link_text):
+                print("%s => %s" % (full_url(link_href), link_text))
+        page += 1
+
+if __name__ == '__main__':
+    find_triumfalno()
-- 
cgit v1.2.3