diff options
author | rusinthread <rusinthread@cock.li> | 2017-01-06 19:56:51 +0300 |
---|---|---|
committer | rusinthread <rusinthread@cock.li> | 2017-01-06 19:56:51 +0300 |
commit | 4732bbbbd4d14a45d166781fde435e8b509e5a3e (patch) | |
tree | c00d8ea44c24b446c5801b8a18b8ae59e8948f50 /find_2ch_archive.py | |
parent | e71e7160fd8104814758bab0e306f5b74a8f2402 (diff) |
test data.md generation
Diffstat (limited to 'find_2ch_archive.py')
-rwxr-xr-x | find_2ch_archive.py | 30 |
1 files changed, 17 insertions, 13 deletions
diff --git a/find_2ch_archive.py b/find_2ch_archive.py
index 0358307..2ddb211 100755
--- a/find_2ch_archive.py
+++ b/find_2ch_archive.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python3
 import requests, re
-import sys
+#import sys
 
 def test_link_text(text):
     text = text.upper()
@@ -31,15 +31,19 @@ def test_link_text(text):
 def full_url(url):
     return 'https://2ch.hk' + url
 
-page = 500
-board = "b"
-while page <= 600:
-    print("fetching page %d" % page)
-    url = "https://2ch.hk/%s/arch/%d.html" % (board, page)
-    r = requests.get(url)
-    for a in re.finditer(r'<a href="(/'+board+'/arch/[\d-]+/res/\d+\.html)">(.*?)</a>', r.text, flags=re.I|re.M):
-        link_href = a.group(1)
-        link_text = a.group(2)
-        if test_link_text(link_text):
-            print("[%d] %s => %s" % (page, full_url(link_href), link_text))
-    page += 1
+def find_triumfalno():
+    page = 500
+    board = "b"
+    while page <= 600:
+        #print("fetching page %d" % page)
+        url = "https://2ch.hk/%s/arch/%d.html" % (board, page)
+        r = requests.get(url)
+        for a in re.finditer(r'<a href="(/'+board+'/arch/[\d-]+/res/\d+\.html)">(.*?)</a>', r.text, flags=re.I|re.M):
+            link_href = a.group(1)
+            link_text = a.group(2)
+            if test_link_text(link_text):
+                print("%s => %s" % (full_url(link_href), link_text))
+        page += 1
+
+if __name__ == '__main__':
+    find_triumfalno()