summary refs log tree commit diff
path: root/find_2ch_archive.py
diff options
context:
space:
mode:
author rusinthread <rusinthread@cock.li> 2017-01-06 19:56:51 +0300
committer rusinthread <rusinthread@cock.li> 2017-01-06 19:56:51 +0300
commit 4732bbbbd4d14a45d166781fde435e8b509e5a3e (patch)
tree c00d8ea44c24b446c5801b8a18b8ae59e8948f50 /find_2ch_archive.py
parent e71e7160fd8104814758bab0e306f5b74a8f2402 (diff)
test data.md generation
Diffstat (limited to 'find_2ch_archive.py')
-rwxr-xr-x find_2ch_archive.py 30
1 files changed, 17 insertions, 13 deletions
diff --git a/find_2ch_archive.py b/find_2ch_archive.py
index 0358307..2ddb211 100755
--- a/find_2ch_archive.py
+++ b/find_2ch_archive.py
@@ -1,6 +1,6 @@
#!/usr/bin/python3
import requests, re
-import sys
+#import sys
def test_link_text(text):
text = text.upper()
@@ -31,15 +31,19 @@ def test_link_text(text):
def full_url(url):
return 'https://2ch.hk' + url
-page = 500
-board = "b"
-while page <= 600:
- print("fetching page %d" % page)
- url = "https://2ch.hk/%s/arch/%d.html" % (board, page)
- r = requests.get(url)
- for a in re.finditer(r'<a href="(/'+board+'/arch/[\d-]+/res/\d+\.html)">(.*?)</a>', r.text, flags=re.I|re.M):
- link_href = a.group(1)
- link_text = a.group(2)
- if test_link_text(link_text):
- print("[%d] %s => %s" % (page, full_url(link_href), link_text))
- page += 1
+def find_triumfalno():
+ page = 500
+ board = "b"
+ while page <= 600:
+ #print("fetching page %d" % page)
+ url = "https://2ch.hk/%s/arch/%d.html" % (board, page)
+ r = requests.get(url)
+ for a in re.finditer(r'<a href="(/'+board+'/arch/[\d-]+/res/\d+\.html)">(.*?)</a>', r.text, flags=re.I|re.M):
+ link_href = a.group(1)
+ link_text = a.group(2)
+ if test_link_text(link_text):
+ print("%s => %s" % (full_url(link_href), link_text))
+ page += 1
+
+if __name__ == '__main__':
+ find_triumfalno()