summaryrefslogtreecommitdiff
path: root/main.py
diff options
context:
space:
mode:
Diffstat (limited to 'main.py')
-rwxr-xr-xmain.py257
1 files changed, 2 insertions, 255 deletions
diff --git a/main.py b/main.py
index 14151ed..34c0b33 100755
--- a/main.py
+++ b/main.py
@@ -2,12 +2,7 @@
import argparse
import sys
-import json
import os
-import re
-import time
-import datetime
-from pprint import pprint
try:
from termcolor import cprint
@@ -15,7 +10,8 @@ try:
except ImportError:
colors_supported = False
-CWD = os.path.dirname(os.path.realpath(__file__))
+from data_lib import load_data, decode_auto
+
def print_colored(s, color, fallback_prefix=''):
if colors_supported:
@@ -23,208 +19,7 @@ def print_colored(s, color, fallback_prefix=''):
else:
print(fallback_prefix + s)
-def load_data():
- with open(os.path.join(CWD, "data.json")) as f:
- data = json.loads(f.read())
-
- # ignore placeholders
- data = list(filter(lambda i: i['text'] != '', data))
-
- return data
-
-def clean_string(s, remove_junk=False):
- s = s.replace(')', ') ')
- s = re.sub(r'(\!|\.)([^\)])', r'\1 \2', s)
- #s = s.replace('/', ' ')
- s = s.upper()
-
- s = re.sub(r'\s+', ' ', s).strip()
-
- junks = [
- 'ВОЕННОЕ',
- 'ВЫШЕСТОЯЩИХ',
- 'ПРАВО',
- 'ПРАВИЛАМ ВОЙНЫ',
- 'ВЫПИСКА',
- 'КОНТРОЛЬ',
- 'ИХ',
- 'ПО',
- 'НАВЫКИ',
- 'С ВЫШЕСТОЯЩИМИ',
- #'ПРИСУТСТВИЕ',
- #'ЛИНЕЙНО',
- 'ЗАКОННО!',
- 'ПОХЛЕБКА',
- 'СВЯЗЕЙ',
- 'ЖУЮЩЕГО ХРЯЩИ',
- 'ИНДЕКСИРОВАН БЕЗУКОРИЗНЕННО',
- 'ОТКЛАДЫВАЕТСЯ ЛИНЕЙНО',
- '- ЕГО ВЕЛИЧЕСТВО',
- 'ГУБЕРНИЯ',
- 'С ВЫШЕСТОЯЩИМИ КОНТРОЛЬ',
- 'С ЛОКАЦИИ',
- #'КАЗНЬ',
- 'ГУБЕРНИЯ',
- 'ПРОВЕРКИ',
- 'УСТАНОВЛЕНО',
- 'ПОБЕДИТЕЛЕМ',
- #'СТАЛЬНЫЕ',
- 'НЕРВЫ',
- 'ДАРОВАНО',
- #'ТРАНСПОРТИРОВКА',
- 'ОДОБРЕНО',
- 'ПРОЯВЛЕНИЯ',
- 'УЗАКОНЕНО',
- 'ИМЕЕТСЯ',
- 'ЗНАЛ',
- 'НЕ ПРИМЕЧЕНО',
- 'НА СЕВЕР',
- 'ПРИГОВОРИТЬ',
- 'ШЕСТВУЕМ',
- 'ДАГОН',
- 'ДА МЕРЗНУЩИЙ',
- 'КОФЕ',
- #'РЕАГИРОВАНИЕ',
- 'УКАЗАНО',
- '- ВЫСОКИЙ ТИТУЛ',
- 'ЗАКАЗ',
- 'ЧЕРТЫ ЛИЦА',
-
- # english
- 'SCHOOL ON THE RIGHT',
- 'WILL NOT ALLOW',
- 'FLYWHEEL',
- 'TRIUMPHANTLY',
- 'BEING USED',
- 'NICE',
- 'UMBRELLA',
- #'BIOROBOT',
- 'CONSERVATISM',
- 'WAS ESTABLISHED',
- 'WITH A PASSWORD',
- 'ANT',
- 'YEAR',
- 'RECOGNIZED',
- 'SEARCHED'
- #'LEGAL',
- #'FIGHTING'
- ]
-
- # только без пробелов
- junks_words = list(filter(lambda w: ' ' not in w, junks))
-
- # только с пробелами
- junks_nwords = list(filter(lambda w: w not in junks_words, junks))
-
- if remove_junk:
- s = s.split(' ')
- s = list(filter(lambda l: re.sub(r'\.|\!$', '', l) not in junks_words, s))
- s = ' '.join(s)
-
- for j in junks_nwords:
- s = s.replace(j, '')
-
- # хортица - это буква Х
- s = s.replace('Х О Р Т И Ц А', 'Х_О_Р_Т_И_Ц_А')
-
- s = re.sub(r'\s+', ' ', s).strip()
- return s
-
-def decode(s, is_url=False):
- buf = ''
- for word in s.split(' '):
- word = word.strip()
- if word == '':
- continue
-
- if re.match(r'^\d+', word):
- buf += word
- elif is_url and word.endswith('://'):
- buf += word[0]
- buf += '://'
- else:
- letter = word[0]
- buf += letter
-
- return buf
-
-def decode2(s):
- buf = ''
- for s in re.split(r'[\?\.\!]+', s):
- s = s.strip()
- if s == '':
- continue
-
- words = s.split(' ')
-
- letter = words[1][0]
- buf += letter
-
- return buf
-
-def decode3(s):
- buf = ''
- for s in re.split(r'[\?\.\!]+', s):
- s = s.strip()
- s = s.replace(' ', '')
- s = s.replace('-', '')
- if not s:
- continue
-
- print(s)
- continue
-
- s = s.upper()
-
- if s[0] in ('Ш', 'Щ', 'И'):
- buf += s[0]
- elif s[4] == 'Й':
- buf += s[4]
- elif s[0] == 'И':
- buf += 'И'
- elif s[7] == 'М':
- buf += 'М'
- elif s[4] == 'А':
- buf += 'А'
- elif s[2] == 'Р':
- buf += 'Р'
- elif s[1] == 'У':
- buf += 'У'
- elif s[9] == 'Ю':
- buf += 'Ю'
- else:
- buf += '?'
-
- return buf
-
-
-
-
-
-# s: source
-# t: type
-def decode_auto(s, t, reverse_decoded=False, remove_junk=True):
- if t == 1:
- s = clean_string(s, remove_junk=remove_junk)
- result = decode(s)
-
- elif t == 2:
- result = decode2(s)
-
- elif t == 3:
- result = decode3(s)
- if reverse_decoded:
- # reverse string
- result = result[::-1]
-
- return result
-
-
-def sort_data_by_date(item):
- return int(time.mktime(datetime.datetime.strptime(item['date'], '%d/%m/%y').timetuple()))
-
-
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--decode', action='store_true')
@@ -235,7 +30,6 @@ def main():
parser.add_argument('--is-url', action='store_true')
parser.add_argument('--type', type=int, choices=[1, 2, 3], default=1)
parser.add_argument('--reverse-decoded', action='store_true')
- parser.add_argument('--gen-page', action='store_true')
args = parser.parse_args()
data = load_data()
@@ -289,52 +83,5 @@ def main():
count = len(data)
print("Total texts: %s" % count)
- elif args.gen_page:
- # sort by date
- data = sorted(data, key=sort_data_by_date)
- #pprint(data)
-
- buf = []
- for post in data:
- cipher_type = post['type'] if 'type' in post else 1
- decoded_text = decode_auto(post['text'], cipher_type)
-
- post_buf = ''
- post_buf += '**Дата**: %s\n\n' % post['date']
-
- if 'pic' in post and post['pic']:
- # make sure it is a list
- pic = post['pic'] if isinstance(post['pic'], list) else [post['pic']]
-
- pic_buf = []
- for p in pic:
- pic_buf.append('![](./img/%s =300x)' % p)
- post_buf += '**Пикрилейтед:** %s\n\n' % ''.join(pic_buf)
-
- if 'link' in post:
- # make sure it is a list
- link = post['link'] if isinstance(post['link'], list) else [post['link']]
-
- link_buf = []
- for l in link:
- link_buf.append('[%s](%s)' % (l, l))
- post_buf += '**Ссылки:** %s\n\n' % ', '.join(link_buf)
-
- if 'source' in post:
- post_buf += '**Источник:** %s' % post['source']
- if 'source_link' in post:
- post_buf += ', [%s](%s)\n\n' % (post['source_link'], post['source_link'])
- else:
- post_buf += '\n\n'
-
- post_buf += '**Шифровка (тип %d)**: \n```\n%s\n```\n\n' % (cipher_type, post['text'])
- post_buf += '**Расшифровка:** `%s`' % decoded_text
-
- buf.append(post_buf)
-
- md = "\n---\n".join(buf)
- with open(os.path.join(CWD, 'data.md'), 'w') as f:
- f.write(md)
-
if __name__ == '__main__':
sys.exit(main())