diff options
Diffstat (limited to 'main.py')
-rwxr-xr-x | main.py | 257 |
1 files changed, 2 insertions, 255 deletions
@@ -2,12 +2,7 @@ import argparse import sys -import json import os -import re -import time -import datetime -from pprint import pprint try: from termcolor import cprint @@ -15,7 +10,8 @@ try: except ImportError: colors_supported = False -CWD = os.path.dirname(os.path.realpath(__file__)) +from data_lib import load_data, decode_auto + def print_colored(s, color, fallback_prefix=''): if colors_supported: @@ -23,208 +19,7 @@ def print_colored(s, color, fallback_prefix=''): else: print(fallback_prefix + s) -def load_data(): - with open(os.path.join(CWD, "data.json")) as f: - data = json.loads(f.read()) - - # ignore placeholders - data = list(filter(lambda i: i['text'] != '', data)) - - return data - -def clean_string(s, remove_junk=False): - s = s.replace(')', ') ') - s = re.sub(r'(\!|\.)([^\)])', r'\1 \2', s) - #s = s.replace('/', ' ') - s = s.upper() - - s = re.sub(r'\s+', ' ', s).strip() - - junks = [ - 'ВОЕННОЕ', - 'ВЫШЕСТОЯЩИХ', - 'ПРАВО', - 'ПРАВИЛАМ ВОЙНЫ', - 'ВЫПИСКА', - 'КОНТРОЛЬ', - 'ИХ', - 'ПО', - 'НАВЫКИ', - 'С ВЫШЕСТОЯЩИМИ', - #'ПРИСУТСТВИЕ', - #'ЛИНЕЙНО', - 'ЗАКОННО!', - 'ПОХЛЕБКА', - 'СВЯЗЕЙ', - 'ЖУЮЩЕГО ХРЯЩИ', - 'ИНДЕКСИРОВАН БЕЗУКОРИЗНЕННО', - 'ОТКЛАДЫВАЕТСЯ ЛИНЕЙНО', - '- ЕГО ВЕЛИЧЕСТВО', - 'ГУБЕРНИЯ', - 'С ВЫШЕСТОЯЩИМИ КОНТРОЛЬ', - 'С ЛОКАЦИИ', - #'КАЗНЬ', - 'ГУБЕРНИЯ', - 'ПРОВЕРКИ', - 'УСТАНОВЛЕНО', - 'ПОБЕДИТЕЛЕМ', - #'СТАЛЬНЫЕ', - 'НЕРВЫ', - 'ДАРОВАНО', - #'ТРАНСПОРТИРОВКА', - 'ОДОБРЕНО', - 'ПРОЯВЛЕНИЯ', - 'УЗАКОНЕНО', - 'ИМЕЕТСЯ', - 'ЗНАЛ', - 'НЕ ПРИМЕЧЕНО', - 'НА СЕВЕР', - 'ПРИГОВОРИТЬ', - 'ШЕСТВУЕМ', - 'ДАГОН', - 'ДА МЕРЗНУЩИЙ', - 'КОФЕ', - #'РЕАГИРОВАНИЕ', - 'УКАЗАНО', - '- ВЫСОКИЙ ТИТУЛ', - 'ЗАКАЗ', - 'ЧЕРТЫ ЛИЦА', - - # english - 'SCHOOL ON THE RIGHT', - 'WILL NOT ALLOW', - 'FLYWHEEL', - 'TRIUMPHANTLY', - 'BEING USED', - 'NICE', - 'UMBRELLA', - #'BIOROBOT', - 'CONSERVATISM', - 'WAS ESTABLISHED', - 'WITH A PASSWORD', - 'ANT', - 'YEAR', - 'RECOGNIZED', - 'SEARCHED' - #'LEGAL', - #'FIGHTING' - ] - - # только без пробелов - junks_words = list(filter(lambda w: ' ' not in w, junks)) - - # только с пробелами - junks_nwords = list(filter(lambda w: w not in junks_words, junks)) - - if remove_junk: - s = s.split(' ') - s = list(filter(lambda l: re.sub(r'\.|\!$', '', l) not in junks_words, s)) - s = ' '.join(s) - - for j in junks_nwords: - s = s.replace(j, '') - - # хортица - это буква Х - s = s.replace('Х О Р Т И Ц А', 'Х_О_Р_Т_И_Ц_А') - - s = re.sub(r'\s+', ' ', s).strip() - return s - -def decode(s, is_url=False): - buf = '' - for word in s.split(' '): - word = word.strip() - if word == '': - continue - - if re.match(r'^\d+', word): - buf += word - elif is_url and word.endswith('://'): - buf += word[0] - buf += '://' - else: - letter = word[0] - buf += letter - - return buf - -def decode2(s): - buf = '' - for s in re.split(r'[\?\.\!]+', s): - s = s.strip() - if s == '': - continue - - words = s.split(' ') - - letter = words[1][0] - buf += letter - - return buf - -def decode3(s): - buf = '' - for s in re.split(r'[\?\.\!]+', s): - s = s.strip() - s = s.replace(' ', '') - s = s.replace('-', '') - if not s: - continue - - print(s) - continue - - s = s.upper() - - if s[0] in ('Ш', 'Щ', 'И'): - buf += s[0] - elif s[4] == 'Й': - buf += s[4] - elif s[0] == 'И': - buf += 'И' - elif s[7] == 'М': - buf += 'М' - elif s[4] == 'А': - buf += 'А' - elif s[2] == 'Р': - buf += 'Р' - elif s[1] == 'У': - buf += 'У' - elif s[9] == 'Ю': - buf += 'Ю' - else: - buf += '?' - - return buf - - - - - -# s: source -# t: type -def decode_auto(s, t, reverse_decoded=False, remove_junk=True): - if t == 1: - s = clean_string(s, remove_junk=remove_junk) - result = decode(s) - - elif t == 2: - result = decode2(s) - - elif t == 3: - result = decode3(s) - if reverse_decoded: - # reverse string - result = result[::-1] - - return result - - -def sort_data_by_date(item): - return int(time.mktime(datetime.datetime.strptime(item['date'], '%d/%m/%y').timetuple())) - - def main(): parser = argparse.ArgumentParser() parser.add_argument('--decode', action='store_true') @@ -235,7 +30,6 @@ def main(): parser.add_argument('--is-url', action='store_true') parser.add_argument('--type', type=int, choices=[1, 2, 3], default=1) parser.add_argument('--reverse-decoded', action='store_true') - parser.add_argument('--gen-page', action='store_true') args = parser.parse_args() data = load_data() @@ -289,52 +83,5 @@ def main(): count = len(data) print("Total texts: %s" % count) - elif args.gen_page: - # sort by date - data = sorted(data, key=sort_data_by_date) - #pprint(data) - - buf = [] - for post in data: - cipher_type = post['type'] if 'type' in post else 1 - decoded_text = decode_auto(post['text'], cipher_type) - - post_buf = '' - post_buf += '**Дата**: %s\n\n' % post['date'] - - if 'pic' in post and post['pic']: - # make sure it is a list - pic = post['pic'] if isinstance(post['pic'], list) else [post['pic']] - - pic_buf = [] - for p in pic: - pic_buf.append('![](./img/%s =300x)' % p) - post_buf += '**Пикрилейтед:** %s\n\n' % ''.join(pic_buf) - - if 'link' in post: - # make sure it is a list - link = post['link'] if isinstance(post['link'], list) else [post['link']] - - link_buf = [] - for l in link: - link_buf.append('[%s](%s)' % (l, l)) - post_buf += '**Ссылки:** %s\n\n' % ', '.join(link_buf) - - if 'source' in post: - post_buf += '**Источник:** %s' % post['source'] - if 'source_link' in post: - post_buf += ', [%s](%s)\n\n' % (post['source_link'], post['source_link']) - else: - post_buf += '\n\n' - - post_buf += '**Шифровка (тип %d)**: \n```\n%s\n```\n\n' % (cipher_type, post['text']) - post_buf += '**Расшифровка:** `%s`' % decoded_text - - buf.append(post_buf) - - md = "\n---\n".join(buf) - with open(os.path.join(CWD, 'data.md'), 'w') as f: - f.write(md) - if __name__ == '__main__': sys.exit(main()) |