diff options
author | rusinthread <rusinthread@cock.li> | 2016-12-31 19:54:28 +0300 |
---|---|---|
committer | rusinthread <rusinthread@cock.li> | 2016-12-31 19:54:28 +0300 |
commit | b17bdb0c526c6a8493ae3dd06cf1d45fcf0458e6 (patch) | |
tree | 4651f79ddd05d597e6baaf11aa4653082a6cab67 /analyze_new.py | |
parent | c7e5380f9976d79c1dc81dc49f60288649a43c2b (diff) |
analyze new posts
Diffstat (limited to 'analyze_new.py')
-rw-r--r-- | analyze_new.py | 131 |
1 files changed, 131 insertions, 0 deletions
#!/usr/bin/python3
"""List, per line of text, the letters found at their expected position.

The script reads ``new/text1_orig`` and ``new/text3_orig``, splits them on
sentence punctuation, and counts for every letter of ``alphabet`` how often
it occurs at each 1-based position of a line.  For every line it then
collects the letters that sit exactly at their expected position (a
hand-picked value from ``predefined_table``, otherwise the most frequent
position seen in the texts) and pretty-prints a slice of those per-line
candidate lists.
"""
import re
import operator
import itertools
import sys
from pprint import pprint

alphabet = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'

# Hand-picked 1-based positions; they take precedence over the statistics
# gathered in ``table``.
predefined_table = {
    'М': 8,
    'А': 5,
    'Р': 3,
    'Ш': 1,
    'Щ': 1,
    'И': 1,
    'У': 2,
    'Ю': 10,
    'В': 12,
    'Й': 5,
    'Л': 2,
    'Ц': 2,
    'О': 4,
    'Д': 8,
    'Т': 9,
    'П': 5,
    'Э': 3,
}

# letter -> [(position, count), ...], most frequent position first.
# Filled in place by main(); letter_pos() reads it.
table = {}

# Runs of '?', '.' and '!' separate the pieces that become lines.
SENTENCE_END = re.compile(r'[\?\.\!]+')

# Slice of the non-empty candidate lists that gets reported.
VARIANT_START = 11
VARIANT_STOP = 31

# The original script always exited right after printing the candidate
# lists, so the combination dump never ran.  Flip this to enable it.
ENUMERATE_COMBINATIONS = False


def normalize_text(text):
    """Split *text* on sentence punctuation and strip spaces and hyphens.

    Returns the pieces joined with newlines, with leading and trailing
    whitespace removed from the overall result.  Newlines already present
    in *text* are kept.
    """
    pieces = SENTENCE_END.split(text)
    cleaned = [piece.replace(' ', '').replace('-', '') for piece in pieces]
    return "\n".join(cleaned).strip()


def load_text(n):
    """Read ``new/text<n>_orig`` and return it normalised (see normalize_text)."""
    # Explicit encoding so reading Cyrillic text does not depend on the
    # locale.  NOTE(review): assumes the corpus files are UTF-8 -- confirm.
    with open('new/text' + str(n) + '_orig', encoding='utf-8') as f:
        return normalize_text(f.read())


def letter_pos(letter):
    """Return the expected 1-based position of *letter*, or None.

    ``predefined_table`` wins; otherwise the most frequent position recorded
    in ``table`` is used.  None means no position is known for the letter.
    """
    if letter in predefined_table:
        return predefined_table[letter]
    ranked = table.get(letter)
    return ranked[0][0] if ranked else None


def build_position_table(lines):
    """Count, for each letter of ``alphabet``, its 1-based positions in *lines*.

    Returns a dict ``letter -> [(position, count), ...]`` sorted by count in
    descending order; ties keep the order in which positions were first seen.
    Letters that never occur map to an empty list.
    """
    counts = {letter: {} for letter in alphabet}
    for line in lines:
        # One pass per line instead of one regex scan per alphabet letter.
        for pos, char in enumerate(line, start=1):
            per_letter = counts.get(char)
            if per_letter is not None:
                per_letter[pos] = per_letter.get(pos, 0) + 1
    by_count = operator.itemgetter(1)
    return {
        letter: sorted(seen.items(), key=by_count, reverse=True)
        for letter, seen in counts.items()
    }


def letter_at(line, letter, pos):
    """Return True if *line* has *letter* at 1-based position *pos*."""
    return 0 < pos <= len(line) and line[pos - 1] == letter


def candidates_by_fixed_position(line):
    """Return the letters of ``alphabet`` found in *line* at ``letter_pos``.

    The result follows the order of ``alphabet``.
    """
    found = []
    for letter in alphabet:
        pos = letter_pos(letter)
        if pos is not None and letter_at(line, letter, pos):
            found.append(letter)
    return found


def candidates_by_top_positions(line, top=3):
    """Alternative strategy: accept any of the *top* most frequent positions.

    This is the branch that was switched off with ``if False:`` in the
    original script.  'Щ' is always tested at position 1, as it was there.
    """
    found = []
    for letter in alphabet:
        ranked = table.get(letter)
        if not ranked:
            continue
        positions = [1 if letter == 'Щ' else pos for pos, _ in ranked[:top]]
        if any(letter_at(line, letter, pos) for pos in positions):
            found.append(letter)
    return found


def collect_variants(lines, start=VARIANT_START, stop=VARIANT_STOP,
                     candidates=candidates_by_fixed_position):
    """Return the non-empty candidate lists of *lines*, sliced ``[start:stop]``."""
    per_line = (candidates(line) for line in lines)
    non_empty = [found for found in per_line if found]
    return non_empty[start:stop]


def print_combinations(variants):
    """Print every combination that takes one letter from each candidate list.

    Combinations starting with 'Ь' are skipped.  The product is consumed
    lazily, so it is never held in memory as a whole.  The original script
    also carried commented-out filters on other positions of a combination.
    """
    for combo in itertools.product(*variants):
        if combo and combo[0] == 'Ь':
            continue
        print(''.join(combo))


def main():
    """Build the position statistics and print the candidate letters."""
    # Only texts 1 and 3 are analysed; text 2 was commented out originally.
    text = (load_text(1) + "\n" + load_text(3) + "\n").upper()
    lines = text.split("\n")

    table.clear()
    table.update(build_position_table(lines))

    variants = collect_variants(lines)
    pprint(variants)

    if ENUMERATE_COMBINATIONS:
        print_combinations(variants)


if __name__ == '__main__':
    sys.exit(main())