summaryrefslogtreecommitdiff
path: root/analyze_new.py
diff options
context:
space:
mode:
author rusinthread <rusinthread@cock.li> 2016-12-31 19:54:28 +0300
committer rusinthread <rusinthread@cock.li> 2016-12-31 19:54:28 +0300
commit b17bdb0c526c6a8493ae3dd06cf1d45fcf0458e6 (patch)
tree 4651f79ddd05d597e6baaf11aa4653082a6cab67 /analyze_new.py
parent c7e5380f9976d79c1dc81dc49f60288649a43c2b (diff)
analyze new posts
Diffstat (limited to 'analyze_new.py')
-rw-r--r-- analyze_new.py 131
1 files changed, 131 insertions, 0 deletions
diff --git a/analyze_new.py b/analyze_new.py
new file mode 100644
index 0000000..348ed8d
--- /dev/null
+++ b/analyze_new.py
@@ -0,0 +1,131 @@
+#!/usr/bin/python3
+import re
+import operator
+import itertools
+import sys
+from pprint import pprint
+
+def load_text(n):
+    """Read ``new/text<n>_orig`` and return its pieces, one per line.
+
+    The file content is cut at every run of ``?``, ``.`` or ``!``. Spaces and
+    hyphens are removed from each piece, the pieces are joined with newlines,
+    and leading/trailing whitespace of the joined result is stripped.
+    """
+    path = 'new/text' + str(n) + '_orig'
+    with open(path) as source:
+        raw = source.read()
+
+    pieces = [
+        piece.replace(' ', '').replace('-', '')
+        for piece in re.split(r'[\?\.\!]+', raw)
+    ]
+    return "\n".join(pieces).strip()
+
+def letter_pos(letter):
+    """Return the 1-based position to test for `letter`, or None.
+
+    A value in `predefined_table` takes precedence. Otherwise the first
+    entry of `table[letter]` supplies the position; None is returned when
+    `table[letter]` is empty.
+    """
+    if letter in predefined_table:
+        return predefined_table[letter]
+
+    ranked = table[letter]
+    return ranked[0][0] if ranked else None
+
+# Upper-case letters whose positions are tallied from the corpus.
+alphabet = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
+
+# Corpus: texts 1 and 3, each followed by a newline, upper-cased as a whole.
+# Text 2 is currently left out.
+text = ''.join(load_text(n) + "\n" for n in (1, 3)).upper()
+
+# Hand-set 1-based positions. letter_pos() returns these in preference to
+# the positions derived from the corpus.
+predefined_table = {
+    'М': 8,
+    'А': 5,
+    'Р': 3,
+    'Ш': 1,
+    'Щ': 1,
+    'И': 1,
+    'У': 2,
+    'Ю': 10,
+    'В': 12,
+    'Й': 5,
+    'Л': 2,
+    'Ц': 2,
+    'О': 4,
+    'Д': 8,
+    'Т': 9,
+    'П': 5,
+    'Э': 3,
+}
+
+# Histogram per letter: table[letter][position] = number of corpus lines
+# occurrences of `letter` at that 1-based position within a line.
+lines = text.split("\n")
+table = {}
+for a in alphabet:
+    counts = {}
+    for line in lines:
+        for pos, ch in enumerate(line, start=1):
+            if ch == a:
+                counts[pos] = counts.get(pos, 0) + 1
+    table[a] = counts
+
+# Turn every histogram into a list of (position, count) pairs ordered by
+# count, highest first; equal counts keep the order in which they were seen.
+table = {
+    a: sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
+    for a, counts in table.items()
+}
+
+#pprint(table['Э'])
+#sys.exit()
+
+# Strategy switches for the matching pass below (formerly the literal
+# `if True:` / `if False:` guards).
+USE_BEST_POSITION = True   # test each letter at the position from letter_pos()
+USE_TOP_POSITIONS = False  # also test each letter at its 3 most frequent positions
+
+
+def _letter_at(line, position, letter):
+    """Return True if the 1-based `position` of `line` holds `letter`.
+
+    Returns False when `line` is too short to have that position.
+    """
+    try:
+        return line[position - 1] == letter
+    except IndexError:
+        return False
+
+
+# For every corpus line, collect the letters found at their expected position.
+variants = []
+for line in lines:
+    valid = []
+    for a in table:
+        if USE_BEST_POSITION:
+            index = letter_pos(a)
+            # None means the letter never occurs in the corpus and has no
+            # predefined position, so there is nothing to test.
+            if index is not None and _letter_at(line, index, a):
+                valid.append(a)
+
+        if USE_TOP_POSITIONS:
+            ranked = table[a]
+            if ranked:
+                # 'Щ' is pinned to position 1; every other letter is tried
+                # at up to three of its most frequent positions.
+                if a == 'Щ':
+                    candidates = [1]
+                else:
+                    candidates = [pos for pos, _count in ranked[:3]]
+                if a not in valid and any(
+                    _letter_at(line, pos, a) for pos in candidates
+                ):
+                    valid.append(a)
+
+    variants.append(valid)
+
+# Drop lines where no letter matched, then keep entries 11..30 (0-based) of
+# what remains. The window is hard-coded; its rationale is not visible here.
+variants = [candidate for candidate in variants if candidate]
+variants = variants[11:31]
+
+pprint(variants)
+# The script stops here, so everything after this line is currently unreachable.
+sys.exit()
+
+#variants = variants[11:]
+# Print every combination that takes one letter from each entry of `variants`.
+# The product is iterated lazily: materialising it as a list first would hold
+# every combination in memory at once.
+for r in itertools.product(*variants):
+    # An empty `variants` yields a single empty combination; skip it rather
+    # than fail on r[0].
+    if not r:
+        continue
+    # Combinations starting with 'Ь' are discarded (presumably because a word
+    # cannot begin with a soft sign -- confirm).
+    if r[0] == 'Ь':
+        continue
+
+    # Further manual constraints can be added here while narrowing the
+    # result, e.g. `if r[1] != 'А': continue`.
+
+    print(''.join(r))