1 files changed, 202 insertions, 0 deletions
diff --git a/data_lib.py b/data_lib.py
new file mode 100644
index 0000000..7c7859a
--- /dev/null
+++ b/data_lib.py
@@ -0,0 +1,202 @@
+import os
+import json
+import re
+
+CWD = os.path.dirname(os.path.realpath(__file__))
+
+def load_data():
+    """Return the records of data.json, skipping empty-text placeholders.
+
+    The file sits next to this module and is read as UTF-8 explicitly, since
+    the locale default encoding can fail on or garble non-ASCII text.
+    """
+    with open(os.path.join(CWD, "data.json"), encoding="utf-8") as f:
+        return [item for item in json.load(f) if item['text'] != '']
+
+# Single-word junk for clean_string(remove_junk=True). A token is compared
+# after its '.' characters and one trailing '!' have been stripped.
+# NOTE(review): 'ЗАКОННО!' keeps its '!', so it looks unmatchable - confirm.
+# Commented out in the original list: ПРИСУТСТВИЕ, ЛИНЕЙНО, КАЗНЬ, СТАЛЬНЫЕ,
+# ТРАНСПОРТИРОВКА, РЕАГИРОВАНИЕ, BIOROBOT, LEGAL, FIGHTING.
+_JUNK_WORDS = frozenset((
+    'ВОЕННОЕ',
+    'ВЫШЕСТОЯЩИХ',
+    'ПРАВО',
+    'ВЫПИСКА',
+    'КОНТРОЛЬ',
+    'ИХ',
+    'ПО',
+    'НАВЫКИ',
+    'ЗАКОННО!',
+    'ПОХЛЕБКА',
+    'СВЯЗЕЙ',
+    'ГУБЕРНИЯ',
+    'ПРОВЕРКИ',
+    'УСТАНОВЛЕНО',
+    'ПОБЕДИТЕЛЕМ',
+    'НЕРВЫ',
+    'ДАРОВАНО',
+    'ОДОБРЕНО',
+    'ПРОЯВЛЕНИЯ',
+    'УЗАКОНЕНО',
+    'ИМЕЕТСЯ',
+    'ЗНАЛ',
+    'ПРИГОВОРИТЬ',
+    'ШЕСТВУЕМ',
+    'ДАГОН',
+    'КОФЕ',
+    'УКАЗАНО',
+    'ЗАКАЗ',
+    # english
+    'FLYWHEEL',
+    'TRIUMPHANTLY',
+    'NICE',
+    'UMBRELLA',
+    'CONSERVATISM',
+    'ANT',
+    'YEAR',
+    'RECOGNIZED',
+    'SEARCHED',
+))
+
+# Multi-word junk, removed as plain substrings in exactly this order.
+_JUNK_PHRASES = (
+    'ПРАВИЛАМ ВОЙНЫ',
+    'С ВЫШЕСТОЯЩИМИ',
+    'ЖУЮЩЕГО ХРЯЩИ',
+    'ИНДЕКСИРОВАН БЕЗУКОРИЗНЕННО',
+    'ОТКЛАДЫВАЕТСЯ ЛИНЕЙНО',
+    '- ЕГО ВЕЛИЧЕСТВО',
+    'С ВЫШЕСТОЯЩИМИ КОНТРОЛЬ',
+    'С ЛОКАЦИИ',
+    'НЕ ПРИМЕЧЕНО',
+    'НА СЕВЕР',
+    'ДА МЕРЗНУЩИЙ',
+    '- ВЫСОКИЙ ТИТУЛ',
+    'ЧЕРТЫ ЛИЦА',
+    # english
+    'SCHOOL ON THE RIGHT',
+    'WILL NOT ALLOW',
+    'BEING USED',
+    'WAS ESTABLISHED',
+    'WITH A PASSWORD',
+)
+
+
+def clean_string(s, remove_junk=False):
+    """Normalise s to upper case with single spaces; optionally drop junk.
+
+    A space is inserted after every ')' and after every '.' or '!' that is
+    followed by a character other than ')'. With remove_junk, tokens listed
+    in _JUNK_WORDS and substrings listed in _JUNK_PHRASES are removed, and
+    the spaced-out word 'Х О Р Т И Ц А' is joined with underscores.
+
+    Returns the cleaned string.
+    """
+    s = s.replace(')', ') ')
+    s = re.sub(r'(\!|\.)([^\)])', r'\1 \2', s)
+    s = re.sub(r'\s+', ' ', s.upper()).strip()
+
+    if remove_junk:
+        # Pattern kept verbatim: '\.' matches anywhere, '\!' only at the end.
+        s = ' '.join(
+            token for token in s.split(' ')
+            if re.sub(r'\.|\!$', '', token) not in _JUNK_WORDS
+        )
+        for phrase in _JUNK_PHRASES:
+            s = s.replace(phrase, '')
+
+        # "Хортица" stands for the single letter Х, so join it into one token.
+        s = s.replace('Х О Р Т И Ц А', 'Х_О_Р_Т_И_Ц_А')
+
+    return re.sub(r'\s+', ' ', s).strip()
+
+def decode(s, is_url=False):
+    """Decode an acrostic: join the first character of every word in s.
+
+    Words are split on any whitespace run. A word starting with a digit is
+    kept whole; with is_url=True a word ending in '://' becomes its first
+    character followed by '://'. Returns the decoded string.
+    """
+    parts = []
+    for word in s.split():  # was split(' '): tabs/newlines glued words together
+        if re.match(r'\d', word):
+            parts.append(word)
+        elif is_url and word.endswith('://'):
+            parts.append(word[0] + '://')
+        else:
+            parts.append(word[0])
+
+    return ''.join(parts)
+
+def decode2(s):
+    """Join the first letter of the second word of each sentence in s.
+
+    Sentences are delimited by runs of '?', '.' or '!'. Sentences with fewer
+    than two words are skipped (the original raised IndexError on them).
+    """
+    letters = []
+    for sentence in re.split(r'[\?\.\!]+', s):
+        words = sentence.split()  # whitespace-robust: no empty "words"
+        if len(words) >= 2:
+            letters.append(words[1][0])
+
+    return ''.join(letters)
+
+def decode3(s):
+    """Decode s sentence by sentence using fixed positional letter rules.
+
+    s is split on runs of '?', '.' or '!'. Each sentence is stripped, has its
+    spaces and hyphens removed and is upper-cased; every non-empty sentence
+    then contributes one character:
+
+    * its first letter, if that is 'Ш', 'Щ' or 'И';
+    * otherwise the letter of the first (index, letter) rule that finds its
+      letter at that index of the sentence;
+    * otherwise '?'.
+
+    Fixes: a leftover debugging `print(s)` + `continue` made all rules
+    unreachable (the result was always ''), and the rules indexed up to s[9]
+    unguarded. An index past the end now just means "no match".
+    """
+    # (index, expected letter) pairs, tried in order; the first hit wins.
+    rules = ((4, 'Й'), (7, 'М'), (4, 'А'), (2, 'Р'), (1, 'У'), (9, 'Ю'))
+
+    decoded = []
+    for sentence in re.split(r'[\?\.\!]+', s):
+        sentence = sentence.strip().replace(' ', '').replace('-', '').upper()
+        if not sentence:
+            continue
+        if sentence[0] in ('Ш', 'Щ', 'И'):
+            decoded.append(sentence[0])
+            continue
+        for index, letter in rules:
+            if sentence[index:index + 1] == letter:  # slice: safe past the end
+                decoded.append(letter)
+                break
+        else:
+            decoded.append('?')
+    return ''.join(decoded)
+
+
+    
+
+
+def decode_auto(s, t, reverse_decoded=False, remove_junk=True):
+    """Decode source text s with decoding scheme t (1, 2 or 3).
+
+    Scheme 1 cleans s with clean_string (dropping junk when remove_junk is
+    true) and runs decode; scheme 2 runs decode2; scheme 3 runs decode3.
+    With reverse_decoded the decoded string is returned reversed.
+    Raises ValueError for any other t (previously an UnboundLocalError).
+    """
+    if t == 1:
+        result = decode(clean_string(s, remove_junk=remove_junk))
+    elif t == 2:
+        result = decode2(s)
+    elif t == 3:
+        result = decode3(s)
+    else:
+        raise ValueError('unknown decode type: %r' % (t,))
+
+    return result[::-1] if reverse_decoded else result