1 files changed, 26 insertions, 3 deletions
diff --git a/main.py b/main.py
index 5e50913..48ac788 100755
--- a/main.py
+++ b/main.py
@@ -26,8 +26,8 @@ def load_data():
 
 def clean_string(s, remove_junk=False):
     s = s.replace(')', ') ')
-    s = re.sub(r'\!([^\)])', r'! \1', s)
-    s = s.replace('/', ' ')
+    s = re.sub(r'(\!|\.)([^\)])', r'\1 \2', s)
+    #s = s.replace('/', ' ')
     s = s.upper()
     
     s = re.sub(r'\s+', ' ', s).strip()
@@ -55,6 +55,7 @@ def clean_string(s, remove_junk=False):
         'ГУБЕРНИЯ',
         'С ВЫШЕСТОЯЩИМИ КОНТРОЛЬ',
         'С ЛОКАЦИИ',
+        'SEARCHED',
         #'КАЗНЬ',
         'ГУБЕРНИЯ',
         'ПРОВЕРКИ',
@@ -80,7 +81,26 @@ def clean_string(s, remove_junk=False):
         'УКАЗАНО',
         '- ВЫСОКИЙ ТИТУЛ',
         'ЗАКАЗ',
-        'ЧЕРТЫ ЛИЦА'
+        'ЧЕРТЫ ЛИЦА',
+        
+        # english
+        'SCHOOL ON THE RIGHT',
+        'WILL NOT ALLOW',
+        'FLYWHEEL',
+        'TRIUMPHANTLY',
+        #'UNACCEPTABLE',
+        'BEING USED',
+        'NICE',
+        'UMBRELLA',
+        #'BIOROBOT',
+        'CONSERVATISM',
+        'WAS ESTABLISHED',
+        'WITH A PASSWORD',
+        'ANT',
+        'YEAR',
+        'RECOGNIZED'
+        #'LEGAL',
+        #'FIGHTING'
     ]
 
     # только без пробелов
@@ -113,6 +133,9 @@ def decode(s):
 
         if re.match(r'^\d+\%$', word):
             buf += word
+        elif word.endswith('://'):
+            buf += word[0]
+            buf += '://'
         else:
             letter = word[0]
             buf += letter