diff options
Diffstat (limited to 'main.py')
-rwxr-xr-x | main.py | 42 |
1 files changed, 34 insertions, 8 deletions
@@ -55,7 +55,6 @@ def clean_string(s, remove_junk=False): 'ГУБЕРНИЯ', 'С ВЫШЕСТОЯЩИМИ КОНТРОЛЬ', 'С ЛОКАЦИИ', - 'SEARCHED', #'КАЗНЬ', 'ГУБЕРНИЯ', 'ПРОВЕРКИ', @@ -88,7 +87,6 @@ def clean_string(s, remove_junk=False): 'WILL NOT ALLOW', 'FLYWHEEL', 'TRIUMPHANTLY', - #'UNACCEPTABLE', 'BEING USED', 'NICE', 'UMBRELLA', @@ -99,7 +97,7 @@ def clean_string(s, remove_junk=False): 'ANT', 'YEAR', 'RECOGNIZED', - #'SEARCHED' + 'SEARCHED' #'LEGAL', #'FIGHTING' ] @@ -124,7 +122,6 @@ def clean_string(s, remove_junk=False): s = re.sub(r'\s+', ' ', s).strip() return s - def decode(s, is_url=False): buf = '' for word in s.split(' '): @@ -142,6 +139,20 @@ def decode(s, is_url=False): buf += letter return buf + +def decode2(s): + buf = '' + for s in re.split(r'[\?\.\!]+', s): + s = s.strip() + if s == '': + continue + + words = s.split(' ') + + letter = words[1][0] + buf += letter + + return buf def main(): parser = argparse.ArgumentParser() @@ -150,24 +161,39 @@ def main(): parser.add_argument('--decode-string') parser.add_argument('--with-junk', action='store_true') parser.add_argument('--is-url', action='store_true') + parser.add_argument('--type', type=int, choices=[1, 2], default=1) + parser.add_argument('--reverse-decoded', action='store_true') args = parser.parse_args() data = load_data() if args.decode: - # ignore type2 - data = list(filter(lambda i: 'type' not in i, data)) + # filter by type + if args.type == 2: + data = list(filter(lambda i: 'type' in i and i['type'] == 2, data)) + else: + data = list(filter(lambda i: 'type' not in i, data)) # sort by text length data = sorted(data, key=lambda i: len(i['text'])) for obj in data: text = obj['text'] - text = clean_string(text, remove_junk=(not args.with_junk)) + + if args.type == 1: + text = clean_string(text, remove_junk=(not args.with_junk)) + text_decoded = decode(text) + + elif args.type == 2: + text_decoded = decode2(text) + + if args.reverse_decoded: + # reverse string + text_decoded = text_decoded[::-1] print(obj['text']) print_colored(text, 'green', fallback_prefix='[CLEANED] ') - print_colored(decode(text), 'cyan', fallback_prefix='[DECODED] ') + print_colored(text_decoded, 'cyan', fallback_prefix='[DECODED] ') if 'pic' in obj: pic = obj['pic'] if isinstance(obj['pic'], list) else [obj['pic']] |