diff options
Diffstat (limited to 'util.py')
-rw-r--r-- | util.py | 48 |
1 files changed, 47 insertions, 1 deletions
class BFGrepDictionary:
    """Enumerate every string built from one letter per input line.

    Each input line is reduced to its distinct lowercase characters (with
    ``.``, ``!``, ``?`` and whitespace stripped).  ``go()`` then walks the
    lines in order, picking one character from each, and passes every
    complete candidate string to ``check_word``.
    """

    def __init__(self, lines, dict_file):
        """Build the per-line letter pools.

        Args:
            lines: iterable of strings; each one becomes a pool of letters
                for the corresponding position of the candidate word.
            dict_file: stored on the instance as ``dict_file``; nothing in
                this class reads it yet.
        """
        def prepare_line(line):
            line = re.sub(r'[\.\!\?\s]', '', line)
            line = line.lower()
            # dict.fromkeys de-duplicates while keeping first-occurrence
            # order.  list(set(...)) would make the enumeration order depend
            # on string hash randomization and differ between runs.
            return list(dict.fromkeys(line))

        self.lines = list(map(prepare_line, lines))
        self.lines_count = len(self.lines)
        self.dict_file = dict_file

    def go(self):
        """Enumerate all candidates, starting from the first line."""
        if not self.lines:
            # No lines means no candidates; walk() would otherwise fail
            # with IndexError on self.lines[0].
            return
        self.walk(0, '')
        #bf_all(self.lines)

    def walk(self, start_line, buf):
        """Extend ``buf`` with each usable letter of line ``start_line``.

        Recurses into the next line until the last line is reached, then
        passes the finished string to ``check_word``.

        Args:
            start_line: index into ``self.lines`` of the line to pick from.
            buf: letters chosen from the preceding lines.
        """
        is_last_line = start_line + 1 >= self.lines_count
        for letter in self.lines[start_line]:
            # Candidates never start with the letter 'ы'.
            if start_line == 0 and letter == 'ы':
                continue
            # Never repeat the previously chosen letter back to back.
            if start_line > 0 and letter == buf[-1:]:
                continue
            if is_last_line:
                self.check_word(buf + letter)
            else:
                self.walk(start_line + 1, buf + letter)

    def check_word(self, s):
        """Handle one complete candidate; currently it is just printed."""
        print(s)

# NOTE(review): disabled dictionary lookup kept from the original patch.  As
# written it references names not defined in this block (`word`, `CWD`) and
# builds a shell command by string interpolation; fix both before enabling.
#    def grep():
#        cmd = 'cat /tmp/all.txt | grep --color=never "%s" | xargs' % word
#        #print(cmd)
#        result = subprocess.check_output(cmd, shell=True, cwd=CWD).strip().decode('utf8')
#        if result:
#            result = result.replace("\n", ' ')
#            return result.split(' ')
#        else:
#            return None