summary refs log tree commit diff
path: root/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'util.py')
-rw-r--r-- util.py 48
1 files changed, 47 insertions, 1 deletions
diff --git a/util.py b/util.py
index 9c3bb1c..1ece867 100644
--- a/util.py
+++ b/util.py
@@ -176,7 +176,7 @@ def plural(n, words):
return words[0 if n == 1 else 1]
# split text to sentences
-def split_sen(s):
+def split_sen(s, smart=True):
s = s.strip()
lines = []
@@ -202,6 +202,9 @@ def split_sen(s):
lines.append(line)
+ if not smart:
+ return lines
+
result_lines = []
for line in lines:
words = re.split(r'\s+', line)
@@ -449,3 +452,46 @@ def bf_find_words(lines, words, nospaces=False):
print()
print()
+
+
+class BFGrepDictionary:
+    """Enumerate candidate words built from one letter per input line.
+
+    Each input line is reduced to its distinct lowercase characters, with
+    '.', '!', '?' and whitespace removed. ``go()`` then walks every way of
+    picking one character from line 0, one from line 1, and so on, and
+    hands each complete string to ``check_word()``.
+
+    Attributes:
+        lines: per-line sorted lists of distinct characters.
+        lines_count: number of prepared lines.
+        dict_file: stored as given; not read by the methods in this class.
+    """
+
+    def __init__(self, lines, dict_file):
+        def prepare_line(line):
+            # Drop sentence punctuation and whitespace, fold case, and keep
+            # each remaining character once.
+            line = re.sub(r'[\.\!\?\s]', '', line)
+            line = line.lower()
+            # sorted() rather than list(): iteration order of a set of
+            # strings changes between interpreter runs, which made the
+            # enumeration order irreproducible.
+            return sorted(set(line))
+
+        self.lines = [prepare_line(line) for line in lines]
+        self.lines_count = len(self.lines)
+        self.dict_file = dict_file
+
+    def go(self):
+        """Enumerate all candidates, starting from the first line."""
+        self.walk(0, '')
+        #bf_all(self.lines)
+
+    def walk(self, start_line, buf):
+        """Extend ``buf`` with each allowed letter of line ``start_line``.
+
+        Recurses into the next line until the last one, where the finished
+        string is passed to ``check_word()``. Does nothing when
+        ``start_line`` is past the last line (e.g. no input lines at all).
+        """
+        if start_line >= self.lines_count:
+            # Previously an empty ``lines`` list raised IndexError here.
+            return
+
+        is_last_line = start_line == self.lines_count - 1
+        for letter in self.lines[start_line]:
+            # Candidates never start with 'ы' (presumably because no
+            # dictionary word does -- confirm against the dictionary).
+            if start_line == 0 and letter == 'ы':
+                continue
+            # Skip a letter identical to the one chosen for the previous line.
+            if start_line > 0 and letter == buf[-1:]:
+                continue
+            if is_last_line:
+                self.check_word(buf + letter)
+            else:
+                self.walk(start_line + 1, buf + letter)
+
+    def check_word(self, s):
+        """Handle one complete candidate; currently just prints it."""
+        print(s)
+
+# def grep():
+# cmd = 'cat /tmp/all.txt | grep --color=never "%s" | xargs' % word
+# #print(cmd)
+# result = subprocess.check_output(cmd, shell=True, cwd=CWD).strip().decode('utf8')
+# if result:
+# result = result.replace("\n", ' ')
+# return result.split(' ')
+# else:
+# return None