mirror of
https://github.com/simon987/hexlib.git
synced 2025-04-04 02:12:59 +00:00
Add more quotes in strip_quotes
This commit is contained in:
parent
1ce795a759
commit
3677815d57
@@ -97,7 +97,7 @@ def preprocess(text, lowercase=False, clean_html=False, remove_punctuation=False
|
||||
words = text.split()
|
||||
|
||||
if strip_quotes:
|
||||
- words = filter(lambda w: w.strip("\"'"), words)
|
||||
+ words = filter(lambda w: w.strip("\"'“"), words)
|
||||
|
||||
if bigrams:
|
||||
words = _transform_bigram(nltk.bigrams(chain(words, ("*",))), bigrams)
|
||||
|
Loading…
x
Reference in New Issue
Block a user