mirror of
https://github.com/simon987/hexlib.git
synced 2025-04-10 14:06:43 +00:00
Add more quotes in strip_quotes
This commit is contained in:
parent
1ce795a759
commit
3677815d57
@@ -97,7 +97,7 @@ def preprocess(text, lowercase=False, clean_html=False, remove_punctuation=False
|
|||||||
words = text.split()
|
words = text.split()
|
||||||
|
|
||||||
if strip_quotes:
|
if strip_quotes:
|
||||||
words = filter(lambda w: w.strip("\"'"), words)
|
words = filter(lambda w: w.strip("\"'“"), words)
|
||||||
|
|
||||||
if bigrams:
|
if bigrams:
|
||||||
words = _transform_bigram(nltk.bigrams(chain(words, ("*",))), bigrams)
|
words = _transform_bigram(nltk.bigrams(chain(words, ("*",))), bigrams)
|
||||||
|
2
setup.py
2
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="hexlib",
|
name="hexlib",
|
||||||
version="1.67",
|
version="1.68",
|
||||||
description="Misc utility methods",
|
description="Misc utility methods",
|
||||||
author="simon987",
|
author="simon987",
|
||||||
author_email="me@simon987.net",
|
author_email="me@simon987.net",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user