From 55fd4a66d2d2062b9fe83c54db5d70f5c75b9f1c Mon Sep 17 00:00:00 2001 From: simon987 Date: Tue, 16 Nov 2021 11:48:23 -0500 Subject: [PATCH] Fix strip_quotes --- hexlib/text.py | 2 +- setup.py | 2 +- test/test_text.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/hexlib/text.py b/hexlib/text.py index 4ff5904..bc660ee 100644 --- a/hexlib/text.py +++ b/hexlib/text.py @@ -97,7 +97,7 @@ def preprocess(text, lowercase=False, clean_html=False, remove_punctuation=False words = text.split() if strip_quotes: - words = filter(lambda w: w.strip("\"'“"), words) + words = map(lambda w: w.strip("\"'“”"), words) if bigrams: words = _transform_bigram(nltk.bigrams(chain(words, ("*",))), bigrams) diff --git a/setup.py b/setup.py index 9199c84..073c7ea 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup setup( name="hexlib", - version="1.68", + version="1.69", description="Misc utility methods", author="simon987", author_email="me@simon987.net", diff --git a/test/test_text.py b/test/test_text.py index e10e1bd..3d6f7ce 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -247,3 +247,13 @@ class TestText(TestCase): expected = "hello1 test1124test world" self.assertEqual(" ".join(cleaned), expected) + + def test_strip_quotes(self): + text = "'hi' “test” 'hello\"" + cleaned = preprocess( + text, + strip_quotes=True + ) + expected = "hi test hello" + + self.assertEqual(" ".join(cleaned), expected)