mirror of
https://github.com/simon987/hexlib.git
synced 2025-12-13 14:49:05 +00:00
Add trigrams
This commit is contained in:
@@ -245,6 +245,20 @@ class TestText(TestCase):
|
||||
|
||||
self.assertEqual(cleaned, expected)
|
||||
|
||||
def test_trigrams(self):
    """Check that ``preprocess`` joins configured trigrams with underscores.

    The input is lowercased (``lowercase=True``), and each run of three
    tokens listed in ``trigrams`` is expected to come back as a single
    underscore-joined token; all other tokens are expected unchanged.
    """
    known_trigrams = {
        ("a", "b", "c"),
        ("e", "f", "g"),
    }

    result = preprocess(
        "x A b c d e f g h",
        lowercase=True,
        trigrams=known_trigrams,
    )

    # "A b c" only matches ("a", "b", "c") once the text is lowercased.
    self.assertEqual(result, "x a_b_c d e_f_g h")
|
||||
|
||||
def test_remove_numbers(self):
|
||||
text = "Hello1 test1124test 12 1 1111111 world"
|
||||
cleaned = preprocess(
|
||||
|
||||
Reference in New Issue
Block a user