mirror of
https://github.com/simon987/hexlib.git
synced 2025-12-13 22:59:04 +00:00
Rename text.clean to text.preprocess, add QS util func, more debug logging
This commit is contained in:
@@ -1,13 +1,13 @@
|
||||
from unittest import TestCase
|
||||
|
||||
from hexlib.text import clean
|
||||
from hexlib.text import preprocess
|
||||
|
||||
|
||||
class TestText(TestCase):
|
||||
|
||||
def test_html_invalid(self):
|
||||
text = ""
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
)
|
||||
@@ -17,7 +17,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_html_1(self):
|
||||
text = "<div>Hello, <strong>world</strong></div>"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
)
|
||||
@@ -27,7 +27,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_html_2(self):
|
||||
text = "<div>Hello, <strong>world</strong></div>"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
lowercase=True
|
||||
@@ -38,7 +38,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_html_3(self):
|
||||
text = "<div>\n Hello, \t\n<strong> world </strong>\n\t</div>"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
lowercase=True,
|
||||
@@ -49,7 +49,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_html_4(self):
|
||||
text = "<div>\n Hello, \t\n<strong> world </strong>\n\t</div>"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
lowercase=True,
|
||||
@@ -61,7 +61,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_html_5(self):
|
||||
text = "<div>\n Hello, \t\n<strong> world </strong>\n\t</div>"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
lowercase=True,
|
||||
@@ -74,7 +74,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_html_6(self):
|
||||
text = "<div>\n Hello, \t\n<strong>a the world </strong>\n\t</div>"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
lowercase=True,
|
||||
@@ -88,7 +88,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_html_7(self):
|
||||
text = "<div>\n Hello, \t\n<strong>a the worlds </strong>\n\t</div>"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
lowercase=True,
|
||||
@@ -103,7 +103,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_html_8(self):
|
||||
text = "<div>\n Hello, \t\n<strong>a the worlds! </strong>\n\t</div>"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
lowercase=True,
|
||||
@@ -118,7 +118,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_html_9(self):
|
||||
text = "<div>\n Hello, \t\n<strong>world! it's it`s </strong>\n\t</div>"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
lowercase=True,
|
||||
@@ -133,7 +133,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_html_10(self):
|
||||
text = "<div>\n Hello, \t\n<strong>world! it's it`s https://google.ca/test/abc.pdf </strong>\n\t</div>"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
lowercase=True,
|
||||
@@ -148,8 +148,8 @@ class TestText(TestCase):
|
||||
self.assertEqual(cleaned, expected)
|
||||
|
||||
def test_html_11(self):
|
||||
text = "<div>\n Hello, \t\n<strong>world! it's it`s u us & | </strong>\n\t</div>"
|
||||
cleaned = clean(
|
||||
text = "<div>\n Hello, \t\n<strong>world! it's it`s & | </strong>\n\t</div>"
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
clean_html=True,
|
||||
lowercase=True,
|
||||
@@ -166,7 +166,7 @@ class TestText(TestCase):
|
||||
|
||||
def test_bigrams(self):
|
||||
text = "x A b c d e f g h"
|
||||
cleaned = clean(
|
||||
cleaned = preprocess(
|
||||
text,
|
||||
lowercase=True,
|
||||
bigrams={
|
||||
|
||||
21
test/test_web.py
Normal file
21
test/test_web.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from unittest import TestCase
|
||||
|
||||
from hexlib.web import url_query_value
|
||||
|
||||
|
||||
class TestWebMiscFuncs(TestCase):
    """Checks ``url_query_value`` against a URL whose query repeats a key."""

    def test_qs_1(self):
        """Default call: a single value per key, ``None`` for an absent key."""
        url = "https://test.com/page?a=1&b=2&a=2&c=hello"

        # "a" appears twice in the query string (a=1 and a=2); the default
        # call is expected to yield the first occurrence.
        expectations = (
            ("a", "1"),
            ("b", "2"),
            ("c", "hello"),
            ("D", None),
        )
        for key, wanted in expectations:
            self.assertEqual(url_query_value(url, key), wanted)

    def test_qs_as_list(self):
        """``as_list=True``: every value per key, empty list for an absent key."""
        url = "https://test.com/page?a=1&b=2&a=2&c=hello"

        # With as_list=True both occurrences of "a" are expected, in order.
        expectations = (
            ("a", ["1", "2"]),
            ("b", ["2"]),
            ("c", ["hello"]),
            ("D", []),
        )
        for key, wanted in expectations:
            self.assertEqual(url_query_value(url, key, as_list=True), wanted)
|
||||
Reference in New Issue
Block a user