mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-12-14 07:39:05 +00:00
Added pdf & epub parsing
This commit is contained in:
13
spec/EbookParserTest.py
Normal file
13
spec/EbookParserTest.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from unittest import TestCase
|
||||
from parsing import EbookParser
|
||||
|
||||
|
||||
class EbookParserTest(TestCase):
|
||||
|
||||
def test_parse_content(self):
|
||||
|
||||
parser = EbookParser([], 1000)
|
||||
|
||||
info = parser.parse("test_files/epub1.epub")
|
||||
|
||||
self.assertEqual(len(info["content"]), 1000)
|
||||
@@ -10,7 +10,7 @@ class FontParserTest(TestCase):
|
||||
|
||||
info = parser.parse("test_files/truetype1.ttf")
|
||||
|
||||
self.assertEqual(info["font_name"], "Liberation Mono Bold")
|
||||
self.assertEqual(info["content"], "Liberation Mono Bold")
|
||||
|
||||
def test_parse_name_openType(self):
|
||||
|
||||
@@ -18,7 +18,7 @@ class FontParserTest(TestCase):
|
||||
|
||||
info = parser.parse("test_files/opentype1.otf")
|
||||
|
||||
self.assertEqual(info["font_name"], "Linux Biolinum Keyboard O")
|
||||
self.assertEqual(info["content"], "Linux Biolinum Keyboard O")
|
||||
|
||||
def test_parse_name_woff(self):
|
||||
|
||||
@@ -26,7 +26,7 @@ class FontParserTest(TestCase):
|
||||
|
||||
info = parser.parse("test_files/woff.woff")
|
||||
|
||||
self.assertEqual(info["font_name"], "Heart of Gold")
|
||||
self.assertEqual(info["content"], "Heart of Gold")
|
||||
|
||||
def test_parse_name_woff2(self):
|
||||
|
||||
@@ -34,4 +34,4 @@ class FontParserTest(TestCase):
|
||||
|
||||
info = parser.parse("test_files/woff2.woff2")
|
||||
|
||||
self.assertEqual(info["font_name"], "Heart of Gold")
|
||||
self.assertEqual(info["content"], "Heart of Gold")
|
||||
|
||||
14
spec/PdfFileParser.py
Normal file
14
spec/PdfFileParser.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from unittest import TestCase
|
||||
from parsing import PdfFileParser
|
||||
|
||||
|
||||
class PdfParserTest(TestCase):
|
||||
|
||||
def test_parse_content(self):
|
||||
|
||||
parser = PdfFileParser([], 12488)
|
||||
|
||||
info = parser.parse("test_files/pdf1.pdf")
|
||||
|
||||
self.assertEqual(len(info["content"]), 12488)
|
||||
self.assertTrue(info["content"].startswith("Rabies\n03/11/2011\nRabies"))
|
||||
BIN
spec/test_files/epub1.epub
Normal file
BIN
spec/test_files/epub1.epub
Normal file
Binary file not shown.
BIN
spec/test_files/pdf1.pdf
Normal file
BIN
spec/test_files/pdf1.pdf
Normal file
Binary file not shown.
Reference in New Issue
Block a user