Added pdf & epub parsing

This commit is contained in:
simon987
2018-04-16 19:42:40 -04:00
parent 6d3cceb1b1
commit 17c682a5ef
11 changed files with 264 additions and 57 deletions

13
spec/EbookParserTest.py Normal file
View File

@@ -0,0 +1,13 @@
from unittest import TestCase
from parsing import EbookParser
class EbookParserTest(TestCase):
    """Tests for EbookParser using the sample EPUB in test_files/."""

    def test_parse_content(self):
        """The parsed "content" entry of epub1.epub has length 1000."""
        # The same number is handed to the parser and expected as the length
        # of the extracted content (presumably a content-length limit --
        # confirm against EbookParser).
        expected_length = 1000
        ebook_parser = EbookParser([], expected_length)

        parsed = ebook_parser.parse("test_files/epub1.epub")

        self.assertEqual(len(parsed["content"]), expected_length)

View File

@@ -10,7 +10,7 @@ class FontParserTest(TestCase):
info = parser.parse("test_files/truetype1.ttf")
self.assertEqual(info["font_name"], "Liberation Mono Bold")
self.assertEqual(info["content"], "Liberation Mono Bold")
def test_parse_name_openType(self):
@@ -18,7 +18,7 @@ class FontParserTest(TestCase):
info = parser.parse("test_files/opentype1.otf")
self.assertEqual(info["font_name"], "Linux Biolinum Keyboard O")
self.assertEqual(info["content"], "Linux Biolinum Keyboard O")
def test_parse_name_woff(self):
@@ -26,7 +26,7 @@ class FontParserTest(TestCase):
info = parser.parse("test_files/woff.woff")
self.assertEqual(info["font_name"], "Heart of Gold")
self.assertEqual(info["content"], "Heart of Gold")
def test_parse_name_woff2(self):
@@ -34,4 +34,4 @@ class FontParserTest(TestCase):
info = parser.parse("test_files/woff2.woff2")
self.assertEqual(info["font_name"], "Heart of Gold")
self.assertEqual(info["content"], "Heart of Gold")

14
spec/PdfFileParser.py Normal file
View File

@@ -0,0 +1,14 @@
from unittest import TestCase
from parsing import PdfFileParser
class PdfParserTest(TestCase):
    """Tests for PdfFileParser using the sample PDF in test_files/."""

    def test_parse_content(self):
        """The parsed "content" of pdf1.pdf has the expected length and prefix."""
        # The same number is handed to the parser and expected as the length
        # of the extracted content (presumably a content-length limit --
        # confirm against PdfFileParser).
        expected_length = 12488
        expected_prefix = "Rabies\n03/11/2011\nRabies"
        pdf_parser = PdfFileParser([], expected_length)

        parsed = pdf_parser.parse("test_files/pdf1.pdf")

        self.assertEqual(len(parsed["content"]), expected_length)
        self.assertTrue(parsed["content"].startswith(expected_prefix))

BIN
spec/test_files/epub1.epub Normal file

Binary file not shown.

BIN
spec/test_files/pdf1.pdf Normal file

Binary file not shown.