Added pdf & epub parsing

This commit is contained in:
simon987
2018-04-16 19:42:40 -04:00
parent 6d3cceb1b1
commit 17c682a5ef
11 changed files with 264 additions and 57 deletions

14
spec/PdfFileParser.py Normal file
View File

@@ -0,0 +1,14 @@
from unittest import TestCase
from parsing import PdfFileParser
class PdfParserTest(TestCase):
    """Tests for PdfFileParser using the ``test_files/pdf1.pdf`` fixture."""

    def test_parse_content(self):
        """Parse the fixture and check the content's length and leading text."""
        # The same number is passed to the parser and expected as the length
        # of the parsed content; keep it in one place so both stay in sync.
        # NOTE(review): presumably the parser's content-length limit -- confirm
        # against PdfFileParser's constructor.
        content_length = 12488

        parser = PdfFileParser([], content_length)
        # Path is relative, so the test relies on the working directory
        # containing ``test_files/``.
        info = parser.parse("test_files/pdf1.pdf")

        content = info["content"]
        self.assertEqual(len(content), content_length)
        self.assertTrue(content.startswith("Rabies\n03/11/2011\nRabies"))