Bug fixes

This commit is contained in:
simon987
2018-04-21 20:36:49 -04:00
parent 6b754b4bb4
commit b963b667b8
18 changed files with 128 additions and 83 deletions

View File

@@ -8,9 +8,9 @@ class CrawlerTest(TestCase):
def test_dir_walk(self):
c = Crawler([GenericFileParser([Sha1CheckSumCalculator()])])
c = Crawler([GenericFileParser([Sha1CheckSumCalculator()], "test_files/")])
c.crawl("test_folder")
c.crawl("./test_folder")
self.assertEqual(len(c.documents), 31)
@@ -19,3 +19,16 @@ class CrawlerTest(TestCase):
c = Crawler([])
self.assertEqual(c.countFiles("test_folder"), 31)
def test_path(self):
c = Crawler([GenericFileParser([], "./test_folder")])
c.crawl("./test_folder")
file_count_in_sub2 = 0
for doc in c.documents:
if doc["path"] == "sub2":
file_count_in_sub2 += 1
self.assertEqual(file_count_in_sub2, 2)

View File

@@ -6,7 +6,7 @@ class DocxParserTest(TestCase):
def test_parse_content(self):
parser = DocxParser([], 1000)
parser = DocxParser([], 1000, "test_files/")
info = parser.parse("test_files/docx1.docx")

View File

@@ -6,7 +6,7 @@ class EbookParserTest(TestCase):
def test_parse_content(self):
parser = EbookParser([], 1000)
parser = EbookParser([], 1000, "test_files/")
info = parser.parse("test_files/epub1.epub")

View File

@@ -16,34 +16,34 @@ class GenericFileParserTest(TestCase):
test_file.close()
os.utime("test_parse.txt", (1330123456, 1330654321))
self.parser = GenericFileParser([Md5CheckSumCalculator()])
self.parser = GenericFileParser([Md5CheckSumCalculator()], "./test_files/")
def tearDown(self):
os.remove("test_parse.txt")
def test_parse_size(self):
result = self.parser.parse("test_parse.txt")
result = self.parser.parse("./test_parse.txt")
self.assertEqual(result["size"], 8)
def test_parse_name(self):
result = self.parser.parse("test_parse.txt")
result = self.parser.parse("./test_parse.txt")
self.assertEqual(result["name"], "test_parse")
def test_parse_ext(self):
result = self.parser.parse("test_parse.txt")
result = self.parser.parse("./test_parse.txt")
self.assertEqual(result["extension"], "txt")
def test_parse_md5(self):
result = self.parser.parse("test_parse.txt")
result = self.parser.parse("./test_parse.txt")
self.assertEqual(result["md5"], "25D55AD283AA400AF464C76D713C07AD")
def test_mtime(self):
result = self.parser.parse("test_parse.txt")
result = self.parser.parse("./test_parse.txt")
self.assertEqual(result["mtime"], 1330654321)

View File

@@ -6,7 +6,7 @@ class FontParserTest(TestCase):
def test_parse_name_trueType(self):
parser = FontParser([])
parser = FontParser([], "test_files/")
info = parser.parse("test_files/truetype1.ttf")
@@ -14,7 +14,7 @@ class FontParserTest(TestCase):
def test_parse_name_openType(self):
parser = FontParser([])
parser = FontParser([], "test_files/")
info = parser.parse("test_files/opentype1.otf")
@@ -22,7 +22,7 @@ class FontParserTest(TestCase):
def test_parse_name_woff(self):
parser = FontParser([])
parser = FontParser([], "test_files/")
info = parser.parse("test_files/woff.woff")
@@ -30,7 +30,7 @@ class FontParserTest(TestCase):
def test_parse_name_woff2(self):
parser = FontParser([])
parser = FontParser([], "test_files/")
info = parser.parse("test_files/woff2.woff2")

View File

@@ -6,37 +6,33 @@ class MediaFileParserTest(TestCase):
def test_audio_wav(self):
parser = MediaFileParser([])
parser = MediaFileParser([], "test_files/")
info = parser.parse("test_files/cat1.wav")
info = parser.parse("./test_files/cat1.wav")
self.assertEqual(info["format_name"], "wav")
self.assertEqual(info["format_long_name"], "WAV / WAVE (Waveform Audio)")
self.assertEqual(info["duration"], 20.173875)
def test_video_mov(self):
parser = MediaFileParser([])
parser = MediaFileParser([], "./test_files")
info = parser.parse("test_files/vid1.mp4")
info = parser.parse("./test_files/vid1.mp4")
self.assertEqual(info["format_name"], "mov,mp4,m4a,3gp,3g2,mj2")
self.assertEqual(info["format_long_name"], "QuickTime / MOV")
self.assertEqual(info["duration"], 5.334)
def test_video_webm(self):
parser = MediaFileParser([])
parser = MediaFileParser([], "test_files/")
info = parser.parse("test_files/vid2.webm")
self.assertEqual(info["format_name"], "matroska,webm")
self.assertEqual(info["format_long_name"], "Matroska / WebM")
self.assertEqual(info["duration"], 10.619)
def test_video_ogg(self):
parser = MediaFileParser([])
parser = MediaFileParser([], "test_files/")
info = parser.parse("test_files/vid3.ogv")
self.assertEqual(info["format_name"], "ogg")
self.assertEqual(info["format_long_name"], "Ogg")
self.assertEqual(info["duration"], 10.618867)

View File

@@ -6,7 +6,7 @@ class PdfParserTest(TestCase):
def test_parse_content(self):
parser = PdfFileParser([], 12488)
parser = PdfFileParser([], 12488, "test_files/")
info = parser.parse("test_files/pdf1.pdf")

View File

@@ -6,7 +6,7 @@ class PictureFileParserTest(TestCase):
def test_parse_jpg(self):
parser = PictureFileParser([])
parser = PictureFileParser([], "test_files/")
info = parser.parse("test_folder/sample_1.jpg")
@@ -17,7 +17,7 @@ class PictureFileParserTest(TestCase):
def test_parse_png(self):
parser = PictureFileParser([])
parser = PictureFileParser([], "test_files/")
info = parser.parse("test_folder/sample_5.png")
@@ -28,7 +28,7 @@ class PictureFileParserTest(TestCase):
def test_parse_gif(self):
parser = PictureFileParser([])
parser = PictureFileParser([], "test_files/")
info = parser.parse("test_folder/sample_6.gif")
@@ -39,7 +39,7 @@ class PictureFileParserTest(TestCase):
def test_parse_bmp(self):
parser = PictureFileParser([])
parser = PictureFileParser([], "test_files/")
info = parser.parse("test_folder/sample_7.bmp")

View File

@@ -6,7 +6,7 @@ class PdfParserTest(TestCase):
def test_parse_content_xls(self):
parser = SpreadSheetParser([], 1500)
parser = SpreadSheetParser([], 1500, "test_files/")
info = parser.parse("test_files/xls1.xls")
@@ -14,7 +14,7 @@ class PdfParserTest(TestCase):
def test_parse_content_xlsx(self):
parser = SpreadSheetParser([], 1500)
parser = SpreadSheetParser([], 1500, "test_files/")
info = parser.parse("test_files/xlsx1.xlsx")

View File

@@ -6,7 +6,7 @@ class TextFileParserTest(TestCase):
def test_parse_csv(self):
parser = TextFileParser([], 1234)
parser = TextFileParser([], 1234, "test_files/")
info = parser.parse("test_files/text.csv")