Fixed bugs, enhanced parser

This commit is contained in:
simon
2018-02-05 22:05:07 -05:00
parent f3dc1445e4
commit 23775ec126
12 changed files with 459 additions and 80 deletions

View File

@@ -1,5 +1,5 @@
from unittest import TestCase
from reddit_bot import RedditBot
from reddit_bot import RedditBot, TaskQueue, CrawTask
import os
@@ -33,3 +33,62 @@ class RedditBotTest(TestCase):
self.assertTrue(bot.has_crawled("000000"))
class TaskQueueTest(TestCase):
def tearDown(self):
if os.path.isfile("task_queue_test.txt"):
os.remove("task_queue_test.txt")
def test_push_pop_test(self):
if os.path.isfile("task_queue_test.txt"):
os.remove("task_queue_test.txt")
tq = TaskQueue("task_queue_test.txt")
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
task1 = tq.pop()
self.assertEqual(tq.pop(), None)
self.assertEqual(task1.url, "http://awebsite.com/")
self.assertEqual(task1.post_id, "postid")
def test_persistence(self):
if os.path.isfile("task_queue_test.txt"):
os.remove("task_queue_test.txt")
tq = TaskQueue("task_queue_test.txt")
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
tq2 = TaskQueue("task_queue_test.txt")
task = tq2.pop()
self.assertEqual(task.url, "http://awebsite.com/")
self.assertEqual(task.post_id, "postid")
def test_multiple_tasks(self):
if os.path.isfile("task_queue_test.txt"):
os.remove("task_queue_test.txt")
tq = TaskQueue("task_queue_test.txt")
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
self.assertIsNotNone(tq.pop())
self.assertIsNotNone(tq.pop())
self.assertIsNotNone(tq.pop())
self.assertIsNone(tq.pop())
def test_is_queued(self):
if os.path.isfile("task_queue_test.txt"):
os.remove("task_queue_test.txt")
tq = TaskQueue("task_queue_test.txt")
tq.push(CrawTask("http://awebsite.com/", "postid", "a title"))
self.assertTrue(tq.is_queued("postid"))
self.assertFalse(tq.is_queued("123456"))