Crawl tasks are now fetched by the crawlers instead of pushed by the server

This commit is contained in:
Simon
2018-07-14 17:31:18 -04:00
parent d9e9f53f92
commit fe1d29aaea
20 changed files with 376 additions and 749 deletions

61
callbacks.py Normal file
View File

@@ -0,0 +1,61 @@
from tasks import Task
from crawl_server.reddit_bot import RedditBot
import praw
class PostCrawlCallback:
    """Base type for callbacks attached to a crawl task.

    Subclasses are expected to override :meth:`run`; the implementation
    provided here only raises ``NotImplementedError``.
    """

    def __init__(self, task: Task):
        """Remember the task this callback belongs to.

        :param task: the task object; stored unchanged as ``self.task``.
        """
        self.task = task

    def run(self):
        """Execute the callback.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
class PostCrawlCallbackFactory:
    """Chooses the callback class that matches a task's ``callback_type``."""

    @staticmethod
    def get_callback(task: Task):
        """Build the callback object for ``task``.

        :param task: object exposing a ``callback_type`` attribute.
        :return: a ``RedditPostCallback`` for ``"reddit_post"``, a
            ``RedditCommentCallback`` for ``"reddit_comment"``, a
            ``DiscordCallback`` for ``"discord"``, or ``None`` when the
            type matches none of these.
        """
        kind = task.callback_type

        if kind == "reddit_post":
            return RedditPostCallback(task)
        if kind == "reddit_comment":
            return RedditCommentCallback(task)
        if kind == "discord":
            return DiscordCallback(task)

        # Unrecognised callback type: no callback object is created.
        return None
class RedditCallback(PostCrawlCallback):
    """Common base for the Reddit callbacks.

    Construction creates a ``praw.Reddit`` client and wraps it in a
    ``RedditBot`` that is kept on ``self.reddit_bot``. ``run`` is still
    left for subclasses to implement.
    """

    def __init__(self, task: Task):
        """Store ``task`` and set up ``self.reddit_bot``.

        :param task: the task object, forwarded to the parent constructor.
        """
        super().__init__(task)

        # First positional argument is the praw site name
        # (presumably a section of praw.ini holding credentials -- confirm).
        client = praw.Reddit(
            'opendirectories-bot',
            user_agent='github.com/simon987/od-database (by /u/Hexahedr_n)',
        )
        # NOTE(review): "crawled.txt" is handed to RedditBot as-is; its exact
        # role is defined by RedditBot, not visible from here.
        self.reddit_bot = RedditBot("crawled.txt", client)

    def run(self):
        """Execute the callback.

        :raises NotImplementedError: always; concrete Reddit callbacks
            override this method.
        """
        raise NotImplementedError()
class RedditPostCallback(RedditCallback):
    """Callback selected for tasks whose type is ``"reddit_post"``."""

    def run(self):
        """Print a line identifying this callback and its task.

        No other action is performed at present.
        """
        message = "Reddit post callback for task " + str(self.task)
        print(message)
class RedditCommentCallback(RedditCallback):
    """Callback selected for tasks whose type is ``"reddit_comment"``."""

    def run(self):
        """Print a line identifying this callback and its task.

        No other action is performed at present.
        """
        message = "Reddit comment callback for task " + str(self.task)
        print(message)
class DiscordCallback(PostCrawlCallback):
    """Callback selected for tasks whose type is ``"discord"``."""

    def run(self):
        """Print a line identifying this callback and its task.

        No other action is performed at present.
        """
        message = "Discord callback for task " + str(self.task)
        print(message)