From 458641654c80c0920d8449da8cd0661019c24f48 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 8 Aug 2018 21:24:55 -0400 Subject: [PATCH] Minimal configuration for reddit comment callback --- app.py | 9 +++++-- callbacks.py | 27 ++++++++++++--------- crawl_server/reddit_bot.py => reddit_bot.py | 13 +++++----- 3 files changed, 29 insertions(+), 20 deletions(-) rename crawl_server/reddit_bot.py => reddit_bot.py (82%) diff --git a/app.py b/app.py index 0cb6bf2..27da164 100644 --- a/app.py +++ b/app.py @@ -13,6 +13,7 @@ import config from flask_caching import Cache from tasks import TaskManager, Task, TaskResult from search.search import ElasticSearchEngine +from callbacks import PostCrawlCallbackFactory app = Flask(__name__) if config.CAPTCHA_SUBMIT or config.CAPTCHA_LOGIN: @@ -572,7 +573,11 @@ def api_complete_task(): if filename and os.path.exists(filename): os.remove(filename) - # TODO: handle callback here + # Handle task callback + callback = PostCrawlCallbackFactory.get_callback(task) + if callback: + callback.run(task_result, searchEngine) + return "Successfully logged task result and indexed files" else: @@ -666,7 +671,7 @@ def api_task_enqueue(): request.json["url"], request.json["priority"], request.json["callback_type"], - request.json["callback_args"] + json.dumps(request.json["callback_args"]) ) taskManager.queue_task(task) return "" diff --git a/callbacks.py b/callbacks.py index 89bda6c..fec098a 100644 --- a/callbacks.py +++ b/callbacks.py @@ -1,6 +1,7 @@ -from tasks import Task -from crawl_server.reddit_bot import RedditBot +from tasks import Task, TaskResult +from reddit_bot import RedditBot import praw +from search.search import SearchEngine class PostCrawlCallback: @@ -8,7 +9,7 @@ class PostCrawlCallback: def __init__(self, task: Task): self.task = task - def run(self): + def run(self, task_result: TaskResult, search: SearchEngine): raise NotImplementedError @@ -36,26 +37,30 @@ class RedditCallback(PostCrawlCallback): user_agent='github.com/simon987/od-database (by /u/Hexahedr_n)') self.reddit_bot = RedditBot("crawled.txt", reddit) - def run(self): + def run(self, task_result: TaskResult, search: SearchEngine): raise NotImplementedError class RedditPostCallback(RedditCallback): - def run(self): + def run(self, task_result: TaskResult, search: SearchEngine): print("Reddit post callback for task " + str(self.task)) - pass class RedditCommentCallback(RedditCallback): - def run(self): - print("Reddit comment callback for task " + str(self.task)) - pass + def run(self, task_result: TaskResult, search: SearchEngine): + + comment_id = self.task.callback_args["comment_id"] + print("Replying to comment " + comment_id) + + stats = search.get_stats(self.task.website_id) + message = self.reddit_bot.get_comment(stats, self.task.website_id) + print(message) + self.reddit_bot.reply(self.reddit_bot.reddit.comment(comment_id), message) class DiscordCallback(PostCrawlCallback): - def run(self): + def run(self, task_result: TaskResult, search: SearchEngine): print("Discord callback for task " + str(self.task)) - pass diff --git a/crawl_server/reddit_bot.py b/reddit_bot.py similarity index 82% rename from crawl_server/reddit_bot.py rename to reddit_bot.py index bf3c3e4..bff336f 100644 --- a/crawl_server/reddit_bot.py +++ b/reddit_bot.py @@ -54,14 +54,13 @@ class RedditBot: @staticmethod def get_comment(stats: dict, website_id, message: str = ""): - comment = message + " \n" if len(message) > 0 else "" + comment = message + " \n" if message else "" - for stat in stats: - comment += stat + " \n" if len(stat) > 0 else "" - comment += RedditBot.format_stats(stats[stat]) + comment += RedditBot.format_stats(stats) - comment += "[Full Report](https://od-database.simon987.net/website/" + str(website_id) + "/)" - comment += " | [Link list](https://od-database.simon987.net/website/" + str(website_id) + "/links) \n" + comment += "[Full Report](https://od-db.the-eye.eu/website/" + str(website_id) + "/)" + comment += " | [Link list](https://od-db.the-eye.eu/website/" + str(website_id) + "/links)" + comment += " | [Source](https://github.com/simon987/od-database) \n" comment += "*** \n" comment += RedditBot.bottom_line @@ -74,7 +73,7 @@ class RedditBot: result += "File types | Count | Total Size\n" result += ":-- | :-- | :-- \n" counter = 0 - for mime in stats["mime_stats"]: + for mime in stats["ext_stats"]: result += mime[2] result += " | " + str(mime[1]) result += " | " + humanfriendly.format_size(mime[0]) + " \n"