From d82e76387a31d39db9b0e873c0fafbddcf839773 Mon Sep 17 00:00:00 2001
From: simon
Date: Wed, 7 Feb 2018 19:51:45 -0500
Subject: [PATCH] Added main script

---
 run.py | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 run.py

diff --git a/run.py b/run.py
new file mode 100644
index 0000000..16fd0aa
--- /dev/null
+++ b/run.py
@@ -0,0 +1,86 @@
+import time
+from multiprocessing import Process
+
+import praw
+
+from crawler import Crawler
+from reddit_bot import RedditBot, TaskQueue, CrawTask, CommentBuilder, ReportBuilder
+
+reddit = praw.Reddit('opendirectories-bot',
+                     user_agent='github.com/simon987/opendirectories-bot v1.0 (by /u/Hexahedr_n)')
+
+subreddit = reddit.subreddit("opendirectories")
+
+subs = []
+
+for submission in subreddit.new(limit=50):
+    subs.append(submission)
+
+bot = RedditBot("crawled.txt")
+tq = TaskQueue()
+
+# Queue every link submission that has not been crawled or queued yet
+for s in subs:
+
+    if not s.is_self:
+        if not bot.has_crawled(s.id) and not tq.is_queued(s.id):
+            tq.push(CrawTask(s))
+
+    print("id: " + s.id)
+    print("url: " + str(s.url))
+    print("title: " + str(s.title))
+
+
+def execute_task(submission):
+
+    try:
+        if not bot.has_crawled(submission.id):
+            c = Crawler(submission.url, True)
+            c.crawl()
+            c.store_report(submission.id, submission.title)
+
+            report_builder = ReportBuilder(c.files, c.base_url)
+
+            # Only post a comment for directories larger than ~10 MB
+            if report_builder.get_total_size() > 10000000:
+                com_builder = CommentBuilder(report_builder, c.base_url, submission.id)
+
+                com_string = com_builder.get_comment()
+
+                print(com_string)
+                # Retry until the reply goes through (e.g. after a rate limit)
+                while True:
+                    try:
+                        submission.reply(com_string)
+                        bot.log_crawl(submission.id)
+                        break
+                    except Exception as e:
+                        print("Waiting 10 minutes: " + str(e))
+                        time.sleep(600)
+                        continue
+
+    except Exception as e:
+        print(e)
+        raise e
+
+
+# Spawn one worker process per queued task
+while len(tq.tasks) > 0:
+
+    task = tq.pop()
+
+    if task is not None:
+        if not bot.has_crawled(task.submission.id):
+            # args must be a tuple, not a set
+            p = Process(target=execute_task, args=(task.submission,))
+            p.daemon = True
+            p.start()
+            print("Started process for " + task.submission.title)
+        else:
+            print("Already crawled " + task.submission.id)
+
+
+# Keep the main process alive so the daemon worker processes can finish
+while True:
+    time.sleep(1)
+    print("Waiting..")
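
For context, a minimal standalone sketch of the process-per-task pattern run.py relies on, built only on the Python standard library; the worker function and task names below are illustrative placeholders, not code from this repository:

    import time
    from multiprocessing import Process

    def worker(name):
        # Stand-in for execute_task(submission): one unit of crawl work
        time.sleep(1)
        print("finished " + name)

    if __name__ == '__main__':
        procs = []
        for name in ["a", "b", "c"]:
            # args must be an ordered sequence (a tuple); a set such as
            # {name} has no guaranteed order once it holds several items
            p = Process(target=worker, args=(name,))
            p.daemon = True  # daemon children are killed when the parent exits
            p.start()
            procs.append(p)

        # The parent must outlive its daemon children; joining them is the
        # usual way to do that when it has no other work of its own
        for p in procs:
            p.join()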