mirror of
https://github.com/simon987/opendirectories-bot.git
synced 2025-04-10 14:06:44 +00:00
Added main script
This commit is contained in:
parent
a2f671f0f5
commit
d82e76387a
84
run.py
Normal file
84
run.py
Normal file
@ -0,0 +1,84 @@
|
||||
import pickle
|
||||
from crawler import Crawler
|
||||
from reddit_bot import RedditBot, TaskQueue, CrawTask, CommentBuilder, ReportBuilder
|
||||
import time
|
||||
from multiprocessing import Process
|
||||
import praw
|
||||
|
||||
reddit = praw.Reddit('opendirectories-bot',
|
||||
user_agent='github.com/simon987/opendirectories-bot v1.0 (by /u/Hexahedr_n)')
|
||||
|
||||
subreddit = reddit.subreddit("opendirectories")
|
||||
|
||||
subs = []
|
||||
|
||||
for submission in subreddit.new(limit=50):
|
||||
subs.append(submission)
|
||||
|
||||
bot = RedditBot("crawled.txt")
|
||||
tq = TaskQueue()
|
||||
|
||||
for s in subs:
|
||||
|
||||
if not s.is_self:
|
||||
if not bot.has_crawled(s.id) and not tq.is_queued(s.id):
|
||||
tq.push(CrawTask(s))
|
||||
|
||||
print("id: " + s.id)
|
||||
print("url: " + str(s.url))
|
||||
print("title: " + str(s.title))
|
||||
|
||||
|
||||
def execute_task(submission):
|
||||
|
||||
try:
|
||||
if not bot.has_crawled(submission.id):
|
||||
c = Crawler(submission.url, True)
|
||||
c.crawl()
|
||||
c.store_report(submission.id, submission.title)
|
||||
|
||||
report_builder = ReportBuilder(c.files, c.base_url)
|
||||
|
||||
if report_builder.get_total_size() > 10000000:
|
||||
com_buider = CommentBuilder(ReportBuilder(c.files, c.base_url), c.base_url, submission.id)
|
||||
|
||||
com_string = com_buider.get_comment()
|
||||
|
||||
print(com_string)
|
||||
while True:
|
||||
try:
|
||||
submission.reply(com_string)
|
||||
bot.log_crawl(submission.id)
|
||||
break
|
||||
except Exception as e:
|
||||
print("Waiting 5 minutes: " + str(e))
|
||||
time.sleep(600)
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
raise e
|
||||
|
||||
|
||||
while len(tq.tasks) > 0:
|
||||
|
||||
task = tq.pop()
|
||||
|
||||
if task is not None:
|
||||
if not bot.has_crawled(task.submission.id):
|
||||
p = Process(target=execute_task, args={task.submission})
|
||||
p.daemon = True
|
||||
p.start()
|
||||
print("Started process for " + task.submission.title)
|
||||
else:
|
||||
print("Already crawled " + task.submission)
|
||||
|
||||
|
||||
while True:
|
||||
time.sleep(1)
|
||||
print("Waiting..")
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user