mirror of
https://github.com/simon987/opendirectories-bot.git
synced 2025-04-20 10:46:47 +00:00
Added main script
This commit is contained in:
parent
a2f671f0f5
commit
d82e76387a
84
run.py
Normal file
@@ -0,0 +1,84 @@
import time
from multiprocessing import Process

import praw

from crawler import Crawler
from reddit_bot import RedditBot, TaskQueue, CrawTask, CommentBuilder, ReportBuilder

# Credentials for the 'opendirectories-bot' site are read from praw.ini
reddit = praw.Reddit('opendirectories-bot',
                     user_agent='github.com/simon987/opendirectories-bot v1.0 (by /u/Hexahedr_n)')

subreddit = reddit.subreddit("opendirectories")

# Fetch the 50 newest submissions up front
subs = []
for submission in subreddit.new(limit=50):
    subs.append(submission)

bot = RedditBot("crawled.txt")
tq = TaskQueue()

# Queue every link submission that has not already been crawled or queued
for s in subs:
    if not s.is_self:
        if not bot.has_crawled(s.id) and not tq.is_queued(s.id):
            tq.push(CrawTask(s))

            print("id: " + s.id)
            print("url: " + str(s.url))
            print("title: " + str(s.title))


def execute_task(submission):
    try:
        if not bot.has_crawled(submission.id):
            c = Crawler(submission.url, True)
            c.crawl()
            c.store_report(submission.id, submission.title)

            report_builder = ReportBuilder(c.files, c.base_url)

            # Only comment when the listing totals more than ~10 MB
            if report_builder.get_total_size() > 10000000:
                com_builder = CommentBuilder(report_builder, c.base_url, submission.id)
                com_string = com_builder.get_comment()
                print(com_string)

                # Reddit rate-limits comments; keep retrying until the reply is accepted
                while True:
                    try:
                        submission.reply(com_string)
                        bot.log_crawl(submission.id)
                        break
                    except Exception as e:
                        print("Waiting 10 minutes: " + str(e))
                        time.sleep(600)

    except Exception as e:
        print(e)
        raise e


# Spawn one worker process per queued task
while len(tq.tasks) > 0:
    task = tq.pop()

    if task is not None:
        if not bot.has_crawled(task.submission.id):
            # args must be a tuple so the submission is passed positionally
            p = Process(target=execute_task, args=(task.submission,))
            p.daemon = True
            p.start()
            print("Started process for " + task.submission.title)
        else:
            print("Already crawled " + task.submission.id)


# The workers are daemon processes, so keep the main process alive
while True:
    time.sleep(1)
    print("Waiting..")
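For reference, praw.Reddit('opendirectories-bot', ...) resolves its credentials from a section of the same name in a local praw.ini file. A minimal sketch of what that section might look like (every value below is a hypothetical placeholder, not taken from the repository):

[opendirectories-bot]
client_id=HYPOTHETICAL_CLIENT_ID
client_secret=HYPOTHETICAL_CLIENT_SECRET
username=HYPOTHETICAL_BOT_USERNAME
password=HYPOTHETICAL_BOT_PASSWORD

Without a matching praw.ini section (or equivalent keyword arguments), PRAW fails at construction, before the subreddit is ever queried.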