Mirror of https://github.com/simon987/opendirectories-bot.git, synced 2025-10-31 14:16:52 +00:00
Added main script

This commit is contained in:
parent a2f671f0f5
commit d82e76387a

run.py | 84 | Normal file
1 file changed, 84 insertions(+)
@@ -0,0 +1,84 @@
import time
from multiprocessing import Process

import praw

from crawler import Crawler
from reddit_bot import RedditBot, TaskQueue, CrawTask, CommentBuilder, ReportBuilder

reddit = praw.Reddit('opendirectories-bot',
                     user_agent='github.com/simon987/opendirectories-bot v1.0  (by /u/Hexahedr_n)')

subreddit = reddit.subreddit("opendirectories")

# Fetch the 50 most recent submissions
subs = []
for submission in subreddit.new(limit=50):
    subs.append(submission)

bot = RedditBot("crawled.txt")
tq = TaskQueue()

# Queue every link post that has not been crawled or queued yet
for s in subs:
    if not s.is_self:
        if not bot.has_crawled(s.id) and not tq.is_queued(s.id):
            tq.push(CrawTask(s))

            print("id: " + s.id)
            print("url: " + str(s.url))
            print("title: " + str(s.title))


def execute_task(submission):

    try:
        if not bot.has_crawled(submission.id):
            c = Crawler(submission.url, True)
            c.crawl()
            c.store_report(submission.id, submission.title)

            report_builder = ReportBuilder(c.files, c.base_url)

            # Only comment on directories totalling more than ~10 MB
            if report_builder.get_total_size() > 10000000:
                com_builder = CommentBuilder(ReportBuilder(c.files, c.base_url), c.base_url, submission.id)

                com_string = com_builder.get_comment()

                print(com_string)
                # Retry until the reply goes through (e.g. after rate limiting)
                while True:
                    try:
                        submission.reply(com_string)
                        bot.log_crawl(submission.id)
                        break
                    except Exception as e:
                        print("Waiting 10 minutes: " + str(e))
                        time.sleep(600)
                        continue

    except Exception as e:
        print(e)
        raise e


# Dispatch one worker process per queued task
while len(tq.tasks) > 0:

    task = tq.pop()

    if task is not None:
        if not bot.has_crawled(task.submission.id):
            # args must be a tuple, not a set
            p = Process(target=execute_task, args=(task.submission,))
            p.daemon = True
            p.start()
            print("Started process for " + task.submission.title)
        else:
            print("Already crawled " + task.submission.id)


# Keep the main process alive so the daemon workers are not killed
while True:
    time.sleep(1)
    print("Waiting..")
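Note: praw.Reddit('opendirectories-bot', ...) tells PRAW to read the credentials for that site name from a praw.ini file. A minimal sketch of the expected section, assuming a script-type Reddit app; all values below are placeholders, not anything from this repository:

    [opendirectories-bot]
    client_id=CLIENT_ID
    client_secret=CLIENT_SECRET
    username=USERNAME
    password=PASSWORD

The user_agent is passed in the script itself, so it does not need to appear in the config file.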