mirror of
https://github.com/simon987/opendirectories-bot.git
synced 2025-12-14 23:59:02 +00:00
Added manual mode and HTTPS support
This commit is contained in:
@@ -1,51 +1,36 @@
|
||||
import os
|
||||
import json
|
||||
from crawl_report import ReportBuilder
|
||||
import operator
|
||||
import humanfriendly
|
||||
|
||||
|
||||
class CrawTask:
|
||||
|
||||
def __init__(self, url, post_id, title):
|
||||
self.url = url
|
||||
self.post_id = post_id
|
||||
self.post_title = title
|
||||
def __init__(self, s):
|
||||
self.submission = s
|
||||
|
||||
|
||||
class TaskQueue:
|
||||
|
||||
def __init__(self, file):
|
||||
self.file = file
|
||||
|
||||
def __init__(self):
|
||||
self.tasks = []
|
||||
|
||||
if os.path.isfile(self.file):
|
||||
|
||||
with open(self.file, "r") as f:
|
||||
json_tasks = json.load(f)
|
||||
|
||||
for task in json_tasks:
|
||||
self.tasks.append(CrawTask(task["url"], task["post_id"], task["post_title"]))
|
||||
|
||||
def push(self, task):
|
||||
self.tasks.append(task)
|
||||
self.update_file()
|
||||
|
||||
def pop(self):
|
||||
if len(self.tasks) > 0:
|
||||
t = self.tasks.pop()
|
||||
self.update_file()
|
||||
else:
|
||||
t = None
|
||||
|
||||
return t
|
||||
|
||||
def update_file(self):
|
||||
with open(self.file, "w") as f:
|
||||
json.dump(self.tasks, f, default=dumper)
|
||||
|
||||
def is_queued(self, post_id):
|
||||
|
||||
for task in self.tasks:
|
||||
if task.post_id == post_id:
|
||||
if task.submission.id == post_id:
|
||||
return True
|
||||
|
||||
return False
|
||||
@@ -61,14 +46,12 @@ class RedditBot:
|
||||
|
||||
self.log_file = log_file
|
||||
|
||||
if not os.path.isfile(log_file):
|
||||
self.crawled = []
|
||||
else:
|
||||
with open(log_file, "r") as f:
|
||||
self.crawled = list(filter(None, f.read().split("\n")))
|
||||
self.crawled = []
|
||||
self.load_from_file()
|
||||
|
||||
def log_crawl(self, post_id):
|
||||
|
||||
self.load_from_file()
|
||||
self.crawled.append(post_id)
|
||||
|
||||
with open(self.log_file, "w") as f:
|
||||
@@ -76,5 +59,55 @@ class RedditBot:
|
||||
f.write(post_id + "\n")
|
||||
|
||||
def has_crawled(self, post_id):
|
||||
|
||||
self.load_from_file()
|
||||
return post_id in self.crawled
|
||||
|
||||
def load_from_file(self):
|
||||
if not os.path.isfile(self.log_file):
|
||||
self.crawled = []
|
||||
else:
|
||||
with open(self.log_file, "r") as f:
|
||||
self.crawled = list(filter(None, f.read().split("\n")))
|
||||
|
||||
|
||||
class CommentBuilder:
|
||||
|
||||
def __init__(self, report_builder: ReportBuilder, url, post_id):
|
||||
self.report_builder = report_builder
|
||||
self.url = url
|
||||
self.post_id = post_id
|
||||
|
||||
def get_comment(self):
|
||||
|
||||
total_size = self.report_builder.get_total_size()
|
||||
|
||||
ext_counts = self.report_builder.get_ext_counts()
|
||||
ext_sizes = self.report_builder.get_ext_sizes()
|
||||
print(ext_sizes)
|
||||
ext_sizes_sorted = sorted(ext_sizes.items(), key=operator.itemgetter(1), reverse=True)
|
||||
print(ext_sizes_sorted)
|
||||
|
||||
comment = "File types | Count | Total Size\n"
|
||||
comment += ":-- | :-- | :-- \n"
|
||||
|
||||
counter = 0
|
||||
for i in range(0, len(ext_sizes_sorted)):
|
||||
|
||||
comment += ext_sizes_sorted[i][0]
|
||||
comment += " | " + str(ext_counts[ext_sizes_sorted[i][0]])
|
||||
comment += " | " + str(humanfriendly.format_size(ext_sizes_sorted[i][1], True)) + " \n"
|
||||
|
||||
counter += 1
|
||||
if counter >= 3:
|
||||
break
|
||||
|
||||
comment += "**Total** | **" + str(len(self.report_builder.files)) + "** | **"
|
||||
comment += self.report_builder.get_total_size_formatted() + "** \n\n"
|
||||
|
||||
comment += "[Full Report](https://simon987.net/od-bot/report/" + self.post_id + "/)"
|
||||
comment += " | [JSON](https://simon987.net/od-bot/report/" + self.post_id + "/json)"
|
||||
comment += " | [Link list](https://simon987.net/od-bot/report/" + self.post_id + "/links) \n"
|
||||
comment += "*** \n^(Beep boop. I am a bot that calculates the file sizes & count of"
|
||||
comment += " open directories posted in /r/opendirectories/)"
|
||||
|
||||
return comment
|
||||
|
||||
Reference in New Issue
Block a user