Added manual mode and HTTPS support

simon
2018-02-07 19:51:06 -05:00
parent 8e1f4543fd
commit a2f671f0f5
13 changed files with 217 additions and 143 deletions


@@ -1,51 +1,36 @@
import os
import json
from crawl_report import ReportBuilder
import operator
import humanfriendly


class CrawTask:

    def __init__(self, url, post_id, title):
        self.url = url
        self.post_id = post_id
        self.post_title = title

    def __init__(self, s):
        self.submission = s


class TaskQueue:

    def __init__(self, file):
        self.file = file

    def __init__(self):
        self.tasks = []

        if os.path.isfile(self.file):
            with open(self.file, "r") as f:
                json_tasks = json.load(f)
                for task in json_tasks:
                    self.tasks.append(CrawTask(task["url"], task["post_id"], task["post_title"]))

    def push(self, task):
        self.tasks.append(task)
        self.update_file()

    def pop(self):
        if len(self.tasks) > 0:
            t = self.tasks.pop()
            self.update_file()
        else:
            t = None
        return t

    def update_file(self):
        with open(self.file, "w") as f:
            json.dump(self.tasks, f, default=dumper)

    def is_queued(self, post_id):
        for task in self.tasks:
            if task.post_id == post_id:
            if task.submission.id == post_id:
                return True
        return False
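
The hunk above drops the JSON file persistence: CrawTask now simply wraps a submission object and TaskQueue keeps its tasks in memory, matching queued posts by task.submission.id. A minimal usage sketch follows, assuming the submissions come from PRAW; the credentials, subreddit listing limit, and the surrounding script are illustrative placeholders, not part of this commit.

import praw

# Placeholder credentials; real values would come from praw.ini or the environment.
reddit = praw.Reddit(client_id="...", client_secret="...", user_agent="od-bot usage sketch")

queue = TaskQueue()

# Queue any new /r/opendirectories submissions that are not already queued.
for submission in reddit.subreddit("opendirectories").new(limit=25):
    if not queue.is_queued(submission.id):
        queue.push(CrawTask(submission))

# Drain the queue; pop() returns None once it is empty.
task = queue.pop()
while task is not None:
    print(task.submission.id, task.submission.url)
    task = queue.pop()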
@@ -61,14 +46,12 @@ class RedditBot:
        self.log_file = log_file

        if not os.path.isfile(log_file):
            self.crawled = []
        else:
            with open(log_file, "r") as f:
                self.crawled = list(filter(None, f.read().split("\n")))
        self.crawled = []
        self.load_from_file()

    def log_crawl(self, post_id):
        self.load_from_file()
        self.crawled.append(post_id)

        with open(self.log_file, "w") as f:
@@ -76,5 +59,55 @@ class RedditBot:
            f.write(post_id + "\n")

    def has_crawled(self, post_id):
        self.load_from_file()
        return post_id in self.crawled

    def load_from_file(self):
        if not os.path.isfile(self.log_file):
            self.crawled = []
        else:
            with open(self.log_file, "r") as f:
                self.crawled = list(filter(None, f.read().split("\n")))


class CommentBuilder:

    def __init__(self, report_builder: ReportBuilder, url, post_id):
        self.report_builder = report_builder
        self.url = url
        self.post_id = post_id

    def get_comment(self):
        total_size = self.report_builder.get_total_size()
        ext_counts = self.report_builder.get_ext_counts()
        ext_sizes = self.report_builder.get_ext_sizes()
        print(ext_sizes)
        ext_sizes_sorted = sorted(ext_sizes.items(), key=operator.itemgetter(1), reverse=True)
        print(ext_sizes_sorted)

        comment = "File types | Count | Total Size\n"
        comment += ":-- | :-- | :-- \n"

        counter = 0
        for i in range(0, len(ext_sizes_sorted)):
            comment += ext_sizes_sorted[i][0]
            comment += " | " + str(ext_counts[ext_sizes_sorted[i][0]])
            comment += " | " + str(humanfriendly.format_size(ext_sizes_sorted[i][1], True)) + " \n"

            counter += 1
            if counter >= 3:
                break

        comment += "**Total** | **" + str(len(self.report_builder.files)) + "** | **"
        comment += self.report_builder.get_total_size_formatted() + "** \n\n"
        comment += "[Full Report](https://simon987.net/od-bot/report/" + self.post_id + "/)"
        comment += " | [JSON](https://simon987.net/od-bot/report/" + self.post_id + "/json)"
        comment += " | [Link list](https://simon987.net/od-bot/report/" + self.post_id + "/links) \n"
        comment += "*** \n^(Beep boop. I am a bot that calculates the file sizes & count of"
        comment += " open directories posted in /r/opendirectories/)"

        return comment
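
CommentBuilder formats a crawl report as a Reddit comment: a markdown table of up to three file types ranked by total size, a total row, and links to the full report under https://simon987.net/od-bot/report/. A rough sketch of how it might be driven, assuming it runs alongside the classes above; the stub report builder and its numbers are invented purely to show the shape of the output, and only mimic the ReportBuilder methods that get_comment() actually calls.

class StubReportBuilder:
    # Stand-in for crawl_report.ReportBuilder; only the attribute and methods
    # used by CommentBuilder are provided, with made-up values.
    files = ["a.mkv", "b.mkv", "c.iso", "d.jpg"]

    def get_total_size(self):
        return 9 * 1024 ** 3

    def get_total_size_formatted(self):
        return "9 GiB"

    def get_ext_counts(self):
        return {"mkv": 2, "iso": 1, "jpg": 1}

    def get_ext_sizes(self):
        return {"mkv": 6 * 1024 ** 3, "iso": 3 * 1024 ** 3, "jpg": 2 * 1024 ** 2}


# "abc123" stands in for a reddit post id; the URL is an arbitrary example.
builder = CommentBuilder(StubReportBuilder(), "http://example.com/dir/", "abc123")
print(builder.get_comment())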