rename meta attributes, add 2ch.hk support, version bump

This commit is contained in:
2019-09-05 12:59:08 -04:00
parent 2890222c4d
commit 9447463e56
6 changed files with 137 additions and 45 deletions

105
chan.py
View File

@@ -1,3 +1,8 @@
import json
from post_process import get_links_from_body
class ChanHelper:
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards):
self.db_id = db_id
@@ -16,6 +21,85 @@ class ChanHelper:
def posts_url(self, board, thread):
    """Build the JSON URL for a single thread.

    Joins ``self._base_url``, ``board`` and ``self._thread_path``, then
    appends ``thread`` formatted with ``%d`` followed by ``.json``.
    """
    prefix = "%s%s%s" % (self._base_url, board, self._thread_path)
    # Wrap in a 1-tuple so a tuple-valued ``thread`` is still rejected by %d.
    suffix = "%d.json" % (thread,)
    return prefix + suffix
@staticmethod
def item_id(item):
return item["no"]
def item_urls(self, item, board):
    """Collect the distinct URLs associated with one item.

    Links are extracted with ``get_links_from_body`` from the ``"com"``
    field when it is non-empty, otherwise from a non-empty ``"sub"`` field.
    When ``"fsize"`` is present and truthy, the URL returned by
    ``self.image_url(board, item["tim"], item["ext"])`` is added as well.

    Returns a list with no duplicates; ordering is not defined.
    """
    found = set()

    # Prefer the comment; fall back to the subject only when it is empty.
    text = item.get("com") or item.get("sub")
    if text:
        found.update(get_links_from_body(text))

    if item.get("fsize"):
        found.add(self.image_url(board, item["tim"], item["ext"]))

    return list(found)
@staticmethod
def item_type(item):
return "thread" if "sub" in item else "post"
@staticmethod
def thread_mtime(thread):
return thread["last_modified"]
@staticmethod
def parse_threads_list(content):
j = json.loads(content)
for page in j:
for thread in page["threads"]:
yield thread
@staticmethod
def parse_thread(content):
j = json.loads(content)
return j["posts"]
class RussianChanHelper(ChanHelper):
    """ChanHelper variant for the JSON layout used by the 2ch.hk entry.

    Overrides the parsing helpers so they read the ``num``, ``subject``,
    ``comment``, ``files`` and ``posts_count`` keys instead of the keys
    used by the base class.
    """

    @staticmethod
    def item_id(item):
        """Return the item's ``"num"`` field converted to ``int``."""
        return int(item["num"])

    @staticmethod
    def parse_threads_list(content):
        """Decode the thread-list JSON and return its ``"threads"`` entry."""
        j = json.loads(content)
        return j["threads"]

    @staticmethod
    def parse_thread(content):
        """Yield every post of every thread in the decoded JSON document."""
        j = json.loads(content)
        for thread in j["threads"]:
            for post in thread["posts"]:
                yield post

    @staticmethod
    def thread_mtime(thread):
        """Return ``thread["posts_count"]`` as the thread's change marker."""
        # The post count is used here in place of a modification timestamp.
        return thread["posts_count"]

    @staticmethod
    def item_type(item):
        """Return ``"thread"`` for a non-empty-string ``"subject"``, else ``"post"``."""
        return "thread" if "subject" in item and item["subject"] != "" else "post"

    def item_urls(self, item, board):
        """Collect the distinct URLs associated with one item.

        Links are extracted with ``get_links_from_body`` from ``"comment"``
        when it is non-empty, otherwise from a non-empty ``"subject"``.
        Each entry of ``"files"`` contributes ``self._base_url`` followed by
        its ``"path"``. ``board`` is accepted for interface compatibility
        with the base class and is not used here.

        Returns a list with no duplicates; ordering is not defined.
        """
        urls = set()

        if "comment" in item and item["comment"]:
            urls.update(get_links_from_body(item["comment"]))
        elif "subject" in item and item["subject"]:
            urls.update(get_links_from_body(item["subject"]))

        # A missing or null "files" entry means the item has no attachments.
        for attachment in item.get("files") or ():
            # NOTE(review): plain concatenation; if the base URL ends with "/"
            # and the path starts with "/", the result has a double slash.
            # Confirm against the API before normalising.
            urls.add(self._base_url + attachment["path"])

        return list(urls)
CHANS = {
"4chan": ChanHelper(
@@ -87,6 +171,25 @@ CHANS = {
"/src/",
[
"rails"
]
],
),
"2chhk": RussianChanHelper(
7,
"https://2ch.hk/",
"https://2ch.hk/",
"/res/",
"/src/",
[
"d", "b", "o", "soc", "media", "r", "api", "rf", "int",
"po", "news", "hry", "au", "bi", "biz", "bo", "c", "em",
"fa", "fiz", "fl", "ftb", "hh", "hi", "me", "mg", "mlp",
"mo", "mov", "mu", "ne", "psy", "re",
"sci", "sf", "sn", "sp", "spc", "tv", "un", "w", "wh",
"wm", "wp", "zog", "de", "di", "diy", "mus", "pa", "p",
"wrk", "trv", "gd", "hw", "mobi", "pr", "ra", "s", "t",
"web", "bg", "cg", "gsg", "ruvn", "tes", "v", "vg", "wr",
"a", "fd", "ja", "ma", "vn", "fg", "fur", "gg", "ga",
"vape", "h", "ho", "hc", "e", "fet", "sex", "fag"
],
)
}