diff --git a/README.md b/README.md
index 141a8e3..fbdc1a5 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,8 @@ image boards and publishes serialised JSON to RabbitMQ
 for real-time ingest.
 
 Compatible image boards: 4chan, lainchan, uboachan,
-22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal.
+22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal,
+horochan.
 
 Can optionally push monitoring data to InfluxDB. Below is an
 example of Grafana being used to display it.
diff --git a/chan.py b/chan.py
index 2a8e639..712f0cc 100644
--- a/chan.py
+++ b/chan.py
@@ -9,7 +9,7 @@ from util import logger
 
 
 class ChanHelper:
-    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps=None):
+    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
         self.db_id = db_id
         self._base_url = base_url
         self._image_url = image_url
@@ -56,7 +56,7 @@ class ChanHelper:
         raise NotImplementedError
 
     @staticmethod
-    def parse_threads_list(content):
+    def parse_threads_list(r):
         raise NotImplementedError
 
     @staticmethod
@@ -258,6 +258,74 @@ class SynchJsonChanHelper(JsonChanHelper):
         return list(urls)
 
 
+class MayuriChanHelper(ChanHelper):
+
+    def __init__(self, db_id, base_url, image_url, boards, rps):
+        super().__init__(db_id, base_url, image_url, None, None, boards, rps)
+
+    @staticmethod
+    def item_id(item):
+        return item["id"]
+
+    @staticmethod
+    def item_mtime(item):
+        return item["timestamp"]
+
+    @staticmethod
+    def thread_mtime(thread):
+        return thread["replies_count"]
+
+    def item_urls(self, item, board):
+        urls = set()
+
+        if "message" in item and item["message"]:
+            urls.update(get_links_from_body(item["message"]))
+        elif "subject" in item and item["subject"]:
+            urls.update(get_links_from_body(item["subject"]))
+        if item["files"]:
+            for file in item["files"]:
+                urls.add(self._image_url % file["storage"] + file["name"] + "." + file["ext"])
+
+        return list(urls)
+
+    @staticmethod
+    def item_type(item):
+        return "thread" if "replies_count" in item else "post"
+
+    def parse_threads_list(self, r):
+        try:
+            j = json.loads(r.text)
+        except JSONDecodeError:
+            logger.warning("JSONDecodeError for %s:" % (r.url,))
+            logger.warning(r.text)
+            return [], None
+        if j["currentPage"] < j["totalPages"]:
+            return j["data"], self._base_url + "boards/%d" % (j["currentPage"] + 1, )
+        return j["data"], None
+
+    @staticmethod
+    def parse_thread(r):
+        try:
+            j = json.loads(r.text)
+        except JSONDecodeError:
+            logger.warning("JSONDecodeError for %s:" % (r.url,))
+            logger.warning(r.text)
+            return []
+
+        thread = dict(j["data"])
+        del thread["replies"]
+        yield thread
+
+        if j["data"]["replies"]:
+            for post in j["data"]["replies"]:
+                yield post
+
+    def threads_url(self, board):
+        return "%sboards/1" % (self._base_url, )
+
+    def posts_url(self, board, thread):
+        return "%sthreads/%d" % (self._base_url, thread)
+
+
 CHANS = {
     "4chan": JsonChanHelper(
@@ -276,7 +344,7 @@ CHANS = {
             "news", "out", "po", "pol", "qst", "sci", "soc", "sp",
             "tg", "toy", "trv", "tv", "vp", "wsg", "wsr", "x"
         ),
-        rps=3 / 2
+        rps=2
     ),
     "lainchan": JsonChanHelper(
         2,
@@ -447,4 +515,13 @@ CHANS = {
         ),
         rps=1 / 600
     ),
+    "horochan": MayuriChanHelper(
+        15,
+        "https://api.horochan.ru/v1/",
+        "https://%s.horochan.ru/src/",
+        (
+            "b",
+        ),
+        rps=4
+    ),
 }
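
Below is a minimal sketch of how the new `MayuriChanHelper` could be exercised end to end, assuming the caller unpacks `parse_threads_list()` as a `(threads, next_page_url)` pair and consumes the `parse_thread()` generator. The `fetch()` helper and the `crawl_horochan_board()` loop are hypothetical illustrations for review purposes; the project's actual crawl loop and RabbitMQ publishing are not shown in this diff.

```python
import requests

from chan import CHANS


def fetch(url):
    # Hypothetical helper: the real daemon presumably rate-limits
    # requests according to the helper's rps setting before parsing.
    return requests.get(url, timeout=30)


def crawl_horochan_board(board="b"):
    helper = CHANS["horochan"]

    # parse_threads_list() returns (threads, next_page_url);
    # next_page_url is None once currentPage reaches totalPages,
    # which ends the pagination loop.
    url = helper.threads_url(board)
    while url:
        threads, url = helper.parse_threads_list(fetch(url))

        for thread in threads:
            # parse_thread() yields the OP (with "replies" stripped)
            # first, then each reply as its own item.
            r = fetch(helper.posts_url(board, helper.item_id(thread)))
            for item in helper.parse_thread(r):
                print(helper.item_type(item), helper.item_id(item),
                      helper.item_urls(item, board))


if __name__ == "__main__":
    crawl_horochan_board()
```

The `(threads, next_page_url)` convention is why the last-page branch in `parse_threads_list()` returns `j["data"], None` rather than a bare list: a caller like the sketch above can then unpack every return value the same way.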