add horochan

This commit is contained in:
simon 2019-09-08 12:03:42 -04:00
parent 94d15faccc
commit dfb415421e
2 changed files with 82 additions and 4 deletions

View File

@ -5,7 +5,8 @@ image boards and publishes serialised JSON to RabbitMQ
for real-time ingest.
Compatible image boards: 4chan, lainchan, uboachan,
22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal.
22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal,
horochan.
Can optionally push monitoring data to InfluxDB. Below is an
example of Grafana being used to display it.

83
chan.py
View File

@ -9,7 +9,7 @@ from util import logger
class ChanHelper:
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps=None):
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
self.db_id = db_id
self._base_url = base_url
self._image_url = image_url
@ -56,7 +56,7 @@ class ChanHelper:
raise NotImplementedError
@staticmethod
def parse_threads_list(content):
def parse_threads_list(r):
raise NotImplementedError
@staticmethod
@ -258,6 +258,74 @@ class SynchJsonChanHelper(JsonChanHelper):
return list(urls)
class MayuriChanHelper(ChanHelper):
    """Chan helper for the paginated JSON API used by the ``horochan`` entry.

    Thread listings carry ``currentPage`` / ``totalPages`` / ``data`` keys and
    a thread payload embeds its posts under ``data.replies``.
    """

    def __init__(self, db_id, base_url, image_url, boards, rps):
        # The URL builders of this helper never use the base-class
        # ``thread_path`` / ``image_path`` values, hence the two ``None``s.
        super().__init__(db_id, base_url, image_url, None, None, boards, rps)

    @staticmethod
    def item_id(item):
        """Return the ``id`` field of a thread or post."""
        return item["id"]

    @staticmethod
    def item_mtime(item):
        """Return the ``timestamp`` field of a thread or post."""
        return item["timestamp"]

    @staticmethod
    def thread_mtime(thread):
        """Return the thread's ``replies_count``.

        NOTE(review): presumably the reply count stands in for a real
        modification time so that a new reply marks the thread as changed --
        confirm against the caller.
        """
        return thread["replies_count"]

    def item_urls(self, item, board):
        """Return the de-duplicated list of URLs referenced by *item*.

        Links are extracted from ``message`` when it is non-empty, otherwise
        from ``subject``. Each entry of ``files`` adds one URL built from
        ``_image_url`` (formatted with the file's ``storage``), followed by
        ``name``, a dot and ``ext``. *board* is not used.
        """
        urls = set()

        message = item.get("message")
        subject = item.get("subject")
        if message:
            urls.update(get_links_from_body(message))
        elif subject:
            urls.update(get_links_from_body(subject))

        if item["files"]:
            for attachment in item["files"]:
                # ``%`` binds tighter than ``+``: only ``storage`` is
                # substituted into the template, the rest is appended.
                prefix = self._image_url % attachment["storage"]
                urls.add(prefix + attachment["name"] + "." + attachment["ext"])

        return list(urls)

    @staticmethod
    def item_type(item):
        """Return ``"thread"`` if *item* has a ``replies_count`` key, else ``"post"``."""
        return "thread" if "replies_count" in item else "post"

    def parse_threads_list(self, r):
        """Parse one page of the thread listing.

        Always returns a ``(threads, next_url)`` pair:

        * ``([], None)`` when the response body is not valid JSON,
        * ``(data, url_of_next_page)`` while more pages remain,
        * ``(data, None)`` on the last page.
        """
        try:
            j = json.loads(r.text)
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return [], None

        if j["currentPage"] < j["totalPages"]:
            next_page = j["currentPage"] + 1
            return j["data"], self._base_url + "boards/%d" % (next_page,)

        # Last page: keep the 2-tuple shape of the other exits so callers can
        # always unpack ``threads, next_url``.
        return j["data"], None

    @staticmethod
    def parse_thread(r):
        """Yield the thread itself, then each of its replies.

        The thread item is a shallow copy of ``data`` without the ``replies``
        key. Nothing is yielded when the response body is not valid JSON.
        """
        try:
            j = json.loads(r.text)
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            # Generator function: a bare return simply ends the iteration.
            return

        thread = dict(j["data"])
        del thread["replies"]
        yield thread

        replies = j["data"]["replies"]
        if replies:
            for post in replies:
                yield post

    def threads_url(self, board):
        """Return the URL of the first listing page (``<base_url>boards/1``).

        NOTE(review): *board* is not part of the URL -- confirm this is
        intended for the API.
        """
        return "%sboards/1" % (self._base_url, )

    def posts_url(self, board, thread):
        """Return ``<base_url>threads/<thread>``.

        *thread* is formatted with ``%d`` and must therefore be numeric.
        *board* is not used.
        """
        return "%sthreads/%d" % (self._base_url, thread)
CHANS = {
"4chan": JsonChanHelper(
@ -276,7 +344,7 @@ CHANS = {
"news", "out", "po", "pol", "qst", "sci", "soc", "sp",
"tg", "toy", "trv", "tv", "vp", "wsg", "wsr", "x"
),
rps=3 / 2
rps=2
),
"lainchan": JsonChanHelper(
2,
@ -447,4 +515,13 @@ CHANS = {
),
rps=1 / 600
),
"horochan": MayuriChanHelper(
15,
"https://api.horochan.ru/v1/",
"https://%s.horochan.ru/src/",
(
"b"
),
rps=4
),
}