add horochan

This commit is contained in:
simon 2019-09-08 12:03:42 -04:00
parent 94d15faccc
commit dfb415421e
2 changed files with 82 additions and 4 deletions

View File

@ -5,7 +5,8 @@ image boards and publishes serialised JSON to RabbitMQ
for real-time ingest. for real-time ingest.
Compatible image boards: 4chan, lainchan, uboachan, Compatible image boards: 4chan, lainchan, uboachan,
22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal. 22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal,
horochan.
Can optionally push monitoring data to InfluxDB. Below is an Can optionally push monitoring data to InfluxDB. Below is an
example of Grafana being used to display it. example of Grafana being used to display it.

83
chan.py
View File

@ -9,7 +9,7 @@ from util import logger
class ChanHelper: class ChanHelper:
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps=None): def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
self.db_id = db_id self.db_id = db_id
self._base_url = base_url self._base_url = base_url
self._image_url = image_url self._image_url = image_url
@ -56,7 +56,7 @@ class ChanHelper:
raise NotImplementedError raise NotImplementedError
@staticmethod @staticmethod
def parse_threads_list(content): def parse_threads_list(r):
raise NotImplementedError raise NotImplementedError
@staticmethod @staticmethod
@ -258,6 +258,74 @@ class SynchJsonChanHelper(JsonChanHelper):
return list(urls) return list(urls)
class MayuriChanHelper(ChanHelper):
    """Helper for image boards that expose a paginated JSON API.

    The thread listing is read from ``<base_url>boards/<page>`` and a single
    thread from ``<base_url>threads/<id>``. Responses are expected to be JSON
    objects with a ``data`` member (and ``currentPage`` / ``totalPages`` for
    the listing).
    """

    def __init__(self, db_id, base_url, image_url, boards, rps):
        # This API has no per-board thread/image path templates, so the
        # corresponding base-class arguments are passed as None.
        super().__init__(db_id, base_url, image_url, None, None, boards, rps)

    @staticmethod
    def item_id(item):
        """Return the identifier of a thread or post."""
        return item["id"]

    @staticmethod
    def item_mtime(item):
        """Return the ``timestamp`` field of a thread or post."""
        return item["timestamp"]

    @staticmethod
    def thread_mtime(thread):
        """Return the thread's reply count as its change marker.

        NOTE(review): presumably used because the listing carries no
        last-modified time, so a changed reply count signals new content --
        confirm against the caller.
        """
        return thread["replies_count"]

    def item_urls(self, item, board):
        """Return the URLs found in an item's text and its attached files.

        Links are extracted from ``message`` when it is non-empty, otherwise
        from ``subject``. Each attached file contributes one URL built from
        the image URL template, the file's storage host, name and extension.
        """
        urls = set()

        if item.get("message"):
            urls.update(get_links_from_body(item["message"]))
        elif item.get("subject"):
            urls.update(get_links_from_body(item["subject"]))

        # A missing or null "files" member is treated as "no attachments".
        for file in item.get("files") or ():
            urls.add(self._image_url % file["storage"] + file["name"] + "." + file["ext"])

        return list(urls)

    @staticmethod
    def item_type(item):
        """Return ``"thread"`` for items carrying ``replies_count``, else ``"post"``."""
        return "thread" if "replies_count" in item else "post"

    def parse_threads_list(self, r):
        """Parse one page of the thread listing.

        :param r: HTTP response object exposing ``text`` and ``url``.
        :return: a ``(threads, next_url)`` tuple; ``next_url`` is ``None`` when
            the response could not be decoded or this was the last page.
        """
        try:
            j = json.loads(r.text)
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return [], None

        next_url = None
        if j["currentPage"] < j["totalPages"]:
            next_url = self._base_url + "boards/%d" % (j["currentPage"] + 1, )

        # Always return a 2-tuple so callers can unpack the result uniformly,
        # including on the last page.
        return j["data"], next_url

    @staticmethod
    def parse_thread(r):
        """Yield the thread itself (without its replies), then each reply.

        :param r: HTTP response object exposing ``text`` and ``url``.
        Yields nothing when the response body is not valid JSON.
        """
        try:
            j = json.loads(r.text)
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return

        # Work on a shallow copy so the decoded payload is left untouched
        # while the replies are split off from the opening post.
        thread = dict(j["data"])
        replies = thread.pop("replies", None) or []

        yield thread
        for post in replies:
            yield post

    def threads_url(self, board):
        """Return the URL of the first page of the thread listing."""
        return "%sboards/1" % (self._base_url, )

    def posts_url(self, board, thread):
        """Return the URL of a single thread, given its numeric id."""
        return "%sthreads/%d" % (self._base_url, thread)
CHANS = { CHANS = {
"4chan": JsonChanHelper( "4chan": JsonChanHelper(
@ -276,7 +344,7 @@ CHANS = {
"news", "out", "po", "pol", "qst", "sci", "soc", "sp", "news", "out", "po", "pol", "qst", "sci", "soc", "sp",
"tg", "toy", "trv", "tv", "vp", "wsg", "wsr", "x" "tg", "toy", "trv", "tv", "vp", "wsg", "wsr", "x"
), ),
rps=3 / 2 rps=2
), ),
"lainchan": JsonChanHelper( "lainchan": JsonChanHelper(
2, 2,
@ -447,4 +515,13 @@ CHANS = {
), ),
rps=1 / 600 rps=1 / 600
), ),
"horochan": MayuriChanHelper(
15,
"https://api.horochan.ru/v1/",
"https://%s.horochan.ru/src/",
(
"b"
),
rps=4
),
} }