add lolnada

This commit is contained in:
simon 2019-09-08 17:26:33 -04:00
parent 2ce0659f23
commit 60fa4893d8
9 changed files with 102 additions and 13 deletions

View File

@ -6,7 +6,7 @@ image boards and publishes serialised JSON to RabbitMQ
Compatible image boards: 4chan, lainchan, uboachan, Compatible image boards: 4chan, lainchan, uboachan,
22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal, 22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal,
horochan, doushio, desuchan, tgchan. horochan, doushio, desuchan, tgchan, lolnada.
Can optionally push monitoring data to InfluxDB. Below is an Can optionally push monitoring data to InfluxDB. Below is an
example of Grafana being used to display it. example of Grafana being used to display it.

View File

@ -3,6 +3,7 @@ from chan.desuchan_html import DesuChanHtmlChanHelper
from chan.doushio_html import DoushioHtmlChanHelper from chan.doushio_html import DoushioHtmlChanHelper
from chan.endchan_html import EndchanHtmlChanHelper from chan.endchan_html import EndchanHtmlChanHelper
from chan.json import JsonChanHelper from chan.json import JsonChanHelper
from chan.lolnada_html import LolNadaHtmlChanHelper
from chan.mayuri import MayuriChanHelper from chan.mayuri import MayuriChanHelper
from chan.russian_json import RussianJsonChanHelper from chan.russian_json import RussianJsonChanHelper
from chan.synch_json import SynchJsonChanHelper from chan.synch_json import SynchJsonChanHelper
@ -105,7 +106,7 @@ CHANS = {
"a", "fd", "ja", "ma", "vn", "fg", "fur", "gg", "ga", "a", "fd", "ja", "ma", "vn", "fg", "fur", "gg", "ga",
"vape", "h", "ho", "hc", "e", "fet", "sex", "fag" "vape", "h", "ho", "hc", "e", "fet", "sex", "fag"
), ),
rps=1 rps=1/10
), ),
"endchan": EndchanHtmlChanHelper( "endchan": EndchanHtmlChanHelper(
8, 8,
@ -126,7 +127,7 @@ CHANS = {
"ausneets", "qanonresearch", "polru", "yuri", "christianity", "ausneets", "qanonresearch", "polru", "yuri", "christianity",
"kc", "rapport", "news", "brit", "webm", "4chon" "kc", "rapport", "news", "brit", "webm", "4chon"
), ),
rps=1 rps=1/2
), ),
"38chan": JsonChanHelper( "38chan": JsonChanHelper(
9, 9,
@ -149,7 +150,7 @@ CHANS = {
"b", "pol", "sk", "int", "slav", "s", "gv", "mda", "sp", "b", "pol", "sk", "int", "slav", "s", "gv", "mda", "sp",
"fit", "had", "fit", "had",
), ),
rps=1 / 4 rps=1 / 30
), ),
"gnfos": JsonChanHelper( "gnfos": JsonChanHelper(
11, 11,
@ -214,7 +215,7 @@ CHANS = {
( (
"moe", "moe",
), ),
rps=1 rps=1/20
), ),
"desuchan": DesuChanHtmlChanHelper( "desuchan": DesuChanHtmlChanHelper(
17, 17,
@ -253,6 +254,18 @@ CHANS = {
( (
"draw", "meep", "quest", "questdis", "tg", "icons", "draw", "meep", "quest", "questdis", "tg", "icons",
), ),
rps=1, rps=1/600,
) ),
"lolnada": LolNadaHtmlChanHelper(
20,
"https://lolnada.org/",
"https://lolnada.org/",
"/hilo/",
"/src/",
(
"b", "a", "aw", "cgl", "dw", "int", "qt", "sad", "t",
"toy", "v", "x", "34", "e", "f", "h"
),
rps=1/20,
),
} }

View File

@ -12,7 +12,7 @@ class DesuChanHtmlChanHelper(ChanHelper):
return "%s%s/" % (self._base_url, board) return "%s%s/" % (self._base_url, board)
def posts_url(self, board, thread): def posts_url(self, board, thread):
return "%s%s%s%d.html" % (self._base_url, board, self._thread_path, thread) return "%s%s%s%d.html" % (self._base_url, board, self._thread_path, self.item_id(thread))
@staticmethod @staticmethod
def item_id(item): def item_id(item):

View File

@ -13,7 +13,7 @@ class DoushioHtmlChanHelper(ChanHelper):
return "%s%s/" % (self._base_url, board) return "%s%s/" % (self._base_url, board)
def posts_url(self, board, thread): def posts_url(self, board, thread):
return "%s%s/%d" % (self._base_url, board, thread) return "%s%s/%d" % (self._base_url, board, self.item_id(thread))
@staticmethod @staticmethod
def item_id(item): def item_id(item):

View File

@ -13,7 +13,7 @@ class EndchanHtmlChanHelper(ChanHelper):
return "%s%s/" % (self._base_url, board) return "%s%s/" % (self._base_url, board)
def posts_url(self, board, thread): def posts_url(self, board, thread):
return "%s%s%s%d.html" % (self._base_url, board, self._thread_path, thread) return "%s%s%s%d.html" % (self._base_url, board, self._thread_path, self.item_id(thread))
@staticmethod @staticmethod
def item_id(item): def item_id(item):

View File

@ -21,7 +21,7 @@ class ChanHelper:
return "%s%s/threads.json" % (self._base_url, board) return "%s%s/threads.json" % (self._base_url, board)
def posts_url(self, board, thread): def posts_url(self, board, thread):
return "%s%s%s%d.json" % (self._base_url, board, self._thread_path, thread) return "%s%s%s%d.json" % (self._base_url, board, self._thread_path, self.item_id(thread))
def board_hash(self, board): def board_hash(self, board):
return str((self._boards.index(board) + 1) * 10000) return str((self._boards.index(board) + 1) * 10000)

76
chan/lolnada_html.py Normal file
View File

@ -0,0 +1,76 @@
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from dateutil import parser
from chan.helper import ChanHelper
from post_process import get_links_from_html_body
class LolNadaHtmlChanHelper(ChanHelper):
    """Scraping helper for the lolnada image board, which is parsed from HTML.

    Threads are represented as dicts with ``id`` and ``url`` keys (built by
    :meth:`parse_threads_list`); posts are dicts with ``id``, ``type``,
    ``html`` and ``time`` keys (built by :meth:`parse_thread`).
    """

    # Links containing any of these substrings are dropped by item_urls().
    _IGNORED_LINK_PARTS = ("google.com", "iqdb.org")

    def threads_url(self, board):
        """Return the URL of the first thread-listing page of *board*."""
        return "%s%s/" % (self._base_url, board)

    def posts_url(self, board, thread):
        """Return the absolute URL of *thread*'s page.

        *thread* is a dict produced by :meth:`parse_threads_list`; its
        ``url`` value is the raw ``href`` scraped from the listing.  The
        href is resolved against the base URL with ``urljoin`` so that
        root-relative (``/b/...``), relative and absolute hrefs all give a
        well-formed URL; plain concatenation would produce ``//`` for a
        root-relative href and garbage for an absolute one.
        *board* is unused because the href already identifies the thread.
        """
        return urljoin(self._base_url, thread["url"])

    @staticmethod
    def item_id(item):
        """Return the numeric id stored in a thread or post dict."""
        return item["id"]

    def item_urls(self, item, board):
        """Return the de-duplicated links found in the item's HTML.

        Links are extracted by ``get_links_from_html_body`` and any link
        containing one of ``_IGNORED_LINK_PARTS`` is discarded.  The result
        order is unspecified because duplicates are removed through a set.
        """
        links = set(get_links_from_html_body(item["html"], self._base_url))
        return [
            link for link in links
            if not any(part in link for part in self._IGNORED_LINK_PARTS)
        ]

    @staticmethod
    def item_type(item):
        """Return the item's kind: ``"thread"`` or ``"post"``."""
        return item["type"]

    @staticmethod
    def thread_mtime(thread):
        """Return a constant ``-1``.

        No modification time is extracted from the thread listing, so every
        thread reports the same value.
        """
        return -1

    @staticmethod
    def item_mtime(item):
        """Return the item's ``time`` value (integer epoch seconds)."""
        return item["time"]

    def parse_threads_list(self, r):
        """Parse one listing page into ``(threads, next_page_url)``.

        :param r: response object exposing the page HTML as ``r.text``.
        :returns: a list of ``{"id": int, "url": str}`` dicts, one per
            ``div.hilo`` element, and the absolute URL of the next listing
            page, or ``None`` when there is no next page.
        """
        soup = BeautifulSoup(r.text, "html.parser")

        threads = []
        for thread_el in soup.find_all("div", class_="hilo"):
            thread_id = thread_el.get("data-id")
            link_el = thread_el.find("a", class_="post_no")
            href = link_el.get("href") if link_el is not None else None
            # Skip malformed thread elements instead of aborting the page.
            if thread_id is None or href is None:
                continue
            threads.append({
                "id": int(thread_id),
                "url": href,
            })

        # Pagination is a <form> holding a "Siguiente" ("Next") button; an
        # action of "none" is treated as "no further page".
        for form in soup.find_all("form"):
            next_button = form.find("input", attrs={"value": "Siguiente"})
            if next_button is not None and form.get("action") != "none":
                return threads, urljoin(self._base_url, form.get("action"))
        return threads, None

    @staticmethod
    def _epoch_seconds(el):
        """Return the ``datetime`` attribute of *el*'s <time> tag as epoch seconds.

        NOTE(review): if the attribute carries no UTC offset the parsed
        datetime is naive and ``timestamp()`` interprets it in the local
        timezone -- confirm what the site emits.
        """
        return int(parser.parse(el.find("time").get("datetime")).timestamp())

    @staticmethod
    def parse_thread(r):
        """Yield every reply of a thread page, then the opening post.

        :param r: response object exposing the page HTML as ``r.text``.
        :returns: generator of dicts with ``id``, ``type`` (``"post"`` for
            replies, ``"thread"`` for the opening post), ``html`` and
            ``time`` keys.  Yields nothing when the page contains no
            ``div.hilo`` element (e.g. an error page).
        """
        soup = BeautifulSoup(r.text, "html.parser")
        op_el = soup.find("div", class_="hilo")
        if op_el is None:
            return

        for post_el in op_el.find_all("div", class_="post reply"):
            yield {
                # Drops a 6-character id prefix -- presumably "reply_";
                # TODO confirm against the site markup.
                "id": int(post_el.get("id")[6:]),
                "type": "post",
                "html": str(post_el),
                "time": LolNadaHtmlChanHelper._epoch_seconds(post_el),
            }
            # Remove the reply from the tree so the opening post's HTML
            # below does not include it.
            post_el.decompose()

        yield {
            # Drops a 5-character id prefix -- presumably "hilo_";
            # TODO confirm against the site markup.
            "id": int(op_el.get("id")[5:]),
            "type": "thread",
            "html": str(op_el),
            "time": LolNadaHtmlChanHelper._epoch_seconds(op_el),
        }

View File

@ -72,4 +72,4 @@ class MayuriChanHelper(ChanHelper):
return "%sboards/1" % (self._base_url,) return "%sboards/1" % (self._base_url,)
def posts_url(self, board, thread): def posts_url(self, board, thread):
return "%sthreads/%d" % (self._base_url, thread) return "%sthreads/%d" % (self._base_url, self.item_id(thread))

2
run.py
View File

@ -47,7 +47,7 @@ class ChanScanner:
def _posts(self, board): def _posts(self, board):
for thread in self._threads(board): for thread in self._threads(board):
if self.state.has_new_posts(thread, self.helper, board): if self.state.has_new_posts(thread, self.helper, board):
for post in self._fetch_posts(board, self.helper.item_id(thread)): for post in self._fetch_posts(board, thread):
yield post yield post
self.state.mark_thread_as_visited(thread, self.helper, board) self.state.mark_thread_as_visited(thread, self.helper, board)