add tgchan

This commit is contained in:
simon 2019-09-08 16:34:46 -04:00
parent fd56531d6c
commit 2ce0659f23
4 changed files with 64 additions and 3 deletions

View File

@ -6,7 +6,7 @@ image boards and publishes serialised JSON to RabbitMQ
Compatible image boards: 4chan, lainchan, uboachan, Compatible image boards: 4chan, lainchan, uboachan,
22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal, 22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal,
horochan, doushio, desuchan. horochan, doushio, desuchan, tgchan.
Can optionally push monitoring data to InfluxDB. Below is an Can optionally push monitoring data to InfluxDB. Below is an
example of Grafana being used to display it. example of Grafana being used to display it.

View File

@ -6,6 +6,7 @@ from chan.json import JsonChanHelper
from chan.mayuri import MayuriChanHelper from chan.mayuri import MayuriChanHelper
from chan.russian_json import RussianJsonChanHelper from chan.russian_json import RussianJsonChanHelper
from chan.synch_json import SynchJsonChanHelper from chan.synch_json import SynchJsonChanHelper
from chan.tgchan_html import TgChanHtmlChanHelper
CHANS = { CHANS = {
"4chan": JsonChanHelper( "4chan": JsonChanHelper(
@ -243,4 +244,15 @@ CHANS = {
), ),
rps=1/20 rps=1/20
), ),
"tgchan": TgChanHtmlChanHelper(
19,
"https://tgchan.org/kusaba/",
"https://tgchan.org/kusaba/",
"/res/",
"/src/",
(
"draw", "meep", "quest", "questdis", "tg", "icons",
),
rps=1,
)
} }

View File

@ -60,8 +60,6 @@ class DesuChanHtmlChanHelper(ChanHelper):
for post_el in op_el.find_all("table", recursive=False): for post_el in op_el.find_all("table", recursive=False):
label = post_el.find("label") label = post_el.find("label")
if not label:
print(post_el)
*_, time = label.children *_, time = label.children
yield { yield {
"id": int(post_el.find("td", attrs={"class", "reply"}).get("id")[5:]), "id": int(post_el.find("td", attrs={"class", "reply"}).get("id")[5:]),

51
chan/tgchan_html.py Normal file
View File

@ -0,0 +1,51 @@
import datetime
import re
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from chan.desuchan_html import DesuChanHtmlChanHelper
class TgChanHtmlChanHelper(DesuChanHtmlChanHelper):
    """HTML scraping helper for tgchan board and thread pages.

    Overrides the thread-list and thread parsers of
    ``DesuChanHtmlChanHelper``; everything else is inherited.
    """

    # Extracts the numeric part of ids shaped like "thread1234" (optionally
    # followed by letters).
    _THREAD_ID_RE = re.compile(r"thread([0-9]+)[a-zA-Z]*")

    # Exact layout (including surrounding newlines) of the text node that
    # ends each post's <label> and carries the post date.
    _TIME_FORMAT = "\n\n%Y/%m/%d(%a)%H:%M\n"

    def parse_threads_list(self, r):
        """Extract thread ids and the next-page URL from a board page.

        :param r: response-like object exposing the page HTML as ``r.text``.
        :return: ``(threads, next_url)`` where ``threads`` is a list of
            ``{"id": int}`` dicts and ``next_url`` is the absolute URL of
            the following page, or ``None`` when no usable "Next" form is
            present.
        """
        soup = BeautifulSoup(r.text, "html.parser")

        threads = []
        # Candidate containers are <div>s whose id has a digit at index 6,
        # i.e. right after a six-character prefix such as "thread".
        for thread_el in soup.find_all("div", id=lambda tid: tid and tid[6:7].isdigit()):
            match = self._THREAD_ID_RE.search(thread_el.get("id"))
            if match is None:
                # The id passed the positional filter but is not a
                # "thread<digits>" id; skip it instead of crashing on None.
                continue
            threads.append({"id": int(match.group(1))})

        for form in soup.find_all("form"):
            next_button = form.find("input", attrs={"value": "Next"})
            # An action of "none" is treated as "no further page".
            if next_button and form.get("action") != "none":
                return threads, urljoin(self._base_url, form.get("action"))
        return threads, None

    @staticmethod
    def _to_timestamp(raw):
        """Convert a label's trailing date text to integer epoch seconds.

        :param raw: text matching ``_TIME_FORMAT``.
        :raises ValueError: if ``raw`` does not match the expected layout.
        """
        # NOTE(review): strptime yields a naive datetime, so .timestamp()
        # interprets it in the host's local timezone, and %a depends on the
        # process locale - confirm both match what the board emits.
        parsed = datetime.datetime.strptime(raw, TgChanHtmlChanHelper._TIME_FORMAT)
        return int(parsed.timestamp())

    @staticmethod
    def parse_thread(r):
        """Yield every reply of a thread page, then the opening post.

        :param r: response-like object exposing the page HTML as ``r.text``.
        :return: generator of dicts with keys ``id`` (int), ``type``
            (``"post"`` for replies, ``"thread"`` for the opening post),
            ``html`` (markup of the element) and ``time`` (int epoch
            seconds).
        """
        soup = BeautifulSoup(r.text, "html.parser")
        op_el = soup.find("form", id="delform")

        # Replies are the <table> elements directly under the form.
        for post_el in op_el.find_all("table", recursive=False):
            # The date is the last child node of the post's <label>.
            *_, stamp = post_el.find("label").children
            yield {
                # attrs must be a dict; the [5:] slice drops a five-character
                # id prefix (presumably "reply" - confirm against the markup).
                "id": int(post_el.find("td", attrs={"class": "reply"}).get("id")[5:]),
                "type": "post",
                "html": str(post_el),
                "time": TgChanHtmlChanHelper._to_timestamp(stamp),
            }
            # Remove the reply from the tree so that the opening-post lookup
            # and markup below no longer include it.
            post_el.decompose()

        *_, stamp = op_el.find("label").children
        yield {
            # The opening post's id comes from an <a name="<digits>"> anchor.
            "id": int(op_el.find("a", attrs={"name": lambda x: x and x.isdigit()}).get("name")),
            "type": "thread",
            "html": str(op_el),
            "time": TgChanHtmlChanHelper._to_timestamp(stamp),
        }