add 410chan

2025-04-24 12:15:50 +00:00 · 2019-09-10 16:41:56 -04:00 · 2019-09-10 16:41:56 -04:00 · fb2f1419d8
commit fb2f1419d8
parent 6b9db95637
2 changed files with 64 additions and 1 deletions
--- a/chan/chan.py
+++ b/chan/chan.py
@ -1,4 +1,5 @@
 from chan.alokal_json import AlokalJsonChanHelper
+from chan.chan410_html import Chan410HtmlChanHelper
 from chan.desuchan_html import DesuChanHtmlChanHelper
 from chan.doushio_html import DoushioHtmlChanHelper
 from chan.endchan_html import EndchanHtmlChanHelper
@ -28,7 +29,7 @@ CHANS = {
            "news", "out", "po", "pol", "qst", "sci", "soc", "sp",
            "tg", "toy", "trv", "tv", "vp", "wsg", "wsr", "x"
        ),
-        rps=2
+        rps=3 / 2
    ),
    "lainchan": JsonChanHelper(
        2,
@ -296,5 +297,16 @@ CHANS = {
            "test", "tlp", "tmp", "tv", "vg", "vipe", "wh", "xikkadvach", "ynet"
        ),
        rps=1 / 5
+    ),
+    "410chan": Chan410HtmlChanHelper(
+        23,
+        "http://410chan.org/",
+        "http://410chan.org/",
+        "/res/",
+        "/src/",
+        (
+            "d", "b", "cu", "dev", "r", "a", "ts", "ci"
+        ),
+        rps=1 / 60
    )
 }
--- a/chan/chan410_html.py
+++ b/chan/chan410_html.py
@ -0,0 +1,51 @@
+import datetime
+import re
+
+from bs4 import BeautifulSoup
+
+from chan.desuchan_html import DesuChanHtmlChanHelper
+
+
+def _ru_datefmt(text):
+    """Return *text* with every parenthesised two-character group removed.
+
+    Used to clean post timestamps before they are handed to ``strptime``;
+    the removed group is presumably the abbreviated day of the week
+    (confirm against the live markup). Surrounding whitespace is left as is.
+    """
+    parenthesised_pair = re.compile(r"\(.{2}\)")
+    return parenthesised_pair.sub("", text)
+
+
+class Chan410HtmlChanHelper(DesuChanHtmlChanHelper):
+    """HTML parser for 410chan board index and thread pages.
+
+    Overrides the thread-list and thread parsers inherited from
+    ``DesuChanHtmlChanHelper``; everything else comes from the parent class.
+    """
+
+    def parse_threads_list(self, r):
+        """Extract the threads listed on a board index page.
+
+        :param r: response object whose ``content`` attribute holds the raw
+            page bytes (decoded as UTF-8, undecodable bytes are dropped).
+        :return: ``(threads, None)`` where ``threads`` is a list of dicts with
+            the numeric thread ``id`` and ``omit``, the number read from the
+            thread's "omittedposts" span (0 when the span is absent).
+            The second element is always ``None`` (presumably the slot for a
+            next-page URL -- confirm against the parent class).
+        """
+        soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
+        # Thread containers carry ids of the form "thread<digits><letters>".
+        thread_id_re = re.compile("thread([0-9]+)[a-zA-Z]*")
+
+        threads = []
+
+        # id=True selects every div that has an id attribute; the regex is
+        # then applied once per element, anchored at the start of the id.
+        for thread_el in soup.find_all("div", id=True):
+            match = thread_id_re.match(thread_el.get("id"))
+            if not match:
+                continue
+            omit = thread_el.find("span", class_="omittedposts")
+            threads.append({
+                "id": int(match.group(1)),
+                # The count is the second space-separated token of the span.
+                "omit": int(omit.text.split(" ")[1]) if omit else 0
+            })
+
+        return threads, None
+
+    @staticmethod
+    def parse_thread(r):
+        """Yield every reply of a thread page, then the thread itself.
+
+        :param r: response object whose ``content`` attribute holds the raw
+            page bytes (decoded as UTF-8, undecodable bytes are dropped).
+        :return: generator of dicts with keys ``id``, ``type`` ("post" for
+            replies, "thread" for the final item), ``html`` and ``time``
+            (integer Unix timestamp). The timestamp is parsed as a naive
+            datetime, so it is interpreted in the local timezone.
+        """
+        soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
+
+        op_el = soup.find("form", id="delform")
+
+        def timestamp_of(el):
+            # Timestamp of the first "time" span found inside *el*.
+            time_el = el.find("span", class_="time")
+            if time_el is None:
+                # Fall back to the first timestamp in the whole form, which
+                # is the value previously used for every post.
+                time_el = op_el.find("span", class_="time")
+            parsed = datetime.datetime.strptime(_ru_datefmt(time_el.text),
+                                                "%d.%m.%Y %H:%M:%S")
+            return int(parsed.timestamp())
+
+        for post_el in op_el.find_all("div", class_="reply"):
+            yield {
+                # Drop the 5-character prefix of the element id
+                # (presumably "reply").
+                "id": int(post_el.get("id")[5:]),
+                "type": "post",
+                "html": str(post_el),
+                # Read the reply's own timestamp rather than the form's first.
+                "time": timestamp_of(post_el)
+            }
+            # Remove the reply so the thread's html below excludes it.
+            post_el.decompose()
+
+        yield {
+            "id": int(op_el.find("a", attrs={"name": lambda x: x and x.isdigit()}).get("name")),
+            "type": "thread",
+            "html": str(op_el),
+            "time": timestamp_of(op_el)
+        }