add plus4chan

simon 2019-11-22 12:14:23 -05:00
parent 82578daecd
commit ee666496e0
3 changed files with 95 additions and 1 deletion


@@ -14,6 +14,7 @@ from chan.kev4_php import Kev4PhpHelper
from chan.lolnada_html import LolNadaHtmlChanHelper
from chan.mayuri import MayuriChanHelper
from chan.nowere_html import NowereHtmlChanHelper
from chan.plus4chan_html import Plus4ChanHelper
from chan.russian_json import RussianJsonChanHelper
from chan.synch_json import SynchJsonChanHelper
from chan.tgchan_html import TgChanHtmlChanHelper
@@ -486,4 +487,16 @@ CHANS = {
),
rps=1 / 20
),
"plus4chan": Plus4ChanHelper(
33,
"https://boards.plus4chan.org/",
"https://boards.plus4chan.org/",
"",
"",
(
"baw", "co", "cog", "jam", "mtv",
"coc", "draw", "pco", "coq", "cod", "a"
),
rps=1 / 15
),
}
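For reference, this is roughly what the new registry entry resolves to at runtime. A minimal sketch, assuming the second constructor argument is the _base_url the URL helpers format from (the base ChanHelper constructor is not part of this diff), with 12345 as a made-up thread id:

helper = CHANS["plus4chan"]
helper.threads_url("co")               # "https://boards.plus4chan.org/co/"
helper.posts_url("co", {"id": 12345})  # "https://boards.plus4chan.org/co/t12345.html"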

chan/kev4_php.py

@@ -41,7 +41,7 @@ class Kev4PhpHelper(ChanHelper):
for threadEl in soup.find_all("p", class_="info"):
threads.append({
"id": int(threadEl.find("a").get("id")[len("expandButtun"):]),
"id": int(threadEl.find("a").get("id")[len("expandButton"):]),
"omit": int(threadEl.text.split(" ")[1])
})

chan/plus4chan_html.py Normal file

@@ -0,0 +1,81 @@
import datetime
from urllib.parse import urljoin

from bs4 import BeautifulSoup

from chan.helper import ChanHelper
from post_process import get_links_from_html_body


class Plus4ChanHelper(ChanHelper):
    def threads_url(self, board):
        return "%s%s/" % (self._base_url, board)

    def posts_url(self, board, thread):
        return "%s%s/t%d.html" % (self._base_url, board, self.item_id(thread))

    @staticmethod
    def item_id(item):
        return item["id"]

    def item_urls(self, item, board):
        return list(set(get_links_from_html_body(item["html"], self._base_url)))

    @staticmethod
    def item_type(item):
        return item["type"]

    @staticmethod
    def thread_mtime(thread):
        return thread["omit"]

    @staticmethod
    def item_mtime(item):
        return item["time"]

    def parse_threads_list(self, r):
        soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")

        threads = []
        for threadEl in soup.find_all("section", class_="t", id=lambda x: x and x[1:].isnumeric()):
            omit = threadEl.find("a", class_="omittedbreakdown")
            threads.append({
                # Strip the one-character prefix from the element id to get the numeric thread id
                "id": int(threadEl.get("id")[1:]),
                "omit": int(omit.text.split(" ")[1]) if omit else 0
            })

        # The board index is paginated; follow the "next" link if present
        for link in soup.find_all("a", href=lambda x: x):
            if link.text == "next":
                return threads, urljoin(r.url, link.get("href"))
        return threads, None

    @staticmethod
    def parse_thread(r):
        soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'), "html.parser")
        thread_el = soup.find("section", class_="t")
        tid = int(thread_el.get("id")[1:])

        # Posts whose numeric id matches the thread section id are the OP; the rest are replies
        for post_el in soup.find_all("div", class_="p", id=lambda x: x and x[1:].isnumeric()):
            pid = int(post_el.get("id")[1:])
            if pid == tid:
                yield {
                    "id": tid,
                    "type": "thread",
                    "html": str(post_el),
                    "time": int(datetime.datetime.strptime(post_el.find("time", class_="date").text,
                                                           "%Y/%m/%d %H:%M:%S").timestamp())
                }
            else:
                yield {
                    "id": pid,
                    "type": "post",
                    "html": str(post_el),
                    "time": int(datetime.datetime.strptime(post_el.find("time", class_="date").text,
                                                           "%Y/%m/%d %H:%M:%S").timestamp()),
                    "parent": tid
                }
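For context, a minimal sketch of how the helper's methods chain together, assuming a plain requests-based fetch loop; the project's real crawler (which presumably also honours the rps limit) is not part of this commit:

import requests

from chan.plus4chan_html import Plus4ChanHelper

# Constructed with the same arguments as the CHANS entry above; the base
# ChanHelper constructor itself is defined elsewhere in the project.
helper = Plus4ChanHelper(
    33,
    "https://boards.plus4chan.org/",
    "https://boards.plus4chan.org/",
    "",
    "",
    ("baw", "co", "cog", "jam", "mtv", "coc", "draw", "pco", "coq", "cod", "a"),
    rps=1 / 15
)

board = "co"
url = helper.threads_url(board)
while url:
    r = requests.get(url)
    threads, url = helper.parse_threads_list(r)    # next index page URL, or None
    for thread in threads:
        tr = requests.get(helper.posts_url(board, thread))
        for item in helper.parse_thread(tr):       # yields the OP as "thread" and replies as "post"
            print(item["type"], item["id"], item["time"])

This sketch does no rate limiting or error handling; it only illustrates the threads_url, parse_threads_list, posts_url, parse_thread flow.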