add fchan

2025-04-24 12:15:50 +00:00 · 2019-09-08 19:53:06 -04:00 · 2019-09-08 19:53:06 -04:00 · b6c42c1db3
commit b6c42c1db3
parent 60fa4893d8
3 changed files with 72 additions and 5 deletions
--- a/chan/chan.py
+++ b/chan/chan.py
@@ -2,6 +2,7 @@ from chan.alokal_json import AlokalJsonChanHelper
 from chan.desuchan_html import DesuChanHtmlChanHelper
 from chan.doushio_html import DoushioHtmlChanHelper
 from chan.endchan_html import EndchanHtmlChanHelper
+from chan.fchan_html import FChanHtmlChanHelper
 from chan.json import JsonChanHelper
 from chan.lolnada_html import LolNadaHtmlChanHelper
 from chan.mayuri import MayuriChanHelper
@@ -106,7 +107,7 @@ CHANS = {
            "a", "fd", "ja", "ma", "vn", "fg", "fur", "gg", "ga",
            "vape", "h", "ho", "hc", "e", "fet", "sex", "fag"
        ),
-        rps=1/10
+        rps=1/5
    ),
    "endchan": EndchanHtmlChanHelper(
        8,
@@ -268,4 +269,15 @@ CHANS = {
        ),
        rps=1/20,
    ),
+    "fchan": FChanHtmlChanHelper(
+        21,
+        "http://fchan.us/",
+        "http://fchan.us/",
+        "/res/",
+        "/src/",
+        (
+            "f", "m", "h", "s", "toon", "a", "ah", "c", "artist", "crit", "b"
+        ),
+        rps=1/60,
+    ),
 }
--- a/chan/desuchan_html.py
+++ b/chan/desuchan_html.py
@@ -53,10 +53,7 @@ class DesuChanHtmlChanHelper(ChanHelper):
    def parse_thread(r):
        soup = BeautifulSoup(r.text, "html.parser")

-        op_el = None
-        for div in soup.find_all("div", id=lambda tid: tid and tid[1:].isdigit()):
-            op_el = div
-            break
+        op_el = soup.find("div", id=lambda tid: tid and tid[1:].isdigit())

        for post_el in op_el.find_all("table", recursive=False):
            label = post_el.find("label")
--- a/chan/fchan_html.py
+++ b/chan/fchan_html.py
@@ -0,0 +1,58 @@
+import datetime
+import json
+import re
+from urllib.parse import urljoin
+
+from bs4 import BeautifulSoup
+
+from chan.desuchan_html import DesuChanHtmlChanHelper
+
+
+class FChanHtmlChanHelper(DesuChanHtmlChanHelper):
+    """Chan helper that parses fchan HTML, where a thread is a ``<div id="threadN">``."""
+
+    @staticmethod
+    def _is_thread_id(tid):
+        """Return True when *tid* is exactly ``thread`` followed by digits."""
+        # fullmatch, not match: a prefix-only hit would make int(tid[6:]) raise later.
+        return bool(tid) and re.fullmatch(r"thread[0-9]+", tid) is not None
+
+    def parse_threads_list(self, r):
+        """Parse one board index page.
+
+        :param r: response object exposing ``text`` (the HTML) and ``url``.
+        :return: ``(threads, next_url)``: a list of ``{"id": int}`` dicts and the
+            absolute URL of the "Next" link, or None when there is no such link.
+        """
+        soup = BeautifulSoup(r.text, "html.parser")
+        threads = [
+            {"id": int(el.get("id")[6:])}  # [6:] drops the "thread" prefix
+            for el in soup.find_all("div", id=self._is_thread_id)
+        ]
+        # href=True: urljoin() with a missing href returns r.url, i.e. the same page again.
+        href = next((a["href"] for a in soup.find_all("a", href=True) if a.text == "Next"), None)
+        return threads, (urljoin(r.url, href) if href else None)
+
+    @staticmethod
+    def parse_thread(r):
+        """Yield one dict per post of a thread page, the opening post first.
+
+        :param r: response object exposing ``text`` (the HTML).
+        :return: generator of ``{"id", "type", "html", "time"}`` dicts ("thread" for the
+            first table, "post" for the rest); empty if no thread ``<div>`` is found.
+        """
+        soup = BeautifulSoup(r.text, "html.parser")
+        op_el = soup.find("div", id=FChanHtmlChanHelper._is_thread_id)
+        if op_el is None:  # no thread container on the page: nothing to yield
+            return
+        for index, post_el in enumerate(op_el.find_all("table", recursive=False)):
+            *_, stamp = post_el.find("label").children  # the date is the label's last node
+            if index == 0:
+                post_id, kind = op_el.get("id")[6:], "thread"
+            else:
+                reply_td = post_el.find("td", class_=lambda c: c and "reply" in c)
+                post_id, kind = reply_td.get("id")[5:], "post"  # [5:] drops a 5-char prefix
+            # NOTE(review): naive datetime, so the epoch depends on the host time zone.
+            posted = datetime.datetime.strptime(stamp.strip(), "%y/%m/%d(%a)%H:%M")
+            yield {"id": int(post_id), "type": kind, "html": str(post_el), "time": int(posted.timestamp())}
+