From b6c42c1db3cb9f5c5d48dbedaab4cddf73198b09 Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 8 Sep 2019 19:53:06 -0400 Subject: [PATCH] add fchan --- chan/chan.py | 14 ++++++++++- chan/desuchan_html.py | 5 +--- chan/fchan_html.py | 58 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 5 deletions(-) create mode 100644 chan/fchan_html.py diff --git a/chan/chan.py b/chan/chan.py index 4995821..8638200 100644 --- a/chan/chan.py +++ b/chan/chan.py @@ -2,6 +2,7 @@ from chan.alokal_json import AlokalJsonChanHelper from chan.desuchan_html import DesuChanHtmlChanHelper from chan.doushio_html import DoushioHtmlChanHelper from chan.endchan_html import EndchanHtmlChanHelper +from chan.fchan_html import FChanHtmlChanHelper from chan.json import JsonChanHelper from chan.lolnada_html import LolNadaHtmlChanHelper from chan.mayuri import MayuriChanHelper @@ -106,7 +107,7 @@ CHANS = { "a", "fd", "ja", "ma", "vn", "fg", "fur", "gg", "ga", "vape", "h", "ho", "hc", "e", "fet", "sex", "fag" ), - rps=1/10 + rps=1/5 ), "endchan": EndchanHtmlChanHelper( 8, @@ -268,4 +269,15 @@ CHANS = { ), rps=1/20, ), + "fchan": FChanHtmlChanHelper( + 21, + "http://fchan.us/", + "http://fchan.us/", + "/res/", + "/src/", + ( + "f", "m", "h", "s", "toon", "a", "ah", "c", "artist", "crit", "b" + ), + rps=1/60, + ), } diff --git a/chan/desuchan_html.py b/chan/desuchan_html.py index fca50f1..dcba0ab 100644 --- a/chan/desuchan_html.py +++ b/chan/desuchan_html.py @@ -53,10 +53,7 @@ class DesuChanHtmlChanHelper(ChanHelper): def parse_thread(r): soup = BeautifulSoup(r.text, "html.parser") - op_el = None - for div in soup.find_all("div", id=lambda tid: tid and tid[1:].isdigit()): - op_el = div - break + op_el = soup.find("div", id=lambda tid: tid and tid[1:].isdigit()) for post_el in op_el.find_all("table", recursive=False): label = post_el.find("label") diff --git a/chan/fchan_html.py b/chan/fchan_html.py new file mode 100644 index 0000000..3c44149 --- /dev/null +++ 
import datetime
import re
from urllib.parse import urljoin

from bs4 import BeautifulSoup

from chan.desuchan_html import DesuChanHtmlChanHelper

# Thread containers / OP divs are id'd "thread<NN>". Compiled once at import
# instead of re-compiling a (previously non-raw) pattern string on every call.
_THREAD_ID_RE = re.compile(r"thread[0-9]+")


class FChanHtmlChanHelper(DesuChanHtmlChanHelper):
    """Scraper helper for fchan.us (Kusaba-style HTML board).

    Inherits the request/paging plumbing from DesuChanHtmlChanHelper and
    overrides the two HTML-parsing hooks for fchan's markup: thread
    containers are <div id="threadNNN">, reply cells are
    <td class="reply" id="replyNNN">.
    """

    def parse_threads_list(self, r):
        """Parse one board-index page.

        :param r: HTTP response whose ``.text`` is the board index HTML.
        :return: ``(threads, next_url)`` — ``threads`` is a list of
                 ``{"id": int}`` dicts; ``next_url`` is the absolute URL of
                 the next index page, or ``None`` when there is no "Next"
                 pagination link.
        """
        soup = BeautifulSoup(r.text, "html.parser")

        # Strip the 6-char "thread" prefix of the div id to get the numeric id.
        threads = [
            {"id": int(div.get("id")[6:])}
            for div in soup.find_all(
                "div", id=lambda tid: tid and _THREAD_ID_RE.match(tid))
        ]

        # Pagination: the anchor whose text is exactly "Next", when present.
        next_link = next(
            (a for a in soup.find_all("a") if a.text == "Next"), None)
        if next_link:
            return threads, urljoin(r.url, next_link.get("href"))
        return threads, None

    @staticmethod
    def parse_thread(r):
        """Yield the OP and every reply of a single thread page.

        :param r: HTTP response whose ``.text`` is the thread HTML.
        :yields: dicts with ``"id"`` (int), ``"type"`` (``"thread"`` for the
                 OP, ``"post"`` for replies), raw ``"html"``, and a Unix
                 ``"time"`` stamp.
        """

        def _post_time(post_el):
            # The last child of the post's <label> is the date text,
            # e.g. "19/09/08(Sun)19:53" — two-digit year, no timezone.
            *_, date_text = post_el.find("label").children
            return int(datetime.datetime.strptime(
                date_text.strip(), "%y/%m/%d(%a)%H:%M").timestamp())

        soup = BeautifulSoup(r.text, "html.parser")

        # The first "threadNNN" div on the page is the OP's container.
        op_el = soup.find("div", id=lambda tid: tid and _THREAD_ID_RE.match(tid))

        for i, post_el in enumerate(op_el.find_all("table", recursive=False)):
            if i == 0:
                # First <table> is the OP; its numeric id lives on the
                # surrounding thread div ("thread" prefix is 6 chars).
                yield {
                    "id": int(op_el.get("id")[6:]),
                    "type": "thread",
                    "html": str(post_el),
                    "time": _post_time(post_el),
                }
            else:
                # Replies carry their id on a td whose class list contains
                # "reply"; the "reply" id prefix is 5 chars.
                yield {
                    "id": int(post_el.find(
                        "td", class_=lambda x: x and "reply" in x).get("id")[5:]),
                    "type": "post",
                    "html": str(post_el),
                    "time": _post_time(post_el),
                }