Add two Lynx chans, update dependencies

This commit is contained in:
simon987 2019-12-25 17:21:37 -05:00
parent 7ea1612b32
commit 77a053d6ee
9 changed files with 119 additions and 12 deletions

View File

@ -7,7 +7,7 @@ image boards and publishes serialised JSON to RabbitMQ
Compatible image boards: 4chan, lainchan, uboachan,
22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal,
horochan, doushio, desuchan, tgchan, lolnada, 7chan, chanon,
chan.org.li, hispachan, 8kun, nowere, iichan and more.
chan.org.li, hispachan, 8kun, nowere, iichan, 2chan and more.
Can optionally push monitoring data to InfluxDB. Below is an
example of Grafana being used to display it.

View File

@ -1,4 +1,4 @@
from chan.json import JsonChanHelper
from chan.chan_json import JsonChanHelper
from post_process import get_links_from_body

View File

@ -9,10 +9,11 @@ from chan.endchan_html import EndchanHtmlChanHelper
from chan.fchan_html import FChanHtmlChanHelper
from chan.hispachan_html import HispachanHtmlHelper
from chan.iichan_html import IichanHtmlChanHelper
from chan.json import JsonChanHelper
from chan.chan_json import JsonChanHelper
from chan.json_kun import JsonKunChanHelper
from chan.kev4_php import Kev4PhpHelper
from chan.lolnada_html import LolNadaHtmlChanHelper
from chan.lynx import LynxChanHelper
from chan.mayuri import MayuriChanHelper
from chan.nowere_html import NowereHtmlChanHelper
from chan.plus4chan_html import Plus4ChanHelper
@ -596,5 +597,26 @@ CHANS = {
),
rps=1 / 3
),
# next is 36
"waifuist": LynxChanHelper(
36,
"https://waifuist.pro/",
"https://waifuist.pro/",
"/res/",
"",
(
"w", "starlet", "etc",
),
rps=1 / 25
),
"cutiegarden": LynxChanHelper(
37,
"https://cutie.garden/",
"https://cutie.garden/",
"/res/",
"",
(
"lg", "cozy", "meta", "test"
),
rps=1 / 25
),
}

View File

@ -1,6 +1,6 @@
from vanwanet_scrape.scraper import Scraper
from chan.json import JsonChanHelper
from chan.chan_json import JsonChanHelper
from util import logger

83
chan/lynx.py Normal file
View File

@ -0,0 +1,83 @@
import json
from datetime import datetime
from json import JSONDecodeError
from urllib.parse import urljoin
import cloudscraper
from chan.helper import ChanHelper
from util import logger
class LynxChanHelper(ChanHelper):
    """Scraper helper for LynxChan-based image boards.

    See https://gitgud.io/LynxChan/LynxChan/blob/master/doc/Json.txt
    for the JSON API this parses.
    """

    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
        super().__init__(db_id, base_url, image_url, thread_path, image_path, boards, rps)
        # LynxChan sites are commonly fronted by Cloudflare; route all GETs
        # through a cloudscraper session so challenges are solved transparently.
        scraper = cloudscraper.create_scraper()
        self.get_method = scraper.get

    @staticmethod
    def item_id(item):
        """Return the board-local numeric id of a thread or reply."""
        return item["threadId"] if LynxChanHelper.item_type(item) == "thread" else item["postId"]

    @staticmethod
    def item_mtime(item):
        """Return the item's creation time as a UNIX timestamp.

        LynxChan serialises timestamps as ISO-8601 with a trailing "Z"
        (e.g. "2019-12-25T17:21:37.000Z"). datetime.fromisoformat() rejects
        the "Z" suffix on Python < 3.11, so normalise it to an explicit UTC
        offset; this also makes the resulting timestamp timezone-correct
        instead of depending on the local machine's timezone.
        """
        return datetime.fromisoformat(item["creation"].replace("Z", "+00:00")).timestamp()

    def item_urls(self, item, board):
        """Return absolute URLs of all files attached to an item (possibly empty)."""
        return [
            urljoin(self._base_url, im["path"])
            for im in item["files"]
        ] if "files" in item and item["files"] else []

    @staticmethod
    def item_type(item):
        """Classify an item: objects carrying "threadId" are threads, else posts."""
        return "thread" if "threadId" in item else "post"

    def threads_url(self, board):
        """Return the URL of the first catalog page for *board*."""
        return "%s%s/1.json" % (self._base_url, board)

    @staticmethod
    def thread_mtime(thread):
        """Return a monotonically increasing activity counter for a thread.

        NOTE: "ommitedPosts" is LynxChan's own (misspelled) API key —
        do not "correct" the spelling.
        """
        return thread.get("ommitedPosts", 0) + len(thread["posts"])

    @staticmethod
    def parse_threads_list(r):
        """Parse a catalog page response.

        Returns a (threads, next_page_url) tuple; next_page_url is None on
        the last page or on any parse error.
        """
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
            if len(j) == 0 or "threads" not in j:
                logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
                return [], None
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return [], None

        next_page = None
        # The current page number is embedded in the URL as ".../<n>.json";
        # strip any query string, then slice between the last "/" and ".json".
        url = r.url[:r.url.rfind("?")] if "?" in r.url else r.url
        current_page = int(url[url.rfind("/") + 1:-5])
        if current_page < j["pageCount"]:
            next_page = urljoin(r.url, "%d.json" % (current_page + 1))

        return j["threads"], next_page

    @staticmethod
    def parse_thread(r):
        """Parse a thread JSON response into a flat list of items.

        Each reply is tagged with its parent thread id under the synthetic
        "_parent" key; the thread object itself (with "posts" stripped to
        avoid duplicating the replies) is appended last. Returns [] on a
        malformed response.
        """
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return []

        all_items = []
        # Tolerate a thread object with no "posts" array (empty thread).
        for post in j.get("posts", []):
            post["_parent"] = j["threadId"]
            all_items.append(post)
        j.pop("posts", None)
        all_items.append(j)
        return all_items

View File

@ -1,4 +1,4 @@
from chan.json import JsonChanHelper
from chan.chan_json import JsonChanHelper
from post_process import get_links_from_body

View File

@ -7,4 +7,6 @@ influxdb
pika
bs4
urllib3
git+git://github.com/simon987/hexlib.git
git+git://github.com/simon987/hexlib.git
git+git://github.com/simon987/vanwanet_scrape.git
cloudscraper

10
run.py
View File

@ -102,14 +102,14 @@ class ChanState:
conn.commit()
def mark_visited(self, item: int, helper):
with sqlite3.connect(self._db) as conn:
with sqlite3.connect(self._db, timeout=10000) as conn:
conn.execute(
"INSERT INTO posts (post, chan) VALUES (?,?)",
(item, helper.db_id)
)
def has_visited(self, item: int, helper):
with sqlite3.connect(self._db) as conn:
with sqlite3.connect(self._db, timeout=10000) as conn:
cur = conn.cursor()
cur.execute(
"SELECT post FROM posts WHERE post=? AND chan=?",
@ -122,7 +122,7 @@ class ChanState:
if mtime == -1:
return True
with sqlite3.connect(self._db, timeout=5000) as conn:
with sqlite3.connect(self._db, timeout=10000) as conn:
cur = conn.cursor()
cur.execute(
"SELECT last_modified, ts FROM threads WHERE thread=? AND chan=?",
@ -134,7 +134,7 @@ class ChanState:
return False
def mark_thread_as_visited(self, thread, helper, board):
with sqlite3.connect(self._db, timeout=5000) as conn:
with sqlite3.connect(self._db, timeout=10000) as conn:
conn.execute(
"INSERT INTO threads (thread, last_modified, chan) "
"VALUES (?,?,?) "
@ -243,7 +243,7 @@ if __name__ == "__main__":
state = ChanState()
publish_q = Queue()
for _ in range(5):
for _ in range(10):
publish_thread = Thread(target=publish_worker, args=(publish_q, chan_helper, proxy))
publish_thread.setDaemon(True)
publish_thread.start()