chan_feed/chan/lynx.py

import json
import sys
from datetime import datetime
from json import JSONDecodeError
from urllib.parse import urljoin

import cloudscraper
from hexlib.log import logger

from chan.helper import ChanHelper


class LynxChanHelper(ChanHelper):
    """See https://gitgud.io/LynxChan/LynxChan/blob/master/doc/Json.txt"""

    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards):
        super().__init__(db_id, base_url, image_url, thread_path, image_path, boards)

        # cloudscraper handles Cloudflare challenges; an optional proxy can be
        # supplied as the third command-line argument.
        scraper = cloudscraper.create_scraper()
        if len(sys.argv) > 3:
            proxy = sys.argv[3]
            scraper.proxies = {"http": proxy, "https": proxy}
            scraper.verify = False
        self.get_method = scraper.get

    @staticmethod
    def item_id(item):
        return item["threadId"] if LynxChanHelper.item_type(item) == "thread" else item["postId"]

    @staticmethod
    def item_mtime(item):
        # "creation" is an ISO-8601 UTC timestamp with a trailing "Z"; make the
        # offset explicit so the value is not parsed as local time.
        return datetime.fromisoformat(item["creation"].replace("Z", "+00:00")).timestamp()

    def item_urls(self, item, board):
        if not item.get("files"):
            return []
        return [urljoin(self._base_url, im["path"]) for im in item["files"]]

    @staticmethod
    def item_type(item):
        return "thread" if "threadId" in item else "post"

    def threads_url(self, board):
        # First page of the board index; parse_threads_list() follows pagination
        # from there.
        return "%s%s/1.json" % (self._base_url, board)

    @staticmethod
    def thread_mtime(thread):
        # Not a real mtime: the total reply count (omitted + listed) serves as a
        # cheap change marker. ("ommitedPosts" is spelled as in the upstream JSON.)
        return thread.get("ommitedPosts", 0) + len(thread["posts"])

    @staticmethod
    def parse_threads_list(r):
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
            if len(j) == 0 or "threads" not in j:
                logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
                return [], None
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return [], None

        # The board index is paginated as <board>/<page>.json; derive the current
        # page number from the URL and advance until pageCount is reached.
        next_page = None
        url = r.url[:r.url.rfind("?")] if "?" in r.url else r.url
        current_page = int(url[url.rfind("/") + 1:-5])
        if current_page < j["pageCount"]:
            next_page = urljoin(r.url, "%d.json" % (current_page + 1))

        return j["threads"], next_page

    @staticmethod
    def parse_thread(r):
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return []

        # Flatten the thread into a list of items: every reply (tagged with its
        # parent thread id), followed by the opening post itself.
        all_items = []
        for post in j["posts"]:
            post["_parent"] = j["threadId"]
            all_items.append(post)
        del j["posts"]
        all_items.append(j)
        return all_items
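

if __name__ == "__main__":
    # Minimal usage sketch; every value below is hypothetical and would normally
    # come from the project's configuration rather than from this module.
    helper = LynxChanHelper(
        1,                        # db_id (hypothetical)
        "https://example.org/",   # base_url (hypothetical)
        "https://example.org/",   # image_url (hypothetical)
        "res/",                    # thread_path (hypothetical)
        ".media/",                 # image_path (hypothetical)
        ["b"],                     # boards (hypothetical)
    )
    print(helper.threads_url("b"))  # -> https://example.org/b/1.json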