# Mirror of https://github.com/simon987/chan_feed.git (synced 2025-04-10)
import json
import sys
from datetime import datetime, timezone
from json import JSONDecodeError
from urllib.parse import urljoin

import cloudscraper

from chan.helper import ChanHelper
from hexlib.log import logger


|
class LynxChanHelper(ChanHelper):
    """Scraper helper for LynxChan-based imageboards.

    See https://gitgud.io/LynxChan/LynxChan/blob/master/doc/Json.txt
    for the JSON layout this class parses.
    """

    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards):
        super().__init__(db_id, base_url, image_url, thread_path, image_path, boards)

        # LynxChan boards are frequently behind Cloudflare; cloudscraper
        # transparently solves the JS challenge for plain GET requests.
        scraper = cloudscraper.create_scraper()
        # Optional proxy supplied as the third CLI argument.
        if len(sys.argv) > 3:
            proxy = sys.argv[3]
            scraper.proxies = {"http": proxy, "https": proxy}
            # NOTE(review): TLS verification is disabled when proxying —
            # presumably for MITM-style proxies; confirm this is intended.
            scraper.verify = False
        self.get_method = scraper.get

    @staticmethod
    def item_id(item):
        """Return the unique id of an item: thread id for OPs, post id for replies."""
        return item["threadId"] if LynxChanHelper.item_type(item) == "thread" else item["postId"]

    @staticmethod
    def item_mtime(item):
        """Return the item's creation time as a Unix timestamp.

        LynxChan emits ISO-8601 timestamps ending in "Z" (UTC). The "Z" is
        stripped because datetime.fromisoformat() can't parse it before
        Python 3.11; the result is then explicitly tagged as UTC so
        .timestamp() does not interpret it in the machine's local zone
        (the previous naive-datetime version was off by the local UTC offset).
        """
        created = datetime.fromisoformat(item["creation"][:-1])
        return created.replace(tzinfo=timezone.utc).timestamp()

    def item_urls(self, item, board):
        """Return absolute URLs of all files attached to *item* (possibly empty)."""
        files = item.get("files")
        if not files:
            return []
        return [urljoin(self._base_url, file["path"]) for file in files]

    @staticmethod
    def item_type(item):
        """Classify an item: OPs carry "threadId", replies only "postId"."""
        return "thread" if "threadId" in item else "post"

    def threads_url(self, board):
        """Return the URL of the first catalog page of *board*."""
        return "%s%s/1.json" % (self._base_url, board)

    @staticmethod
    def thread_mtime(thread):
        """Return a change marker for a thread (NOT a real timestamp).

        The total post count only ever grows, which is enough to detect that
        a thread changed since the last crawl. "ommitedPosts" is LynxChan's
        own misspelling of "omitted" — do not "fix" the key.
        """
        return thread.get("ommitedPosts", 0) + len(thread["posts"])

    @staticmethod
    def parse_threads_list(r):
        """Parse a catalog page response.

        Returns (threads, next_page_url); next_page_url is None on the last
        page or on any parse error.
        """
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
            if len(j) == 0 or "threads" not in j:
                logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
                return [], None
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return [], None

        next_page = None
        # Drop any query string, then pull the page number out of
        # ".../<page>.json" (the trailing 5 chars are ".json").
        url = r.url[:r.url.rfind("?")] if "?" in r.url else r.url
        current_page = int(url[url.rfind("/") + 1:-5])
        if current_page < j["pageCount"]:
            next_page = urljoin(r.url, "%d.json" % (current_page + 1))

        return j["threads"], next_page

    @staticmethod
    def parse_thread(r):
        """Parse a thread page response into a flat list of items.

        Replies come first, each tagged with its parent thread id under
        "_parent"; the OP object follows with its "posts" list removed so
        the replies are not duplicated inside it.
        """
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return []

        all_items = []
        for post in j["posts"]:
            post["_parent"] = j["threadId"]
            all_items.append(post)

        del j["posts"]
        all_items.append(j)

        return all_items