mirror of
https://github.com/simon987/chan_feed.git
synced 2025-04-10 14:06:42 +00:00
76 lines
2.1 KiB
Python
76 lines
2.1 KiB
Python
from bs4 import BeautifulSoup
|
|
|
|
|
|
class ChanHelper:
    """Build URLs and unique ids for a chan-style site and define parse hooks.

    The URL builders, ``boards``, ``board_hash``, ``item_unique_id`` and
    ``parse_thread`` are implemented here.  ``item_id``, ``item_mtime``,
    ``thread_mtime``, ``item_urls``, ``item_type`` and ``parse_threads_list``
    raise ``NotImplementedError`` and have to be provided by a subclass.
    """

    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
        """Store the site configuration.

        :param db_id: identifier stored as-is on the public ``db_id`` attribute.
        :param base_url: prefix for ``threads_url`` and ``posts_url``.
        :param image_url: prefix for ``image_url``.
        :param thread_path: fragment inserted between board and thread number.
        :param image_path: fragment inserted between board and image name.
        :param boards: sequence of board names; it must support ``.index()``.
        :param rps: stored as-is on the public ``rps`` attribute
            (presumably a requests-per-second limit -- confirm with callers).
        """
        self.db_id = db_id
        self._base_url = base_url
        self._image_url = image_url
        self._thread_path = thread_path
        self._image_path = image_path
        self._boards = boards
        self.rps = rps

    def boards(self):
        """Return the configured boards whose name does not start with ``_``."""
        return [b for b in self._boards if not b.startswith("_")]

    def image_url(self, board, tim, extension):
        """Return image URL prefix + board + image path + ``tim`` + ``extension``.

        No separators are added; the configured fragments must carry their
        own slashes and ``extension`` its own leading dot.
        """
        return "%s%s%s%s%s" % (self._image_url, board, self._image_path, tim, extension)

    def threads_url(self, board):
        """Return the URL of ``threads.json`` for ``board``."""
        return "%s%s/threads.json" % (self._base_url, board)

    def posts_url(self, board, thread):
        """Return the ``.json`` URL of ``thread`` on ``board``.

        ``thread`` is formatted with ``%d`` and therefore has to be numeric.
        """
        return "%s%s%s%d.json" % (self._base_url, board, self._thread_path, thread)

    def board_hash(self, board):
        """Return ``(1-based position of board) * 10000`` as a string.

        The position is looked up in the full configured list, including
        boards that ``boards()`` filters out.

        :raises ValueError: if ``board`` is not in the configured list.
        """
        return str((self._boards.index(board) + 1) * 10000)

    @staticmethod
    def item_id(item):
        """Return the id of ``item``; must be implemented by a subclass."""
        raise NotImplementedError

    @staticmethod
    def item_mtime(item):
        """Return the mtime of ``item``; must be implemented by a subclass."""
        raise NotImplementedError

    def item_unique_id(self, item, board):
        """Return an int made of ``board_hash(board)`` followed by the item id.

        The two parts are joined as strings before conversion, so the item
        id's digits are appended to the board hash rather than added to it.
        """
        return int(self.board_hash(board) + str(self.item_id(item)))

    @staticmethod
    def thread_mtime(thread):
        """Return the mtime of ``thread``; must be implemented by a subclass."""
        raise NotImplementedError

    def item_urls(self, item, board):
        """Return the URLs found in ``item``; must be implemented by a subclass."""
        raise NotImplementedError

    @staticmethod
    def item_type(item):
        """Return the type of ``item``; must be implemented by a subclass."""
        raise NotImplementedError

    @staticmethod
    def parse_threads_list(r):
        """Parse a thread-list response; must be implemented by a subclass."""
        raise NotImplementedError

    @staticmethod
    def parse_thread(r):
        """Yield one dict for the opening post and one per reply found in ``r``.

        ``r.text`` is parsed as HTML with BeautifulSoup's ``html.parser``.
        Each yielded dict has the keys ``"id"`` (int), ``"type"``
        (``"thread"`` for the opening post, ``"post"`` for replies) and
        ``"html"`` (the element serialised with ``str``).

        This is a generator: nothing is parsed until it is iterated.

        :param r: object exposing the page markup on a ``text`` attribute.
        :raises AttributeError: if the page has no ``div.opCell`` element.
        """
        soup = BeautifulSoup(r.text, "html.parser")

        # The opening post's markup lives in div.innerOP, while its id is
        # carried by the enclosing div.opCell.
        op_el = soup.find("div", attrs={"class": "innerOP"})
        yield {
            "id": int(soup.find("div", class_="opCell").get("id")),
            "type": "thread",
            "html": str(op_el),
        }

        for post_el in soup.find_all("div", class_="postCell"):
            yield {
                "id": int(post_el.get("id")),
                "type": "post",
                "html": str(post_el),
            }