chan_feed/chan/helper.py

76 lines
2.1 KiB
Python

from bs4 import BeautifulSoup
class ChanHelper:
    """Base helper for one chan site: URL building, id derivation, parsing hooks.

    URL builders and the board/id helpers are implemented here.  The
    ``item_*``, ``thread_mtime`` and ``parse_threads_list`` hooks raise
    ``NotImplementedError`` and must be supplied by a site-specific subclass.
    ``parse_thread`` ships with an HTML-based default (see its docstring).
    """

    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
        """Store the site configuration.

        Args:
            db_id: Identifier of this site; stored unchanged on ``self.db_id``.
            base_url: Prefix used by ``threads_url`` and ``posts_url``.
            image_url: Prefix used by ``image_url``.
            thread_path: Text placed between the board and the thread number
                in ``posts_url``.
            image_path: Text placed between the board and ``tim`` in
                ``image_url``.
            boards: Ordered sequence of board names.  Names starting with
                ``"_"`` are hidden from ``boards()`` but keep their position
                for ``board_hash``.
            rps: Stored unchanged on ``self.rps``.
                NOTE(review): presumably a requests-per-second limit --
                confirm against the caller.
        """
        self.db_id = db_id
        self._base_url = base_url
        self._image_url = image_url
        self._thread_path = thread_path
        self._image_path = image_path
        self._boards = boards
        self.rps = rps

    def boards(self) -> list:
        """Return the configured board names that do not start with ``"_"``."""
        return [b for b in self._boards if not b.startswith("_")]

    def image_url(self, board, tim, extension) -> str:
        """Return ``image_url + board + image_path + tim + extension``."""
        return "%s%s%s%s%s" % (self._image_url, board, self._image_path, tim, extension)

    def threads_url(self, board) -> str:
        """Return the URL of the ``threads.json`` listing for ``board``."""
        return "%s%s/threads.json" % (self._base_url, board)

    def posts_url(self, board, thread) -> str:
        """Return the URL of the JSON document for ``thread`` on ``board``.

        ``thread`` is formatted with ``%d`` and therefore must be numeric.
        """
        return "%s%s%s%d.json" % (self._base_url, board, self._thread_path, thread)

    def board_hash(self, board) -> str:
        """Return the numeric prefix (as a string) identifying ``board``.

        The prefix is ``(position + 1) * 10000``.  The position is looked up
        in the full configured list, including the ``"_"``-prefixed entries
        that ``boards()`` filters out, so hiding a board does not shift the
        prefixes of the boards after it.

        Raises:
            ValueError: If ``board`` is not in the configured list.
        """
        return str((self._boards.index(board) + 1) * 10000)

    @staticmethod
    def item_id(item):
        """Return the site-local id of ``item``.  Must be overridden."""
        raise NotImplementedError

    @staticmethod
    def item_mtime(item):
        """Return the modification time of ``item``.  Must be overridden."""
        raise NotImplementedError

    def item_unique_id(self, item, board) -> int:
        """Return an id for ``item`` that also encodes its board.

        Built by concatenating the digits of ``board_hash(board)`` with the
        digits of ``item_id(item)`` and converting the result to ``int``.
        """
        return int(self.board_hash(board) + str(self.item_id(item)))

    @staticmethod
    def thread_mtime(thread):
        """Return the modification time of ``thread``.  Must be overridden."""
        raise NotImplementedError

    def item_urls(self, item, board):
        """Return the URLs associated with ``item``.  Must be overridden."""
        raise NotImplementedError

    @staticmethod
    def item_type(item):
        """Return the type of ``item``.  Must be overridden."""
        raise NotImplementedError

    @staticmethod
    def parse_threads_list(r):
        """Parse a thread-listing response ``r``.  Must be overridden."""
        raise NotImplementedError

    # NOTE: this class used to define ``parse_thread`` twice -- an abstract
    # stub immediately followed by the implementation below.  The second
    # definition always replaced the first, so the stub was dead code and has
    # been removed; the effective behaviour is unchanged.
    @staticmethod
    def parse_thread(r):
        """Yield the opening post and then every reply found in ``r.text``.

        ``r.text`` is parsed as HTML.  Each yielded dict has the keys
        ``"id"`` (int, from the element's ``id`` attribute), ``"type"``
        (``"thread"`` for the opening post, ``"post"`` for replies) and
        ``"html"`` (the element serialised back to a string).

        The thread id comes from the ``div.opCell`` element while its HTML
        comes from the ``div.innerOP`` element.  Replies are the
        ``div.postCell`` elements, in document order.
        """
        soup = BeautifulSoup(r.text, "html.parser")
        op_el = soup.find("div", attrs={"class": "innerOP"})
        yield {
            "id": int(soup.find("div", class_="opCell").get("id")),
            "type": "thread",
            "html": str(op_el),
        }
        for post_el in soup.find_all("div", class_="postCell"):
            yield {
                "id": int(post_el.get("id")),
                "type": "post",
                "html": str(post_el),
            }