mirror of
https://github.com/simon987/chan_feed.git
synced 2025-04-10 14:06:42 +00:00
add 2chan
This commit is contained in:
parent
ee666496e0
commit
1f21805667
128
chan/chan.py
128
chan/chan.py
@ -1,4 +1,5 @@
|
||||
from chan.alokal_json import AlokalJsonChanHelper
|
||||
from chan.chan2_jap import Chan2Helper
|
||||
from chan.chan410_html import Chan410HtmlChanHelper
|
||||
from chan.chan7_html import Chan7HtmlChanHelper
|
||||
from chan.chanon_html import ChanonHtmlChanHelper
|
||||
@ -460,7 +461,7 @@ CHANS = {
|
||||
"cl", "co", "ec", "es", "mx", "pe", "py", "uy", "ve", "d",
|
||||
"h", "o", "s", "sar", "scl", "sco", "ses", "smx", "spe", "sve",
|
||||
),
|
||||
rps=1/20
|
||||
rps=1 / 20
|
||||
),
|
||||
"sushigirl": JsonChanHelper(
|
||||
31,
|
||||
@ -499,4 +500,129 @@ CHANS = {
|
||||
),
|
||||
rps=1 / 15
|
||||
),
|
||||
"2chan": Chan2Helper(
|
||||
34,
|
||||
"https://<sub>.2chan.net",
|
||||
"https://<sub>.2chan.net",
|
||||
"/res/",
|
||||
"/src/",
|
||||
(
|
||||
"1", # baseball
|
||||
"12", # soccer
|
||||
"25<may>", # Mahjong
|
||||
"26<may>", # Horses
|
||||
"27<may>", # Cats,
|
||||
"d", # Animals
|
||||
"z", # Plant life
|
||||
"w", # Insects
|
||||
"49", # Aquatic life
|
||||
"62<dec>", # Outdoor
|
||||
"t", # Cooking
|
||||
"20", # Sweets
|
||||
"21", # ramen
|
||||
"e", # vehicles
|
||||
"j", # moto & scooters
|
||||
"37<nov>", # Bicycles
|
||||
"45", # Cameras
|
||||
"48", # Consumer electronics
|
||||
"r", # railroad
|
||||
"img2", # 2-D
|
||||
"b<dec>", # Nijura
|
||||
"b<may>",
|
||||
"b<jun>",
|
||||
"jun<jun>",
|
||||
|
||||
"58<dec>", # ??? 二次元裏転載不可
|
||||
"59<dec>", # ??? 二次元裏転載可
|
||||
|
||||
"id<may>", # 2-D ID
|
||||
"23", # Speedgrapher
|
||||
"18<dec>", # 2d-Live
|
||||
"16", # 2-D Neta
|
||||
"43", # 2-D Industry
|
||||
|
||||
"74<dec>", # ??? FGO
|
||||
"75<dec>", # ??? アイマス
|
||||
"78<dec>", # ??? ウメハラ総合
|
||||
|
||||
"31<jun>", # Games
|
||||
"28<nov>", # Net games
|
||||
|
||||
"56<dec>", # ??? ソシャゲ
|
||||
"60<dec>", # ??? 艦これ
|
||||
"69<dec>", # ??? モアイ
|
||||
"65<dec>", # ??? 刀剣乱舞
|
||||
"64<dec>", # ??? 占い
|
||||
"66<dec>", # ??? ファッション
|
||||
"67<dec>", # ??? 旅行
|
||||
"68<dec>", # ??? 子育て
|
||||
|
||||
"webm<may>",
|
||||
|
||||
"71<dec>", # ??? そうだね
|
||||
"82<dec>", # ??? 任天堂
|
||||
"61<dec>", # ??? ソニー
|
||||
|
||||
"10", # Net characters
|
||||
"34<nov>", # Narikiri
|
||||
"11", # Original art
|
||||
"14", # Original art flipside
|
||||
"32", # Crossdressing
|
||||
"15", # Bara
|
||||
"7", # Yuri
|
||||
"8", # Yaoi
|
||||
"o", # 2-D Guro
|
||||
"51", # 2-D Guro flipside
|
||||
"5", # Erotic games
|
||||
"3", # Homebrew PC
|
||||
"g", # Tokusatsu
|
||||
"2", # Robot manga and anime
|
||||
|
||||
"63<dec>", # 映画
|
||||
|
||||
"44", # Toys
|
||||
"v", # Models
|
||||
"y<nov>", # Models flipside nov
|
||||
"47", # Models flipside jun
|
||||
"46", # Figures
|
||||
"73<dec>", # VTuber
|
||||
"81<dec>", # 合成音声
|
||||
|
||||
"x", # 3DCG
|
||||
"35<nov>", # Politics
|
||||
"36<nov>", # Economics
|
||||
"79<dec>", # Economics
|
||||
"38", # Korean economics
|
||||
|
||||
"80<dec>", # ??? 安倍晋三
|
||||
"50<dec>", # ??? 三次実況
|
||||
|
||||
"f", # Military
|
||||
"39<may>", # Military flipside
|
||||
"m", # Mathematics
|
||||
"i", # Flash
|
||||
"k", # Wallpaper
|
||||
"l", # 2D Wallpaper
|
||||
"40<may>", # Touhou
|
||||
|
||||
"55<dec>", # ??? 東方裏
|
||||
|
||||
"p", # Oekaki
|
||||
"q<nov>", # Rakugaki
|
||||
"u", # Rakugaki flipside
|
||||
"6", # News desk
|
||||
"76<dec>", # ??? 昭和
|
||||
"77<dec>", # ??? 平成
|
||||
"9<img>", # Idle chat
|
||||
"52", # Great tohoku Earthquake of 2011
|
||||
"53", # Nuclear power
|
||||
"70<dec>", # ??? 新板提案
|
||||
"54", # IPv6
|
||||
"layout<may>",
|
||||
|
||||
"oe", # ??? お絵sql
|
||||
"72", # ??? お絵sqlip
|
||||
),
|
||||
rps=1 / 3
|
||||
),
|
||||
}
|
||||
|
127
chan/chan2_jap.py
Normal file
127
chan/chan2_jap.py
Normal file
@ -0,0 +1,127 @@
|
||||
import datetime
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from hexlib.misc import strhash, signed64
|
||||
|
||||
from chan.helper import ChanHelper
|
||||
from post_process import get_links_from_html_body
|
||||
import re
|
||||
|
||||
# Matches the "<xxx>" marker (exactly three lowercase ASCII letters) that a
# board name may carry, e.g. "b<may>"; group 1 is the text between the brackets.
SUBDOMAIN_PATTERN = re.compile(r"<([a-z]{3})>")

# Matches a post timestamp of the form "YY/MM/DD(w)HH:MM:SS", where "(w)" is a
# single character wrapped in parentheses; group 1 is the whole timestamp.
TIME_PATTERN = re.compile(
    r"([0-9]{2}/[0-9]{2}/[0-9]{2}\(.\)[0-9]{2}:[0-9]{2}:[0-9]{2})"
)
|
||||
|
||||
|
||||
def _ja_datefmt(text):
|
||||
return re.sub(r"\(.\)", " ", text)
|
||||
|
||||
|
||||
class Chan2Helper(ChanHelper):
    """Helper that scrapes boards served as Shift_JIS HTML pages.

    A board name may carry a ``<xxx>`` marker (three lowercase letters), e.g.
    ``"b<may>"``. The marker selects the subdomain substituted for the
    ``<sub>`` placeholder in the helper's base URL and is stripped from the
    board name before it is used in a path. Boards without a marker use the
    ``www`` subdomain.
    """

    # Timestamp layout once the parenthesised weekday has been replaced by a space.
    _TIME_FORMAT = "%y/%m/%d %H:%M:%S"

    # Page timestamps carry no zone information. They are interpreted in a fixed
    # UTC+9 zone so the resulting epoch value does not depend on the local
    # timezone of the machine running the scraper.
    # NOTE(review): assumes the site renders Japan Standard Time -- confirm.
    _JST = datetime.timezone(datetime.timedelta(hours=9))

    def _subdomain(self, board):
        """Return the subdomain named by the board's ``<xxx>`` marker, or ``"www"``."""
        m = SUBDOMAIN_PATTERN.search(board)
        return m.group(1) if m else "www"

    def _trim(self, board):
        """Return *board* with any ``<xxx>`` subdomain marker removed."""
        return SUBDOMAIN_PATTERN.sub("", board)

    def _board_base_url(self, board):
        """Return the base URL with ``<sub>`` replaced by the board's subdomain."""
        return self._base_url.replace("<sub>", self._subdomain(board))

    def threads_url(self, board):
        """Return the URL of the board's thread index page."""
        return "%s/%s/" % (self._board_base_url(board), self._trim(board))

    def posts_url(self, board, thread):
        """Return the URL of the ``.htm`` page holding all posts of *thread*."""
        return "%s/%s%s%d.htm" % (
            self._board_base_url(board),
            self._trim(board),
            self._thread_path,
            self.item_id(thread),
        )

    @staticmethod
    def item_id(item):
        """Return the numeric id stored under ``item["id"]``."""
        return item["id"]

    def item_urls(self, item, board):
        """Return the de-duplicated links found in the item's HTML.

        Links are extracted relative to the board's base URL; any URL
        containing ``"javascript"`` is dropped.
        """
        links = get_links_from_html_body(item["html"], self._board_base_url(board))
        return [url for url in set(links) if "javascript" not in url]

    @staticmethod
    def item_type(item):
        """Return ``item["type"]`` (``"thread"`` or ``"post"`` as set by parse_thread)."""
        return item["type"]

    @staticmethod
    def thread_mtime(thread):
        """Return the thread's change marker stored under ``thread["omit"]``.

        As built by parse_threads_list this is a hash of the thread's
        "replies omitted" notice, or 0 when the thread has no such notice.
        """
        return thread["omit"]

    @staticmethod
    def item_mtime(item):
        """Return the item's epoch timestamp stored under ``item["time"]``."""
        return item["time"]

    @staticmethod
    def _parse_html(r):
        """Decode the response body as Shift_JIS (dropping bad bytes) and parse it."""
        return BeautifulSoup(r.content.decode('Shift_JIS', 'ignore'), "html.parser")

    @staticmethod
    def _post_time(el):
        """Return the epoch timestamp (int seconds) of the post rendered in *el*.

        Raises:
            ValueError: if no timestamp can be found or it cannot be parsed.
        """
        cnw = el.find("span", class_="cnw")
        if cnw is not None:
            # Layout noted by the original author for "www" boards: the
            # timestamp is the first space-separated token of the span.
            raw = cnw.text.split(" ")[0]
        else:
            # Layout noted for "may" boards: no dedicated element, so the
            # timestamp is searched for in the element's text.
            m = TIME_PATTERN.search(el.text)
            if m is None:
                raise ValueError("no timestamp found in post")
            raw = m.group(1)

        parsed = datetime.datetime.strptime(_ja_datefmt(raw), Chan2Helper._TIME_FORMAT)
        # Attach an explicit zone: timestamp() on a naive datetime would use
        # the host's local timezone instead.
        return int(parsed.replace(tzinfo=Chan2Helper._JST).timestamp())

    @staticmethod
    def _reply_id(post_el):
        """Return the numeric id of the reply rendered in *post_el*.

        Three markups are tried in order: an ``<a>`` whose id is two
        characters followed by digits, the ``name`` of the first ``<input>``,
        and finally a ``<span>`` whose id starts with ``"delcheck"``.

        Raises:
            ValueError: if none of the markups is present or the id is not numeric.
        """
        anchor = post_el.find("a", id=lambda x: x and x[2:].isnumeric())
        if anchor is not None:
            return int(anchor.get("id")[2:])

        input_el = post_el.find("input")
        if input_el is not None:
            name = input_el.get("name")
            if name is None:
                raise ValueError("post <input> has no name attribute")
            return int(name)

        span = post_el.find("span", id=lambda x: x)
        if span is None:
            raise ValueError("post has no recognizable id element")
        return int(span.get("id")[len("delcheck"):])

    def parse_threads_list(self, r):
        """Parse a board index page into a list of thread descriptors.

        Returns:
            A ``(threads, next_url)`` tuple. Each thread is a dict with the
            keys ``"id"`` (int) and ``"omit"`` (change marker, see
            thread_mtime). ``next_url`` is always ``None``: the "next page"
            button is not followed, so only the page in *r* is listed.
        """
        soup = self._parse_html(r)

        threads = []
        for thread_el in soup.find_all("div", class_="thre"):
            thread_id = thread_el.get("data-res")
            if thread_id is None:
                # Not a usable thread container; skip it instead of failing
                # the whole listing.
                continue

            # Grey notice shown when replies are hidden on the index page,
            # e.g. <font color="#707070">9 replies omitted. Press the reply
            # button to read them all.</font> (the real text is Japanese).
            omit = thread_el.find("font", color="#707070")

            threads.append({
                "id": int(thread_id),
                "omit": signed64(strhash(omit.text)) if omit else 0,
            })

        return threads, None

    @staticmethod
    def parse_thread(r):
        """Yield every reply of a thread page, then the opening post.

        Replies are the ``<table>`` elements that are direct children of the
        first ``div.thre``. Each one is yielded as a ``"post"`` item and then
        removed from the tree, so that what remains of the container when the
        ``"thread"`` item is built is the opening post only.

        Yields nothing when the page has no ``div.thre`` carrying a
        ``data-res`` attribute.

        Raises:
            ValueError: if a post's timestamp or id cannot be extracted.
        """
        soup = Chan2Helper._parse_html(r)

        op_el = soup.find("div", class_="thre")
        if op_el is None or op_el.get("data-res") is None:
            return
        tid = int(op_el.get("data-res"))

        for post_el in op_el.find_all("table", recursive=False):
            timestamp = Chan2Helper._post_time(post_el)
            post_id = Chan2Helper._reply_id(post_el)

            yield {
                "id": post_id,
                "type": "post",
                "html": str(post_el),
                "time": timestamp,
                "parent": tid
            }
            # Must happen after the yield above has serialised the reply:
            # strips it from the container before the opening post is read.
            post_el.decompose()

        yield {
            "id": tid,
            "type": "thread",
            "html": str(op_el),
            "time": Chan2Helper._post_time(op_el),
        }
|
8
run.py
8
run.py
@ -2,6 +2,7 @@ import datetime
|
||||
import json
|
||||
import sqlite3
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
from queue import Queue
|
||||
@ -118,11 +119,11 @@ class ChanState:
|
||||
with sqlite3.connect(self._db, timeout=5000) as conn:
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"SELECT last_modified FROM threads WHERE thread=? AND chan=?",
|
||||
"SELECT last_modified, ts FROM threads WHERE thread=? AND chan=?",
|
||||
(helper.item_unique_id(thread, board), helper.db_id)
|
||||
)
|
||||
row = cur.fetchone()
|
||||
if not row or helper.thread_mtime(thread) != row[0]:
|
||||
if not row or helper.thread_mtime(thread) != row[0] or row[1] + 86400 < int(time.time()):
|
||||
return True
|
||||
return False
|
||||
|
||||
@ -132,7 +133,7 @@ class ChanState:
|
||||
"INSERT INTO threads (thread, last_modified, chan) "
|
||||
"VALUES (?,?,?) "
|
||||
"ON CONFLICT (thread, chan) "
|
||||
"DO UPDATE SET last_modified=?",
|
||||
"DO UPDATE SET last_modified=?, ts=(strftime('%s','now'))",
|
||||
(helper.item_unique_id(thread, board), helper.thread_mtime(thread), helper.db_id,
|
||||
helper.thread_mtime(thread))
|
||||
)
|
||||
@ -183,6 +184,7 @@ def publish(item, board, helper, channel, web):
|
||||
except Exception as e:
|
||||
logger.debug(traceback.format_exc())
|
||||
logger.error(str(e))
|
||||
time.sleep(0.5)
|
||||
channel = connect()
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user