mirror of
https://github.com/simon987/chan_feed.git
synced 2025-04-19 18:16:44 +00:00
Add two Lynx chans, update dependencies
This commit is contained in:
parent
7ea1612b32
commit
77a053d6ee
@ -7,7 +7,7 @@ image boards and publishes serialised JSON to RabbitMQ
|
|||||||
Compatible image boards: 4chan, lainchan, uboachan,
|
Compatible image boards: 4chan, lainchan, uboachan,
|
||||||
22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal,
|
22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal,
|
||||||
horochan, doushio, desuchan, tgchan, lolnada, 7chan, chanon,
|
horochan, doushio, desuchan, tgchan, lolnada, 7chan, chanon,
|
||||||
chan.org.li, hispachan, 8kun, nowere, iichan and more.
|
chan.org.li, hispachan, 8kun, nowere, iichan, 2chan and more.
|
||||||
|
|
||||||
Can optionally push monitoring data to InfluxDB. Below is an
|
Can optionally push monitoring data to InfluxDB. Below is an
|
||||||
example of Grafana being used to display it.
|
example of Grafana being used to display it.
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from chan.json import JsonChanHelper
|
from chan.chan_json import JsonChanHelper
|
||||||
from post_process import get_links_from_body
|
from post_process import get_links_from_body
|
||||||
|
|
||||||
|
|
||||||
|
26
chan/chan.py
26
chan/chan.py
@ -9,10 +9,11 @@ from chan.endchan_html import EndchanHtmlChanHelper
|
|||||||
from chan.fchan_html import FChanHtmlChanHelper
|
from chan.fchan_html import FChanHtmlChanHelper
|
||||||
from chan.hispachan_html import HispachanHtmlHelper
|
from chan.hispachan_html import HispachanHtmlHelper
|
||||||
from chan.iichan_html import IichanHtmlChanHelper
|
from chan.iichan_html import IichanHtmlChanHelper
|
||||||
from chan.json import JsonChanHelper
|
from chan.chan_json import JsonChanHelper
|
||||||
from chan.json_kun import JsonKunChanHelper
|
from chan.json_kun import JsonKunChanHelper
|
||||||
from chan.kev4_php import Kev4PhpHelper
|
from chan.kev4_php import Kev4PhpHelper
|
||||||
from chan.lolnada_html import LolNadaHtmlChanHelper
|
from chan.lolnada_html import LolNadaHtmlChanHelper
|
||||||
|
from chan.lynx import LynxChanHelper
|
||||||
from chan.mayuri import MayuriChanHelper
|
from chan.mayuri import MayuriChanHelper
|
||||||
from chan.nowere_html import NowereHtmlChanHelper
|
from chan.nowere_html import NowereHtmlChanHelper
|
||||||
from chan.plus4chan_html import Plus4ChanHelper
|
from chan.plus4chan_html import Plus4ChanHelper
|
||||||
@ -596,5 +597,26 @@ CHANS = {
|
|||||||
),
|
),
|
||||||
rps=1 / 3
|
rps=1 / 3
|
||||||
),
|
),
|
||||||
# next is 36
|
"waifuist": LynxChanHelper(
|
||||||
|
36,
|
||||||
|
"https://waifuist.pro/",
|
||||||
|
"https://waifuist.pro/",
|
||||||
|
"/res/",
|
||||||
|
"",
|
||||||
|
(
|
||||||
|
"w", "starlet", "etc",
|
||||||
|
),
|
||||||
|
rps=1 / 25
|
||||||
|
),
|
||||||
|
"cutiegarden": LynxChanHelper(
|
||||||
|
37,
|
||||||
|
"https://cutie.garden/",
|
||||||
|
"https://cutie.garden/",
|
||||||
|
"/res/",
|
||||||
|
"",
|
||||||
|
(
|
||||||
|
"lg", "cozy", "meta", "test"
|
||||||
|
),
|
||||||
|
rps=1 / 25
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from vanwanet_scrape.scraper import Scraper
|
from vanwanet_scrape.scraper import Scraper
|
||||||
|
|
||||||
from chan.json import JsonChanHelper
|
from chan.chan_json import JsonChanHelper
|
||||||
from util import logger
|
from util import logger
|
||||||
|
|
||||||
|
|
||||||
|
83
chan/lynx.py
Normal file
83
chan/lynx.py
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
import json
from datetime import datetime
from json import JSONDecodeError
from urllib.parse import urljoin

import cloudscraper

from chan.helper import ChanHelper
from util import logger


class LynxChanHelper(ChanHelper):
    """Scraper helper for LynxChan-based imageboards.

    See https://gitgud.io/LynxChan/LynxChan/blob/master/doc/Json.txt
    for the JSON API this parses.
    """

    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
        super().__init__(db_id, base_url, image_url, thread_path, image_path, boards, rps)

        # LynxChan sites are frequently behind Cloudflare; route all GETs
        # through cloudscraper so the anti-bot challenge is handled.
        scraper = cloudscraper.create_scraper()
        self.get_method = scraper.get

    @staticmethod
    def item_id(item):
        # Threads carry "threadId", replies carry "postId".
        return item["threadId"] if LynxChanHelper.item_type(item) == "thread" else item["postId"]

    @staticmethod
    def item_mtime(item):
        # "creation" is an ISO-8601 timestamp. LynxChan emits a trailing "Z"
        # (UTC), which datetime.fromisoformat() rejects before Python 3.11 —
        # normalize it to an explicit offset first (no-op if absent).
        return datetime.fromisoformat(item["creation"].replace("Z", "+00:00")).timestamp()

    def item_urls(self, item, board):
        """Return absolute URLs of all files attached to *item* ([] if none)."""
        if "files" in item and item["files"]:
            return [urljoin(self._base_url, im["path"]) for im in item["files"]]
        return []

    @staticmethod
    def item_type(item):
        return "thread" if "threadId" in item else "post"

    def threads_url(self, board):
        # First page of the board index; parse_threads_list() follows
        # pagination from there.
        return "%s%s/1.json" % (self._base_url, board)

    @staticmethod
    def thread_mtime(thread):
        # The index JSON has no real mtime: use the total reply count
        # (visible + omitted) as a change counter. NOTE: "ommitedPosts"
        # [sic] is the LynxChan API's own misspelling — do not "fix" it.
        return (thread["ommitedPosts"] if "ommitedPosts" in thread else 0) + len(thread["posts"])

    @staticmethod
    def parse_threads_list(r):
        """Parse a board index response.

        Returns (threads, next_page_url); next_page_url is None on the
        last page or on a parse error.
        """
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
            if len(j) == 0 or "threads" not in j:
                logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
                return [], None
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return [], None

        next_page = None
        # Strip any query string, then pull the page number out of ".../<n>.json".
        url = r.url[:r.url.rfind("?")] if "?" in r.url else r.url
        current_page = int(url[url.rfind("/") + 1:-5])
        if current_page < j["pageCount"]:
            next_page = urljoin(r.url, "%d.json" % (current_page + 1))

        return j["threads"], next_page

    @staticmethod
    def parse_thread(r):
        """Parse a thread response into a flat list: all replies, then the OP."""
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return []

        all_items = []
        for post in j["posts"]:
            # Tag each reply with its parent thread id for downstream linking.
            post["_parent"] = j["threadId"]
            all_items.append(post)

        # The thread object itself (minus its replies) is the OP item.
        del j["posts"]
        all_items.append(j)

        return all_items
|
@ -1,4 +1,4 @@
|
|||||||
from chan.json import JsonChanHelper
|
from chan.chan_json import JsonChanHelper
|
||||||
from post_process import get_links_from_body
|
from post_process import get_links_from_body
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,3 +8,5 @@ pika
|
|||||||
bs4
|
bs4
|
||||||
urllib3
|
urllib3
|
||||||
git+git://github.com/simon987/hexlib.git
|
git+git://github.com/simon987/hexlib.git
|
||||||
|
git+git://github.com/simon987/vanwanet_scrape.git
|
||||||
|
cloudscraper
|
10
run.py
10
run.py
@ -102,14 +102,14 @@ class ChanState:
|
|||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
def mark_visited(self, item: int, helper):
|
def mark_visited(self, item: int, helper):
|
||||||
with sqlite3.connect(self._db) as conn:
|
with sqlite3.connect(self._db, timeout=10000) as conn:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"INSERT INTO posts (post, chan) VALUES (?,?)",
|
"INSERT INTO posts (post, chan) VALUES (?,?)",
|
||||||
(item, helper.db_id)
|
(item, helper.db_id)
|
||||||
)
|
)
|
||||||
|
|
||||||
def has_visited(self, item: int, helper):
|
def has_visited(self, item: int, helper):
|
||||||
with sqlite3.connect(self._db) as conn:
|
with sqlite3.connect(self._db, timeout=10000) as conn:
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"SELECT post FROM posts WHERE post=? AND chan=?",
|
"SELECT post FROM posts WHERE post=? AND chan=?",
|
||||||
@ -122,7 +122,7 @@ class ChanState:
|
|||||||
if mtime == -1:
|
if mtime == -1:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
with sqlite3.connect(self._db, timeout=5000) as conn:
|
with sqlite3.connect(self._db, timeout=10000) as conn:
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"SELECT last_modified, ts FROM threads WHERE thread=? AND chan=?",
|
"SELECT last_modified, ts FROM threads WHERE thread=? AND chan=?",
|
||||||
@ -134,7 +134,7 @@ class ChanState:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def mark_thread_as_visited(self, thread, helper, board):
|
def mark_thread_as_visited(self, thread, helper, board):
|
||||||
with sqlite3.connect(self._db, timeout=5000) as conn:
|
with sqlite3.connect(self._db, timeout=10000) as conn:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"INSERT INTO threads (thread, last_modified, chan) "
|
"INSERT INTO threads (thread, last_modified, chan) "
|
||||||
"VALUES (?,?,?) "
|
"VALUES (?,?,?) "
|
||||||
@ -243,7 +243,7 @@ if __name__ == "__main__":
|
|||||||
state = ChanState()
|
state = ChanState()
|
||||||
|
|
||||||
publish_q = Queue()
|
publish_q = Queue()
|
||||||
for _ in range(5):
|
for _ in range(10):
|
||||||
publish_thread = Thread(target=publish_worker, args=(publish_q, chan_helper, proxy))
|
publish_thread = Thread(target=publish_worker, args=(publish_q, chan_helper, proxy))
|
||||||
publish_thread.setDaemon(True)
|
publish_thread.setDaemon(True)
|
||||||
publish_thread.start()
|
publish_thread.start()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user