Compare commits

...

4 Commits

Author SHA1 Message Date
046467f535 fix 0chan 2020-12-20 21:09:11 -05:00
a971ac8c5d fix plus4chan 2020-12-20 21:05:58 -05:00
6da2cd54c3 Performance improvements 2020-12-20 20:27:24 -05:00
75f5d79a75 Use sets instead of keys in state 2020-12-20 19:58:31 -05:00
3 changed files with 16 additions and 17 deletions

View File

@ -45,7 +45,7 @@ class Plus4ChanHelper(ChanHelper):
threads.append({ threads.append({
"id": int(threadEl.get("id")[1:]), "id": int(threadEl.get("id")[1:]),
"omit": int(omit.text.split(" ")[1]) if omit else 0 "omit": int(omit.text.strip().split(" ")[1]) if omit else 0
}) })
for link in soup.find_all("a", href=lambda x: x): for link in soup.find_all("a", href=lambda x: x):

View File

@ -78,5 +78,5 @@ class ZerochanHtmlChanHelper(DoushioHtmlChanHelper):
"type": "thread", "type": "thread",
"html": str(op_el), "html": str(op_el),
"time": int(datetime.datetime.strptime(_ru_datefmt(op_el.find("time").text), "time": int(datetime.datetime.strptime(_ru_datefmt(op_el.find("time").text),
"%d %b %Y %H:%M").timestamp()) "%d %b %Y %H:%M:%S").timestamp())
} }

29
run.py
View File

@ -8,7 +8,7 @@ from queue import Queue
from threading import Thread from threading import Thread
import redis import redis
from hexlib.db import VolatileState from hexlib.db import VolatileBooleanState, VolatileState
from hexlib.monitoring import Monitoring from hexlib.monitoring import Monitoring
from chan.chan import CHANS from chan.chan import CHANS
@ -27,10 +27,9 @@ else:
REDIS_HOST = os.environ.get("CF_REDIS_HOST", "localhost") REDIS_HOST = os.environ.get("CF_REDIS_HOST", "localhost")
REDIS_PORT = os.environ.get("CF_REDIS_PORT", 6379) REDIS_PORT = os.environ.get("CF_REDIS_PORT", 6379)
CHAN = os.environ.get("CF_CHAN", None) CHAN = os.environ.get("CF_CHAN", None)
CF_PUBLISH = os.environ.get("CF_PUBLISH", False)
ARC_LISTS = os.environ.get("CF_ARC_LISTS", "arc,imhash").split(",") ARC_LISTS = os.environ.get("CF_ARC_LISTS", "arc").split(",")
PUB_CHANNEL = os.environ.get("CF_PUB_CHANNEL", "chan_feed")
class ChanScanner: class ChanScanner:
@ -84,30 +83,29 @@ def once(func):
class ChanState: class ChanState:
def __init__(self, prefix): def __init__(self, prefix):
self._state = VolatileState(prefix, 86400 * 7, host=REDIS_HOST, port=REDIS_PORT) self._posts = VolatileBooleanState(prefix, host=REDIS_HOST, port=REDIS_PORT)
self._threads = VolatileState(prefix, host=REDIS_HOST, port=REDIS_PORT)
print("redis host=" + REDIS_HOST) print("redis host=" + REDIS_HOST)
def mark_visited(self, item: int): def mark_visited(self, item: int):
self._state["posts"][item] = 1 self._posts["posts"][item] = True
def has_visited(self, item: int): def has_visited(self, item: int):
return self._state["posts"][item] is not None return self._posts["posts"][item]
def has_new_posts(self, thread, helper, board): def has_new_posts(self, thread, helper, board):
mtime = helper.thread_mtime(thread) mtime = helper.thread_mtime(thread)
if mtime == -1: if mtime == -1:
return True return True
t = self._state["threads"][helper.item_unique_id(thread, board)] t = self._threads["threads"][helper.item_unique_id(thread, board)]
if not t or helper.thread_mtime(thread) != t["last_modified"] or t["ts"] + 86400 < int(time.time()): return not t or helper.thread_mtime(thread) != t["m"] or t["t"] + 86400 < int(time.time())
return True
return False
def mark_thread_as_visited(self, thread, helper, board): def mark_thread_as_visited(self, thread, helper, board):
self._state["threads"][helper.item_unique_id(thread, board)] = { self._threads["threads"][helper.item_unique_id(thread, board)] = {
"ts": time.time(), "t": int(time.time()),
"last_modified": helper.thread_mtime(thread) "m": helper.thread_mtime(thread)
} }
@ -133,7 +131,8 @@ def publish(item, board, helper):
routing_key = "%s.%s.%s" % (CHAN, item_type, board) routing_key = "%s.%s.%s" % (CHAN, item_type, board)
message = json.dumps(item, separators=(',', ':'), ensure_ascii=False, sort_keys=True) message = json.dumps(item, separators=(',', ':'), ensure_ascii=False, sort_keys=True)
rdb.publish("chan." + routing_key, message) if CF_PUBLISH:
rdb.publish("chan." + routing_key, message)
for arc in ARC_LISTS: for arc in ARC_LISTS:
rdb.lpush(arc + ".chan." + routing_key, message) rdb.lpush(arc + ".chan." + routing_key, message)