Compare commits

...

4 Commits

Author SHA1 Message Date
046467f535 fix 0chan 2020-12-20 21:09:11 -05:00
a971ac8c5d fix plus4chan 2020-12-20 21:05:58 -05:00
6da2cd54c3 Performance improvements 2020-12-20 20:27:24 -05:00
75f5d79a75 Use sets instead of keys in state 2020-12-20 19:58:31 -05:00
3 changed files with 16 additions and 17 deletions

View File

@@ -45,7 +45,7 @@ class Plus4ChanHelper(ChanHelper):
threads.append({
"id": int(threadEl.get("id")[1:]),
"omit": int(omit.text.split(" ")[1]) if omit else 0
"omit": int(omit.text.strip().split(" ")[1]) if omit else 0
})
for link in soup.find_all("a", href=lambda x: x):

View File

@@ -78,5 +78,5 @@ class ZerochanHtmlChanHelper(DoushioHtmlChanHelper):
"type": "thread",
"html": str(op_el),
"time": int(datetime.datetime.strptime(_ru_datefmt(op_el.find("time").text),
"%d %b %Y %H:%M").timestamp())
"%d %b %Y %H:%M:%S").timestamp())
}

29
run.py
View File

@@ -8,7 +8,7 @@ from queue import Queue
from threading import Thread
import redis
from hexlib.db import VolatileState
from hexlib.db import VolatileBooleanState, VolatileState
from hexlib.monitoring import Monitoring
from chan.chan import CHANS
@@ -27,10 +27,9 @@ else:
REDIS_HOST = os.environ.get("CF_REDIS_HOST", "localhost")
REDIS_PORT = os.environ.get("CF_REDIS_PORT", 6379)
CHAN = os.environ.get("CF_CHAN", None)
CF_PUBLISH = os.environ.get("CF_PUBLISH", False)
ARC_LISTS = os.environ.get("CF_ARC_LISTS", "arc,imhash").split(",")
PUB_CHANNEL = os.environ.get("CF_PUB_CHANNEL", "chan_feed")
ARC_LISTS = os.environ.get("CF_ARC_LISTS", "arc").split(",")
class ChanScanner:
@@ -84,30 +83,29 @@ def once(func):
class ChanState:
def __init__(self, prefix):
self._state = VolatileState(prefix, 86400 * 7, host=REDIS_HOST, port=REDIS_PORT)
self._posts = VolatileBooleanState(prefix, host=REDIS_HOST, port=REDIS_PORT)
self._threads = VolatileState(prefix, host=REDIS_HOST, port=REDIS_PORT)
print("redis host=" + REDIS_HOST)
def mark_visited(self, item: int):
self._state["posts"][item] = 1
self._posts["posts"][item] = True
def has_visited(self, item: int):
return self._state["posts"][item] is not None
return self._posts["posts"][item]
def has_new_posts(self, thread, helper, board):
mtime = helper.thread_mtime(thread)
if mtime == -1:
return True
t = self._state["threads"][helper.item_unique_id(thread, board)]
t = self._threads["threads"][helper.item_unique_id(thread, board)]
if not t or helper.thread_mtime(thread) != t["last_modified"] or t["ts"] + 86400 < int(time.time()):
return True
return False
return not t or helper.thread_mtime(thread) != t["m"] or t["t"] + 86400 < int(time.time())
def mark_thread_as_visited(self, thread, helper, board):
self._state["threads"][helper.item_unique_id(thread, board)] = {
"ts": time.time(),
"last_modified": helper.thread_mtime(thread)
self._threads["threads"][helper.item_unique_id(thread, board)] = {
"t": int(time.time()),
"m": helper.thread_mtime(thread)
}
@@ -133,7 +131,8 @@ def publish(item, board, helper):
routing_key = "%s.%s.%s" % (CHAN, item_type, board)
message = json.dumps(item, separators=(',', ':'), ensure_ascii=False, sort_keys=True)
rdb.publish("chan." + routing_key, message)
if CF_PUBLISH:
rdb.publish("chan." + routing_key, message)
for arc in ARC_LISTS:
rdb.lpush(arc + ".chan." + routing_key, message)