update has_visited condition

This commit is contained in:
simon987 2021-02-06 17:31:02 -05:00
parent 2f8cbca0b2
commit 3dda97e180
3 changed files with 12 additions and 5 deletions

View File

@ -8,3 +8,4 @@ services:
- "GTV_REDIS_HOST="
- "GTV_RPS=0.10"
- "GTV_MAX_PAGES=9999999"
- "GTV_RECRAWL_HOURS=8"

7
run.py
View File

@ -62,10 +62,9 @@ if __name__ == "__main__":
rdb = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)
publish_q = Queue()
for _ in range(3):
publish_thread = Thread(target=publish_worker, args=(publish_q,))
publish_thread.setDaemon(True)
publish_thread.start()
publish_thread = Thread(target=publish_worker, args=(publish_q,))
publish_thread.setDaemon(True)
publish_thread.start()
s = GabTvScanner(state, GTV_RPS)

View File

@ -1,7 +1,12 @@
from time import time
import os
from hexlib.db import VolatileState, VolatileBooleanState
from util import logger
# Recrawl window, in hours. A stored last-visited timestamp counts as recent
# while its age is at most 3600 * RECRAWL_HOURS seconds. The value is read
# from the GTV_RECRAWL_HOURS environment variable and falls back to 8.
RECRAWL_HOURS = int(os.environ.get("GTV_RECRAWL_HOURS", 8))
class GabTvState:
def __init__(self, prefix, host, port):
@ -22,7 +27,9 @@ class GabTvState:
# com_count = self._episodes["episodes"][episode["_id"]]
# return not com_count or episode["stats"]["commentCount"] == com_count
last_visited = self._episodes["ep_ts"][episode["_id"]]
return last_visited and int(time()) - int(last_visited) <= 3600 * 24 * 3
if last_visited:
logger.debug("Last seen: %d hours ago" % (int(time() - last_visited) / 3600))
return last_visited and int(time() - last_visited) <= 3600 * RECRAWL_HOURS
def mark_visited_episode(self, episode):
    """Store the current time as the last-visited timestamp of an episode.

    The timestamp is the current epoch time truncated to whole seconds and
    is written to the "ep_ts" table of ``self._episodes`` under the
    episode's "_id" value, replacing any previous entry.

    :param episode: mapping that provides the episode identifier as "_id"
    """
    visited_at = int(time())
    timestamps = self._episodes["ep_ts"]
    timestamps[episode["_id"]] = visited_at