diff --git a/docker-compose.yml b/docker-compose.yml index 0268694..d20b950 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,3 +8,4 @@ services: - "GTV_REDIS_HOST=" - "GTV_RPS=0.10" - "GTV_MAX_PAGES=9999999" + - "GTV_RECRAWL_HOURS=8" diff --git a/run.py b/run.py index 6a1f93a..a418332 100644 --- a/run.py +++ b/run.py @@ -62,10 +62,9 @@ if __name__ == "__main__": rdb = redis.Redis(host=REDIS_HOST, port=REDIS_PORT) publish_q = Queue() - for _ in range(3): - publish_thread = Thread(target=publish_worker, args=(publish_q,)) - publish_thread.setDaemon(True) - publish_thread.start() + publish_thread = Thread(target=publish_worker, args=(publish_q,)) + publish_thread.setDaemon(True) + publish_thread.start() s = GabTvScanner(state, GTV_RPS) diff --git a/state.py b/state.py index a2091cd..98fd563 100644 --- a/state.py +++ b/state.py @@ -1,7 +1,12 @@ from time import time +import os from hexlib.db import VolatileState, VolatileBooleanState +from util import logger + +RECRAWL_HOURS = int(os.environ.get("GTV_RECRAWL_HOURS", 8)) + class GabTvState: def __init__(self, prefix, host, port): @@ -22,7 +27,9 @@ class GabTvState: # com_count = self._episodes["episodes"][episode["_id"]] # return not com_count or episode["stats"]["commentCount"] == com_count last_visited = self._episodes["ep_ts"][episode["_id"]] - return last_visited and int(time()) - int(last_visited) <= 3600 * 24 * 3 + if last_visited: + logger.debug("Last seen: %d hours ago" % (int(time() - last_visited) / 3600)) + return last_visited and int(time() - last_visited) <= 3600 * RECRAWL_HOURS def mark_visited_episode(self, episode): self._episodes["ep_ts"][episode["_id"]] = int(time())