From b103bfb4a585dd884b45af934d114d4d4764be35 Mon Sep 17 00:00:00 2001 From: simon987 Date: Sat, 25 Sep 2021 15:42:58 -0400 Subject: [PATCH] Updates --- lbry.py | 20 ++++++++++++++++---- requirements.txt | 4 ++-- run.py | 8 +++----- state.py | 7 ++----- util.py | 13 ------------- 5 files changed, 23 insertions(+), 29 deletions(-) delete mode 100644 util.py diff --git a/lbry.py b/lbry.py index bda9da9..dd6fce3 100644 --- a/lbry.py +++ b/lbry.py @@ -1,10 +1,11 @@ -from time import time import json +import os +from time import time import requests +from hexlib.log import logger from state import LbryState -from util import logger BASE_URL = "https://api.lbry.tv/api" LIGHTHOUSE_URL = "https://lighthouse.lbry.com" @@ -18,6 +19,12 @@ class LbryApi: def __init__(self): self._s = requests.session() + if os.environ.get("PROXY") is not None: + self._s.proxies = { + "http": os.environ.get("PROXY"), + "https": os.environ.get("PROXY"), + } + def _post(self, url, **kwargs): r = self._s.post(url, **kwargs) logger.debug("GET %s <%d>" % (url, r.status_code)) @@ -178,6 +185,10 @@ class LbryWrapper: # ancaps "0135b83c29aa82120401f3f9053bf5b0520529ed", "b89ed227c49e726fcccf913bdc9dec4c8fec99c2", + + "6caae01aaa534cc4cb2cb1d8d0a8fd4a9553b155", + "dbe7328c6698c8d8853183f87e50a97a87a33222", + "8954add966e59c9cba98a143a3387f788a36d7be" ] for channel_id in seed_list: @@ -200,6 +211,9 @@ class LbryWrapper: for claim in self._get_videos(channel_id): + if "short_url" not in claim["signing_channel"]: + continue + channel_url = claim["signing_channel"]["short_url"] if not published_channel_data: @@ -237,5 +251,3 @@ class LbryWrapper: self._state.mark_visited(channel_id) logger.warning("No more channels to crawl!") - - diff --git a/requirements.txt b/requirements.txt index 4fd7d2c..9ff1a9e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ requests -git+git://github.com/simon987/hexlib.git -redis \ No newline at end of file +redis +git+git://github.com/simon987/hexlib.git \ No newline at end of file diff --git a/run.py b/run.py index aed4195..c130d10 100644 --- a/run.py +++ b/run.py @@ -1,11 +1,9 @@ import json -import redis -import os + +from hexlib.env import get_redis from lbry import LbryWrapper -REDIS_HOST = os.environ.get("LF_REDIS_HOST", "localhost") - def publish(item, item_type): routing_key = f"arc.lbry.{item_type}.x" @@ -24,7 +22,7 @@ def publish(item, item_type): if __name__ == '__main__': lbry = LbryWrapper() - rdb = redis.Redis(host=REDIS_HOST) + rdb = get_redis() for item, item_type in lbry.all_items(): publish(item, item_type) diff --git a/state.py b/state.py index 9bdfe36..7b0d826 100644 --- a/state.py +++ b/state.py @@ -1,14 +1,11 @@ from hexlib.db import VolatileQueue, VolatileBooleanState -import os - -REDIS_HOST = os.environ.get("LF_REDIS_HOST", "localhost") class LbryState: def __init__(self): - self._visited = VolatileBooleanState(prefix="lbry", host=REDIS_HOST) - self._channel_queue = VolatileQueue("lbry_channel_queue", host=REDIS_HOST) + self._visited = VolatileBooleanState(prefix="lbry", sep=".") + self._channel_queue = VolatileQueue("lbry_channel_queue") def has_visited(self, item_id): return self._visited["byid"][item_id] diff --git a/util.py b/util.py deleted file mode 100644 index cd834ed..0000000 --- a/util.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging -import sys -from logging import StreamHandler - -logger = logging.getLogger("default") -logger.setLevel(logging.DEBUG) - -for h in logger.handlers: - logger.removeHandler(h) - -handler = StreamHandler(sys.stdout) -handler.formatter = logging.Formatter("%(asctime)s %(levelname)-5s %(message)s") -logger.addHandler(handler)