This commit is contained in:
simon987 2021-09-25 15:42:58 -04:00
parent 9ce05ddc78
commit b103bfb4a5
5 changed files with 23 additions and 29 deletions

20
lbry.py
View File

@ -1,10 +1,11 @@
from time import time
import json import json
import os
from time import time
import requests import requests
from hexlib.log import logger
from state import LbryState from state import LbryState
from util import logger
BASE_URL = "https://api.lbry.tv/api" BASE_URL = "https://api.lbry.tv/api"
LIGHTHOUSE_URL = "https://lighthouse.lbry.com" LIGHTHOUSE_URL = "https://lighthouse.lbry.com"
@ -18,6 +19,12 @@ class LbryApi:
def __init__(self): def __init__(self):
self._s = requests.session() self._s = requests.session()
if os.environ.get("PROXY") is not None:
self._s.proxies = {
"http": os.environ.get("PROXY"),
"https": os.environ.get("PROXY"),
}
def _post(self, url, **kwargs): def _post(self, url, **kwargs):
r = self._s.post(url, **kwargs) r = self._s.post(url, **kwargs)
logger.debug("GET %s <%d>" % (url, r.status_code)) logger.debug("GET %s <%d>" % (url, r.status_code))
@ -178,6 +185,10 @@ class LbryWrapper:
# ancaps # ancaps
"0135b83c29aa82120401f3f9053bf5b0520529ed", "0135b83c29aa82120401f3f9053bf5b0520529ed",
"b89ed227c49e726fcccf913bdc9dec4c8fec99c2", "b89ed227c49e726fcccf913bdc9dec4c8fec99c2",
"6caae01aaa534cc4cb2cb1d8d0a8fd4a9553b155",
"dbe7328c6698c8d8853183f87e50a97a87a33222",
"8954add966e59c9cba98a143a3387f788a36d7be"
] ]
for channel_id in seed_list: for channel_id in seed_list:
@ -200,6 +211,9 @@ class LbryWrapper:
for claim in self._get_videos(channel_id): for claim in self._get_videos(channel_id):
if "short_url" not in claim["signing_channel"]:
continue
channel_url = claim["signing_channel"]["short_url"] channel_url = claim["signing_channel"]["short_url"]
if not published_channel_data: if not published_channel_data:
@ -237,5 +251,3 @@ class LbryWrapper:
self._state.mark_visited(channel_id) self._state.mark_visited(channel_id)
logger.warning("No more channels to crawl!") logger.warning("No more channels to crawl!")

View File

@ -1,3 +1,3 @@
requests requests
git+git://github.com/simon987/hexlib.git redis
redis git+git://github.com/simon987/hexlib.git

8
run.py
View File

@ -1,11 +1,9 @@
import json import json
import redis
import os from hexlib.env import get_redis
from lbry import LbryWrapper from lbry import LbryWrapper
REDIS_HOST = os.environ.get("LF_REDIS_HOST", "localhost")
def publish(item, item_type): def publish(item, item_type):
routing_key = f"arc.lbry.{item_type}.x" routing_key = f"arc.lbry.{item_type}.x"
@ -24,7 +22,7 @@ def publish(item, item_type):
if __name__ == '__main__': if __name__ == '__main__':
lbry = LbryWrapper() lbry = LbryWrapper()
rdb = redis.Redis(host=REDIS_HOST) rdb = get_redis()
for item, item_type in lbry.all_items(): for item, item_type in lbry.all_items():
publish(item, item_type) publish(item, item_type)

View File

@ -1,14 +1,11 @@
from hexlib.db import VolatileQueue, VolatileBooleanState from hexlib.db import VolatileQueue, VolatileBooleanState
import os
REDIS_HOST = os.environ.get("LF_REDIS_HOST", "localhost")
class LbryState: class LbryState:
def __init__(self): def __init__(self):
self._visited = VolatileBooleanState(prefix="lbry", host=REDIS_HOST) self._visited = VolatileBooleanState(prefix="lbry", sep=".")
self._channel_queue = VolatileQueue("lbry_channel_queue", host=REDIS_HOST) self._channel_queue = VolatileQueue("lbry_channel_queue")
def has_visited(self, item_id): def has_visited(self, item_id):
return self._visited["byid"][item_id] return self._visited["byid"][item_id]

13
util.py
View File

@ -1,13 +0,0 @@
import logging
import sys
from logging import StreamHandler
# Shared application logger: the "default" logger, emitting DEBUG and above
# to stdout through a single stream handler.
logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)

# Drop any handlers already attached to this logger so records are not
# emitted twice. Iterate over a snapshot: removeHandler() mutates
# logger.handlers, and removing from the live list while looping over it
# skips every other handler.
for h in list(logger.handlers):
    logger.removeHandler(h)

handler = StreamHandler(sys.stdout)
# Timestamp, level name left-aligned and padded to at least 5 chars, then message.
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)-5s %(message)s"))
logger.addHandler(handler)