This commit is contained in:
simon987 2021-09-25 15:42:58 -04:00
parent 9ce05ddc78
commit b103bfb4a5
5 changed files with 23 additions and 29 deletions

20
lbry.py
View File

@ -1,10 +1,11 @@
from time import time
import json
import os
from time import time
import requests
from hexlib.log import logger
from state import LbryState
from util import logger
BASE_URL = "https://api.lbry.tv/api"
LIGHTHOUSE_URL = "https://lighthouse.lbry.com"
@ -18,6 +19,12 @@ class LbryApi:
def __init__(self):
    """Create the HTTP session, routing it through a proxy when PROXY is set."""
    session = requests.session()

    # When the PROXY environment variable is present, the same proxy URL is
    # applied to both the http and https schemes.
    proxy = os.environ.get("PROXY")
    if proxy is not None:
        session.proxies = dict(http=proxy, https=proxy)

    self._s = session
def _post(self, url, **kwargs):
r = self._s.post(url, **kwargs)
logger.debug("GET %s <%d>" % (url, r.status_code))
@ -178,6 +185,10 @@ class LbryWrapper:
# ancaps
"0135b83c29aa82120401f3f9053bf5b0520529ed",
"b89ed227c49e726fcccf913bdc9dec4c8fec99c2",
"6caae01aaa534cc4cb2cb1d8d0a8fd4a9553b155",
"dbe7328c6698c8d8853183f87e50a97a87a33222",
"8954add966e59c9cba98a143a3387f788a36d7be"
]
for channel_id in seed_list:
@ -200,6 +211,9 @@ class LbryWrapper:
for claim in self._get_videos(channel_id):
if "short_url" not in claim["signing_channel"]:
continue
channel_url = claim["signing_channel"]["short_url"]
if not published_channel_data:
@ -237,5 +251,3 @@ class LbryWrapper:
self._state.mark_visited(channel_id)
logger.warning("No more channels to crawl!")

View File

@ -1,3 +1,3 @@
requests
git+git://github.com/simon987/hexlib.git
redis
redis
git+git://github.com/simon987/hexlib.git

8
run.py
View File

@ -1,11 +1,9 @@
import json
import redis
import os
from hexlib.env import get_redis
from lbry import LbryWrapper
REDIS_HOST = os.environ.get("LF_REDIS_HOST", "localhost")
def publish(item, item_type):
routing_key = f"arc.lbry.{item_type}.x"
@ -24,7 +22,7 @@ def publish(item, item_type):
if __name__ == '__main__':
lbry = LbryWrapper()
rdb = redis.Redis(host=REDIS_HOST)
rdb = get_redis()
for item, item_type in lbry.all_items():
publish(item, item_type)

View File

@ -1,14 +1,11 @@
from hexlib.db import VolatileQueue, VolatileBooleanState
import os
REDIS_HOST = os.environ.get("LF_REDIS_HOST", "localhost")
class LbryState:
def __init__(self):
self._visited = VolatileBooleanState(prefix="lbry", host=REDIS_HOST)
self._channel_queue = VolatileQueue("lbry_channel_queue", host=REDIS_HOST)
self._visited = VolatileBooleanState(prefix="lbry", sep=".")
self._channel_queue = VolatileQueue("lbry_channel_queue")
def has_visited(self, item_id):
return self._visited["byid"][item_id]

13
util.py
View File

@ -1,13 +0,0 @@
import logging
import sys
from logging import StreamHandler
# Shared application logger: DEBUG level, one stdout handler, timestamped format.
logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)

# Detach every handler already attached to the "default" logger so that
# (re-)importing this module never produces duplicated output.
# Iterate over a copy: removeHandler() mutates logger.handlers, and removing
# from the live list while looping over it skips every other handler.
for h in list(logger.handlers):
    logger.removeHandler(h)

handler = StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)-5s %(message)s"))
logger.addHandler(handler)