Compare commits

..

No commits in common. "b103bfb4a585dd884b45af934d114d4d4764be35" and "e4b94ce045b8d112d0b1e7551480f25a18094d9a" have entirely different histories.

5 changed files with 29 additions and 24 deletions

21
lbry.py
View File

@ -1,11 +1,10 @@
import json
import os
from time import time from time import time
import json
import requests import requests
from hexlib.log import logger
from state import LbryState from state import LbryState
from util import logger
BASE_URL = "https://api.lbry.tv/api" BASE_URL = "https://api.lbry.tv/api"
LIGHTHOUSE_URL = "https://lighthouse.lbry.com" LIGHTHOUSE_URL = "https://lighthouse.lbry.com"
@ -19,12 +18,6 @@ class LbryApi:
def __init__(self): def __init__(self):
self._s = requests.session() self._s = requests.session()
if os.environ.get("PROXY") is not None:
self._s.proxies = {
"http": os.environ.get("PROXY"),
"https": os.environ.get("PROXY"),
}
def _post(self, url, **kwargs): def _post(self, url, **kwargs):
r = self._s.post(url, **kwargs) r = self._s.post(url, **kwargs)
logger.debug("GET %s <%d>" % (url, r.status_code)) logger.debug("GET %s <%d>" % (url, r.status_code))
@ -185,10 +178,6 @@ class LbryWrapper:
# ancaps # ancaps
"0135b83c29aa82120401f3f9053bf5b0520529ed", "0135b83c29aa82120401f3f9053bf5b0520529ed",
"b89ed227c49e726fcccf913bdc9dec4c8fec99c2", "b89ed227c49e726fcccf913bdc9dec4c8fec99c2",
"6caae01aaa534cc4cb2cb1d8d0a8fd4a9553b155",
"dbe7328c6698c8d8853183f87e50a97a87a33222",
"8954add966e59c9cba98a143a3387f788a36d7be"
] ]
for channel_id in seed_list: for channel_id in seed_list:
@ -211,15 +200,11 @@ class LbryWrapper:
for claim in self._get_videos(channel_id): for claim in self._get_videos(channel_id):
if "short_url" not in claim["signing_channel"]:
continue
channel_url = claim["signing_channel"]["short_url"] channel_url = claim["signing_channel"]["short_url"]
if not published_channel_data: if not published_channel_data:
channel_data = self._api.resolve([channel_url])[channel_url] channel_data = self._api.resolve([channel_url])[channel_url]
yield channel_data, "channel" yield channel_data, "channel"
published_channel_data = True
if not self._state.has_visited(claim["claim_id"]): if not self._state.has_visited(claim["claim_id"]):
yield claim, "video" yield claim, "video"
@ -251,3 +236,5 @@ class LbryWrapper:
self._state.mark_visited(channel_id) self._state.mark_visited(channel_id)
logger.warning("No more channels to crawl!") logger.warning("No more channels to crawl!")

View File

@ -1,3 +1,3 @@
requests requests
redis
git+git://github.com/simon987/hexlib.git git+git://github.com/simon987/hexlib.git
redis

8
run.py
View File

@ -1,9 +1,11 @@
import json import json
import redis
from hexlib.env import get_redis import os
from lbry import LbryWrapper from lbry import LbryWrapper
REDIS_HOST = os.environ.get("LF_REDIS_HOST", "localhost")
def publish(item, item_type): def publish(item, item_type):
routing_key = f"arc.lbry.{item_type}.x" routing_key = f"arc.lbry.{item_type}.x"
@ -22,7 +24,7 @@ def publish(item, item_type):
if __name__ == '__main__': if __name__ == '__main__':
lbry = LbryWrapper() lbry = LbryWrapper()
rdb = get_redis() rdb = redis.Redis(host=REDIS_HOST)
for item, item_type in lbry.all_items(): for item, item_type in lbry.all_items():
publish(item, item_type) publish(item, item_type)

View File

@ -1,11 +1,14 @@
from hexlib.db import VolatileQueue, VolatileBooleanState from hexlib.db import VolatileQueue, VolatileBooleanState
import os
REDIS_HOST = os.environ.get("LF_REDIS_HOST", "localhost")
class LbryState: class LbryState:
def __init__(self): def __init__(self):
self._visited = VolatileBooleanState(prefix="lbry", sep=".") self._visited = VolatileBooleanState(prefix="lbry", host=REDIS_HOST)
self._channel_queue = VolatileQueue("lbry_channel_queue") self._channel_queue = VolatileQueue("lbry_channel_queue", host=REDIS_HOST)
def has_visited(self, item_id): def has_visited(self, item_id):
return self._visited["byid"][item_id] return self._visited["byid"][item_id]

13
util.py Normal file
View File

@ -0,0 +1,13 @@
"""Shared logging setup: exposes a module-level ``logger`` that writes to stdout."""
import logging
import sys
from logging import StreamHandler

# A single named logger shared by every module that imports it from here.
logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)

# Detach any handlers already attached to this logger so that importing or
# reloading this module never produces duplicated output lines.
# Iterate over a snapshot: removeHandler() mutates logger.handlers, and
# looping over the live list would skip every other handler.
for h in list(logger.handlers):
    logger.removeHandler(h)

# Emit everything to stdout as "<timestamp> <LEVEL> <message>".
handler = StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)-5s %(message)s"))
logger.addHandler(handler)