Compare commits

...

2 Commits

Author SHA1 Message Date
b103bfb4a5 Updates 2021-09-25 15:42:58 -04:00
9ce05ddc78 don't publish channel multiple times 2021-02-07 18:48:55 -05:00
5 changed files with 24 additions and 29 deletions

21
lbry.py
View File

@ -1,10 +1,11 @@
from time import time
import json import json
import os
from time import time
import requests import requests
from hexlib.log import logger
from state import LbryState from state import LbryState
from util import logger
BASE_URL = "https://api.lbry.tv/api" BASE_URL = "https://api.lbry.tv/api"
LIGHTHOUSE_URL = "https://lighthouse.lbry.com" LIGHTHOUSE_URL = "https://lighthouse.lbry.com"
@ -18,6 +19,12 @@ class LbryApi:
def __init__(self): def __init__(self):
self._s = requests.session() self._s = requests.session()
if os.environ.get("PROXY") is not None:
self._s.proxies = {
"http": os.environ.get("PROXY"),
"https": os.environ.get("PROXY"),
}
def _post(self, url, **kwargs): def _post(self, url, **kwargs):
r = self._s.post(url, **kwargs) r = self._s.post(url, **kwargs)
logger.debug("GET %s <%d>" % (url, r.status_code)) logger.debug("GET %s <%d>" % (url, r.status_code))
@ -178,6 +185,10 @@ class LbryWrapper:
# ancaps # ancaps
"0135b83c29aa82120401f3f9053bf5b0520529ed", "0135b83c29aa82120401f3f9053bf5b0520529ed",
"b89ed227c49e726fcccf913bdc9dec4c8fec99c2", "b89ed227c49e726fcccf913bdc9dec4c8fec99c2",
"6caae01aaa534cc4cb2cb1d8d0a8fd4a9553b155",
"dbe7328c6698c8d8853183f87e50a97a87a33222",
"8954add966e59c9cba98a143a3387f788a36d7be"
] ]
for channel_id in seed_list: for channel_id in seed_list:
@ -200,11 +211,15 @@ class LbryWrapper:
for claim in self._get_videos(channel_id): for claim in self._get_videos(channel_id):
if "short_url" not in claim["signing_channel"]:
continue
channel_url = claim["signing_channel"]["short_url"] channel_url = claim["signing_channel"]["short_url"]
if not published_channel_data: if not published_channel_data:
channel_data = self._api.resolve([channel_url])[channel_url] channel_data = self._api.resolve([channel_url])[channel_url]
yield channel_data, "channel" yield channel_data, "channel"
published_channel_data = True
if not self._state.has_visited(claim["claim_id"]): if not self._state.has_visited(claim["claim_id"]):
yield claim, "video" yield claim, "video"
@ -236,5 +251,3 @@ class LbryWrapper:
self._state.mark_visited(channel_id) self._state.mark_visited(channel_id)
logger.warning("No more channels to crawl!") logger.warning("No more channels to crawl!")

View File

@ -1,3 +1,3 @@
requests requests
git+git://github.com/simon987/hexlib.git
redis redis
git+git://github.com/simon987/hexlib.git

8
run.py
View File

@ -1,11 +1,9 @@
import json import json
import redis
import os from hexlib.env import get_redis
from lbry import LbryWrapper from lbry import LbryWrapper
REDIS_HOST = os.environ.get("LF_REDIS_HOST", "localhost")
def publish(item, item_type): def publish(item, item_type):
routing_key = f"arc.lbry.{item_type}.x" routing_key = f"arc.lbry.{item_type}.x"
@ -24,7 +22,7 @@ def publish(item, item_type):
if __name__ == '__main__': if __name__ == '__main__':
lbry = LbryWrapper() lbry = LbryWrapper()
rdb = redis.Redis(host=REDIS_HOST) rdb = get_redis()
for item, item_type in lbry.all_items(): for item, item_type in lbry.all_items():
publish(item, item_type) publish(item, item_type)

View File

@ -1,14 +1,11 @@
from hexlib.db import VolatileQueue, VolatileBooleanState from hexlib.db import VolatileQueue, VolatileBooleanState
import os
REDIS_HOST = os.environ.get("LF_REDIS_HOST", "localhost")
class LbryState: class LbryState:
def __init__(self): def __init__(self):
self._visited = VolatileBooleanState(prefix="lbry", host=REDIS_HOST) self._visited = VolatileBooleanState(prefix="lbry", sep=".")
self._channel_queue = VolatileQueue("lbry_channel_queue", host=REDIS_HOST) self._channel_queue = VolatileQueue("lbry_channel_queue")
def has_visited(self, item_id): def has_visited(self, item_id):
return self._visited["byid"][item_id] return self._visited["byid"][item_id]

13
util.py
View File

@ -1,13 +0,0 @@
"""Shared logging setup.

Exposes ``logger``, the logger named "default", set to DEBUG level and
given a single handler that writes formatted records to stdout.
"""
import logging
import sys
from logging import StreamHandler

logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)

# Drop any handlers already attached to this logger so records are not
# emitted more than once. Iterate over a copy: removeHandler() mutates
# logger.handlers in place, and removing entries while iterating the live
# list skips every other handler.
for h in list(logger.handlers):
    logger.removeHandler(h)

# Single stdout handler: timestamp, level name (left-aligned, min width 5),
# then the message.
handler = StreamHandler(sys.stdout)
handler.formatter = logging.Formatter("%(asctime)s %(levelname)-5s %(message)s")
logger.addHandler(handler)