From f25d091da8f5ad55c8bfd78d2e25902bb298e5b1 Mon Sep 17 00:00:00 2001
From: simon
Date: Sat, 7 Sep 2019 15:31:09 -0400
Subject: [PATCH] per-chan rate limits

---
Review notes (text between the three-dash line and the first "diff --git"
is not part of the commit):

 * This copy of the patch had lost its line structure. Line breaks,
   leading indentation and blank context lines were restored by hand and
   checked against the hunk line counts and the diffstat. Wrap points
   inside the multi-line board tuples and the exact column of each line
   could not be recovered and follow convention -- run
   `git apply --check` against the parent of f25d091 before relying on it.
 * ChanHelper.__init__ defaults rps to None, and both call sites in run.py
   pass rps=helper.rps explicitly, so the new Web(rps=1/2) default is
   bypassed for any helper built without rps. How rate_limit() treats None
   is not visible in this patch -- TODO confirm every helper sets rps.
 * In the "1chan" entry the boards argument is a parenthesised string,
   not a one-element tuple (no trailing comma). ChanHelper.boards()
   iterates self._boards, so it would yield single characters. The entry
   is already marked TODO and is left untouched here.

 chan.py | 24 +++++++++++++++---------
 run.py  |  4 ++--
 util.py | 13 ++++++++++---
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/chan.py b/chan.py
index 75c3686..e57876c 100644
--- a/chan.py
+++ b/chan.py
@@ -1,4 +1,3 @@
-import datetime
 import json
 from urllib.parse import urljoin
 
@@ -8,13 +7,14 @@ from post_process import get_links_from_body
 
 
 class ChanHelper:
-    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards):
+    def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps=None):
         self.db_id = db_id
         self._base_url = base_url
         self._image_url = image_url
         self._thread_path = thread_path
         self._image_path = image_path
         self._boards = boards
+        self.rps = rps
 
     def boards(self):
         return [b for b in self._boards if not b.startswith("_")]
@@ -87,8 +87,6 @@ class HtmlChanHelper(ChanHelper):
 
     @staticmethod
     def item_mtime(item):
-        print(item)
-        exit(0)
         return 0 # TODO
 
     def parse_threads_list(self, r):
@@ -230,7 +228,8 @@ CHANS = {
             "gd", "hc", "his", "int", "jp", "lit", "mlp", "mu",
             "n", "news", "out", "po", "pol", "qst", "sci", "soc",
             "sp", "tg", "toy", "trv", "tv", "vp", "wsg", "wsr", "x"
-        )
+        ),
+        rps=2/3
     ),
     "lainchan": JsonChanHelper(
         2,
@@ -242,7 +241,8 @@ CHANS = {
             "λ", "diy", "sec", "tech", "inter", "lit", "music", "vis",
             "hum", "drg", "zzz", "layer", "q", "r", "_cult", "_psy",
             "_mega",
-        )
+        ),
+        rps=1 / 60
     ),
     "uboachan": JsonChanHelper(
         3,
@@ -253,7 +253,8 @@ CHANS = {
         (
             "yn", "yndd", "fg", "yume", "o", "lit", "media", "og",
             "ig", "2", "ot", "hikki", "cc", "x", "sugg"
-        )
+        ),
+        rps=1 / 120
     ),
     "22chan": JsonChanHelper(
         4,
@@ -264,7 +265,8 @@ CHANS = {
         (
             "a", "b", "f", "yu", "i", "k", "mu", "pol", "sewers",
             "sg", "t", "vg"
-        )
+        ),
+        rps=1 / 120
     ),
     "wizchan": JsonChanHelper(
         5,
@@ -274,7 +276,8 @@ CHANS = {
         "/src/",
         (
             "wiz", "dep", "hob", "lounge", "jp", "meta", "games", "music",
-        )
+        ),
+        rps=1 / 30
     ),
     # TODO
     "1chan": ChanHelper(
@@ -286,6 +289,7 @@ CHANS = {
         (
             "rails"
         ),
+        rps=1 / 600
     ),
     "2chhk": RussianJsonChanHelper(
         7,
@@ -305,6 +309,7 @@ CHANS = {
             "a", "fd", "ja", "ma", "vn", "fg", "fur", "gg", "ga",
             "vape", "h", "ho", "hc", "e", "fet", "sex", "fag"
         ),
+        rps=1
     ),
     "endchan": HtmlChanHelper(
         8,
@@ -325,5 +330,6 @@ CHANS = {
             "ausneets", "qanonresearch", "polru", "yuri", "christianity",
             "kc", "rapport", "news", "brit", "webm", "4chon"
         ),
+        rps=1
     ),
 }
diff --git a/run.py b/run.py
index bc8ac57..b73d942 100644
--- a/run.py
+++ b/run.py
@@ -19,7 +19,7 @@ MONITORING = True
 
 class ChanScanner:
     def __init__(self, helper):
-        self.web = Web(monitoring if MONITORING else None)
+        self.web = Web(monitoring if MONITORING else None, rps=helper.rps)
         self.helper = helper
         self.state = ChanState()
 
@@ -140,7 +140,7 @@ class ChanState:
 
 def publish_worker(queue: Queue, helper):
     channel = connect()
-    web = Web(monitoring if MONITORING else None)
+    web = Web(monitoring if MONITORING else None, rps=helper.rps)
 
     while True:
         try:
diff --git a/util.py b/util.py
index b578bd3..b8f4802 100644
--- a/util.py
+++ b/util.py
@@ -42,14 +42,21 @@ def rate_limit(per_second):
 
 
 class Web:
-    def __init__(self, monitoring):
+    def __init__(self, monitoring, rps=1/2):
         self.session = requests.Session()
+        self._rps = rps
         self.monitoring = monitoring
 
-    @rate_limit(1 / 2) # TODO: per chan rate limit?
+        @rate_limit(self._rps)
+        def _get(url, **kwargs):
+            return self.session.get(url, **kwargs)
+
+        self._get = _get
+
     def get(self, url, **kwargs):
         try:
-            r = self.session.get(url, **kwargs)
+            r = self._get(url, **kwargs)
+
             logger.debug("GET %s <%d>" % (url, r.status_code))
             if self.monitoring:
                 self.monitoring.log([{