From 381468a4636e32ee007fdd058b3d9c391e512d58 Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 17 Dec 2019 13:07:43 -0500 Subject: [PATCH] 8kun hotfix --- chan/chan.py | 90 +++++++++++++++++------------------------------- chan/helper.py | 1 + chan/json_kun.py | 23 ++++++++++++- run.py | 4 +-- util.py | 5 ++- 5 files changed, 60 insertions(+), 63 deletions(-) diff --git a/chan/chan.py b/chan/chan.py index 1774a68..fb0bca4 100644 --- a/chan/chan.py +++ b/chan/chan.py @@ -384,68 +384,39 @@ CHANS = { ), rps=1 / 60 ), - "8kun": JsonKunChanHelper( - 29, - "https://8kun.net/", - "https://media.8kun.net/", + "8kun2": JsonKunChanHelper( + 35, + "https://8kun.top/", + "https://media.8kun.top/", "/res/", "file_store/", ( - "1", - "a", - "aneki", - "ara", - "asatru", - "asmr", - "aus", - "b", - "baka", - "brit", - "builders", - "cafechan", - "canada", - "choroy", - "co", - "cuckquean", - "cute", - "cyoa", - "dempart", - "e9y", - "egy", "hgg", - "f1", - "fa", - "feet", - "film", - "furry", - "hentai", - "hisparefugio", - "hypno", - "islam", - "ita", - "jp", - "komika", - "lewd", - "lit", - "magick", - "mde", - "monster", - "mu", - "newbrit", - "newsplus", - "ntr", - "philosophy", - "qresearch", - "rzabczan", - "s", - "sl", - "tg", - "turul", - "tv", - "v", - "vichan", - "vr", - "wx", - "x", + "1", "55chan", "64chen", "8bantb", "8tube", "a", "abdl2", "agdg", "amv", "aneki", "animu", "animus", + "ara", "arda", "arms", "asatru", "asmr", "aus", "ausneets", "b", "baka", "baneposting", "baseballbat", + "bcards", "bleached", "blog", "bonehurtingjuice", "bq", "brit", "bubblegum", "builders", "bunkers", "butt", + "cafechan", "caffe", "canada", "cath", "chori", "choroy", "christian", "christianity", "christianmeme", + "cicachan", "civicrs", "ck", "cloveros", "co", "cow", "cuckquean", "cute", "cyber", "cyoa", "czech", + "dadtalk", "danpu", "dao101", "degen", "delete", "dempart", "desu", "diaperfags", "diaperfetish", "dir", + "dolphin", "dpfag", "dpr", "druid", "e9y", "eatme", "ebola", "eerie", "egy", "egypt", "etika", "eu", + "euskotxa", "exit", "f1", "fa", "fairy", "fallen", "fast", "faygo", "feet", "femaledomination", "feri", + "fightcomms", "film", "flemish", "floss", "fortnite", "freedomzine", "fukemo", "fumo", "fur", "furry", "g", + "gamergatehq", "genesis", "gesu", "ggis", "girltalk", "greenbreeze", "gts", "haxxor", "hentai", + "hentaiclub", "herm", "hermetics", "hgb", "hgg", "hindu", "hisparefugio", "hissss", "hnt", "hover", + "hybrids", "hydrus", "hypno", "hypnochan", "icup", "imperium", "in", "ipfs", "ircsecrets", "islam", "ita", + "jaooo", "jewess", "jmaatv", "joker", "jp", "k", "kekforceusa", "kemono", "kocsog", "kohlchan", "komica", + "komika", "kpop", "lain", "lego", "leo", "lewd", "lit", "lol", "loomis", "loroy", "luddite", "magick", + "maka", "mde", "merrychristmas", "miku", "milf", "mom", "monster", "msb", "mtb", "mtt", "mu", "n0thingness", + "nanachi", "natiofr", "nep", "newbrit", "newsplus", "nobody", "nofap", "nofur", "nogatco", "nothingness", + "ntr", "nuke8", "oanda", "ocb", "ocult", "omorashi", "opmk", "os", "otter", "p", "panconleche", "pdfs", + "peaceofmind", "pen", "philosophy", "pkmns", "pnd", "pokeporn", "polymath", "pone", "projectdcomms", + "pyatibrat", "qm", "qpatriotresearch", "qresearch", "qrnews", "rand21", "rec", "rmart", "rusrandom", + "rzabczan", "s", "s8s", "sag", "sapphic", "shousa", "sikhi", "sip", "sl", "snowboarding", "socpl", "strek", + "subs", "sve", "t", "tan", "tdt", "tech9", "techan", "techbunker", "tek", "templeos", "tenda", "teraha", + "texit", "tf2", "tg", "thb", "thedickshow", "throat", "tibby", "tikilounge", "tkr", "tr55", + "trashcollector", "truthlegion", "tulpamancers", "turul", "tutturu", "tv", "u", "uaco", "ucla", + "underground", "usersunion", "v", "vichan", "vietkong", "vietnam", "vore", "vr", "warposting", "wdsc", + "webm", "wg", "wga", "wikieat", "wis", "wmafsex", "workrelated", "wqt", "wx", "x", "xivl", "xtian", + "zoomerright", "zundel", ), rps=1 ), @@ -625,4 +596,5 @@ CHANS = { ), rps=1 / 3 ), + # next is 36 } diff --git a/chan/helper.py b/chan/helper.py index c1740d3..32274bf 100644 --- a/chan/helper.py +++ b/chan/helper.py @@ -10,6 +10,7 @@ class ChanHelper: self._image_path = image_path self._boards = boards self.rps = rps + self.get_method = None def boards(self): return [b.replace("\\_", "_") for b in self._boards if not b.startswith("_")] diff --git a/chan/json_kun.py b/chan/json_kun.py index 04ea19b..c463728 100644 --- a/chan/json_kun.py +++ b/chan/json_kun.py @@ -1,8 +1,29 @@ +from vanwanet_scrape.scraper import Scraper + from chan.json import JsonChanHelper +from util import logger class JsonKunChanHelper(JsonChanHelper): + def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps): + super().__init__(db_id, base_url, image_url, thread_path, image_path, boards, rps) + + self._scraper = Scraper( + headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Referer": "https://8kun.top/index.html" + }, + domains=[ + "8kun.top", + "media.8kun.top", + "sys.8kun.net" + ], + logger=logger + ) + + self.get_method = self._scraper.get + def image_url(self, board, tim, extension): return "%s%s%s%s" % (self._image_url, self._image_path, tim, extension) - diff --git a/run.py b/run.py index 38f57d3..4226f0a 100644 --- a/run.py +++ b/run.py @@ -21,7 +21,7 @@ BYPASS_RPS = False class ChanScanner: def __init__(self, helper, proxy): - self.web = Web(monitoring if MONITORING else None, rps=helper.rps, proxy=proxy) + self.web = Web(monitoring if MONITORING else None, rps=helper.rps, get_method=helper.get_method, proxy=proxy) self.helper = helper self.state = ChanState() @@ -142,7 +142,7 @@ class ChanState: def publish_worker(queue: Queue, helper, p): channel = connect() - web = Web(monitoring if MONITORING else None, rps=helper.rps, proxy=p) + web = Web(monitoring if MONITORING else None, rps=helper.rps, get_method=helper.get_method, proxy=p) while True: try: diff --git a/util.py b/util.py index 8bb8e7a..72d6a2b 100644 --- a/util.py +++ b/util.py @@ -25,13 +25,14 @@ logger.addHandler(StreamHandler(sys.stdout)) class Web: - def __init__(self, monitoring, rps=1 / 2, proxy=None): + def __init__(self, monitoring, rps=1 / 2, proxy=None, get_method=None): self.session = requests.Session() if proxy: self.session.proxies = {"http": proxy, "https": proxy} self.session.verify = False self._rps = rps self.monitoring = monitoring + self._get_method = get_method @rate_limit(self._rps) def _get(url, **kwargs): @@ -40,6 +41,8 @@ class Web: while retries > 0: retries -= 1 try: + if self._get_method: + return self._get_method(url, **kwargs) return self.session.get(url, **kwargs) except Exception as e: logger.warning("Error with request %s: %s" % (url, str(e)))