From 6d0e3f0f520d2b18e007f320d0c45a06c17a3b51 Mon Sep 17 00:00:00 2001 From: simon987 Date: Mon, 21 Dec 2020 20:01:24 -0500 Subject: [PATCH] add 9chan --- chan/chan.py | 88 +++++++++++++++++++++++++++++++++++++++ chan/infinitynext_json.py | 72 ++++++++++++++++++++++++++++++++ docker-compose.yml | 8 ++++ get_9chan_boards.py | 51 +++++++++++++++++++++++ 4 files changed, 219 insertions(+) create mode 100644 chan/infinitynext_json.py create mode 100644 get_9chan_boards.py diff --git a/chan/chan.py b/chan/chan.py index b9c79e0..2ee713e 100644 --- a/chan/chan.py +++ b/chan/chan.py @@ -10,6 +10,7 @@ from chan.fchan_html import FChanHtmlChanHelper from chan.hispachan_html import HispachanHtmlHelper from chan.iichan_html import IichanHtmlChanHelper from chan.chan_json import JsonChanHelper +from chan.infinitynext_json import JsonInfinityNextChanHelper from chan.json_kun import JsonKunChanHelper from chan.kev4_php import Kev4PhpHelper from chan.lolnada_html import LolNadaHtmlChanHelper @@ -651,4 +652,91 @@ CHANS = { ), rps=1 / 25 ), + "9chan": JsonInfinityNextChanHelper( + 38, + "https://9chan.tw/", + "https://9chan.tw/", + "/thread/", + "", + ("nido", "b", "bestpol", "baaa2", "leftcel", "furry", "9", "magalichan", "voat", "tech", "aryan", "egoism", + "xxx", "norules", "islam", "follaburra", "left", "choroy", "libertarian", "pepinochan", "trannyhate", "chao", + "cow2", "asmr", "drug", "baaa", "monarchia", "mlpol", "fallen", "tuetuechan", "huaren", "selfimprovement", + "int", "pdfs", "femdom", "ifunny", "sneedkino", "cueva", "test", "solv", "ckva", "lovelive", "gbtv", + "bleached", "mu", "starwars", "oldnorse", "incel", "jauria", "meta", "nido", "b", "bestpol", "baaa2", + "leftcel", "furry", "9", "magalichan", "voat", "tech", "aryan", "egoism", "xxx", "norules", "islam", + "follaburra", "left", "choroy", "libertarian", "pepinochan", "trannyhate", "chao", "cow2", "asmr", "drug", + "baaa", "monarchia", "mlpol", "fallen", "tuetuechan", "huaren", "selfimprovement", "int", "pdfs", 
"femdom", + "ifunny", "sneedkino", "cueva", "test", "solv", "ckva", "lovelive", "gbtv", "bleached", "mu", "starwars", + "oldnorse", "incel", "jauria", "meta", "gunt", "civu", "videogames", "ancapgenc", "55san", "chaos", + "christian", "intl", "alreadythere", "josh", "juantocades", "kpop", "cyoa", "lole", "nagasakiorg", "health", + "pol2", "collapse", "suicide", "fscchan", "chun", "spee", "bee2", "eroge", "newhalf", "mamertochan", "syspace", + "bitc0in", "jwgirls", "pink", "bosnia", "beauty", "bastet", "wealth", "csspol", "manada", "warwebms", "wrass", + "4chanark", "coalfax", "tulpa", "esneines", "garrettandeerie", "tlnprd", "spam", "grand", "55chan", "butopia", + "bbbb", "harem", "nippon", "loroy", "agdg", "lilium", "ireland", "greatbritain", "voxxe", "karaite", + "initiate", "nodelete", "toon", "guro", "news", "oppai", "nationalanarchism", "ixit", "roblox", "autism", + "technology", "bustin", "argentina", "waifu", "money", "newzealand", "jewish", "schiz", "delogged", + "sonicporn", "guns", "gaychan", "bitwave", "ancap", "civcraft", "imps", "rule34", "retards", "food", "occult", + "baphomet", "fursuits", "bread", "czsk", "toys", "tacos", "philosophy", "lain", "accel", "leftypol", "neutg", + "freezone", "musiczone", "medprim", "sonic", "metokur", "jewishniggers9", "anonclub", "miku", "hell", + "cuteanimegirls", "italia", "bmw", "2drandom", "hentai", "delicious", "entropy", "esoteric", "hack", + "milliondollarextreme", "antiporn", "emugen", "schizo", "arch", "craft", "antiroot", "fatchan", "trotsky", + "boris", "pape", "loli", "virgins", "discord", "cuteboys", "lgbq", "shota", "gayporn", "prep", "thule", + "mental", "scfl", "weeb", "corxea", "loomis", "murderhole", "9tox", "reddit", "europe", "lounge", "image", + "fascist", "heidi", "j41d3n4n1m4710n5", "onion", "thedickshow", "cute", "hgame", "bovines", "commando", + "interracial", "schattenkrieg", "foid", "friends", "gurochanlit", "1984", "fatpeoplehate", "hispachan", + "tuy360", "northwest", "instruments", "1c7k", 
"vent", "japan", "bantb", "xtian", "tomboy", "bitcoin", + "bizness", "newpol", "online", "wldm", "pettanko", "qanon", "radfem", "anime", "lift", "cuckime", "missouri", + "afrochan", "share", "r9001", "ttrpg", "france", "prueba", "warhammer", "text", "enjambre", "bandada", + "privateparts", "mega", "pen2", "hurt", "jorship", "fatgirls", "ecsy", "redbar", "agatha2", "agatha", "indie", + "nano", "israel", "trap", "doomer", "girls", "intcraft", "perv", "onlypol", "book", "sanctos", "brasil", + "oats", "avnol", "trapshota", "thadhs2", "nemunemu", "andes", "terrydavis", "krautchan", "fursecute", "bugs", + "rdog", "leftnudes", "42069", "furponyweeb", "shrekchan", "shitpostbot", "tenmagab", "newsplus", "bdsm", + "fren", "seed", "video", "dontknow", "states", "death", "draw", "scp173", "hechoparatestear", "coon", "jazz", + "homestuck", "worksafegif", "leveloneb", "nodelete2", "cyber", "null", "guessinggames", "fringe", + "vexillology", "kotatx", "nurd", "nimbusters", "vfur", "startrek", "monster", "mokole", "banter", "mamono", + "russianskill9chan", "hats", "liberia", "cuckhunt", "lolicon", "imas", "gout", "eceleb", "alaska", "madoka", + "fuckjannies", "random", "midpol", "9gag", "nigger", "nsgsig", "libpol", "crypto", "hypno", "product", "ketsu", + "therightstuff", "cons", "digiart", "diochan", "matriarchy", "scifi", "irlnsoc", "chess", "markwiens", "brap", + "deletethistestboard", "toho", "moonman", "concordia", "bee4", "vcollins", "tjwa", "nederland", "lotr", + "solar", "comicsgate", "bisdak", "3dprint", "arda", "goodvibes", "bane", "awoo", "digimon", "blacked", "bike", + "sudpol", "gore", "mecha", "mettaton", "minions", "roze", "ytp2004", "cutefunny", "hisparefugio2", "poopol", + "normiecontainment", "bogdanoff", "wh40k", "ohio", "meow", "peppep", "drugs", "claw", "christianxpol", "vidya", + "canpol", "otter", "janny", "heem", "nook", "streamers", "leaf", "hitler", "neovagina", "dprk", "zoopals", + "xpol", "radio", "catholic", "kiwi", "rekt", "testclover", "ambient", 
"urbex", "hackfrauds", "2b2t", "xeraph", + "sergal", "animu", "poop", "safespace", "auspol", "swbunker", "jews", "oregon", "lithuania", "troon", "meme", + "neet", "kube", "pony", "bans", "jrpg", "hoshikawamafuyu", "rboard", "feet", "politics", "support", "swag", + "dhsmk2", "weed", "intll", "pokemon", "ecchi", "poli", "hiki", "cuckold", "23213", "ethots", "malware", + "happening", "neets", "piss", "trump", "joshua", "gifs", "bitch", "9chan", "freech", "neovag", "shit", "edgy", + "milsperg", "genesis", "4chan", "niggerology", "ratchet", "mugen", "drama", "themes", "animus", "programming", + "lgbtq", "gurps", "baph", "kind", "commiecat", "poem", "wooo", "cold", "stonks", "tiktok", "kino", "touhou", + "4xgsg", "frogposting", "yiff", "retw", "wikieat", "manifesto", "neoxen", "hate", "tooter", "wall", "bnwo", + "lenny", "againsthateboards", "aust", "rwby", "alcohol", "fitlit", "sips", "fedpost", "erpd", "science", + "rbanter", "venezuela", "consoomer", "1776", "research", "thedonald", "hga2", "testing", "gondola", "drwho", + "tropic", "rustchan", "mlpp", "vector", "ausneets", "civrealms", "cloth", "comics", "grug", "valve", "simp", + "ipv6", "rustlang", "lulz", "niggers", "wagie", "cats", "lief", "poke", "pinball", "hist", "intro", "econ", + "wsgif", "anthro", "corona", "sneed", "dolphin", "para", "julay", "rage", "rust", "abby", "obscuremedia", + "bqoh", "maga", "9pol", "gamedev", "bible", "nshg", "nite", "9chansupport", "centristpol", "iceposeidon2", + "scientology", "bunny", "wiki", "cuck", "communism", "bbbc", "popbob", "drater", "tds2", "trans", "jenkem", + "history", "incels", "smash", "movienight", "pone2", "fash", "pontypandy", "cheat", "photosofjoshuaconnermoon", + "testboardborad", "yhwh", "murec", "anarcho", "vaporwave", "hypnosis", "green", "taqueria", "s9chs", "grza", + "testboard", "vd20", "feds", "deep", "archive", "webm", "monkeynoises", "spooky", "raid", "indy", + "bigtittyslotmachine", "egy", "spacewestern", "videogamegeneral", "hikikomori", 
"fuckmark", "bose", "fuzhou", + "print", "panela", "testingoof", "lietuva", "brigade", "cringepol", "snow", "endchan", "waitplz", "vore", + "sanic", "ranrol", "buttplug", "traditionalgames", "scurv", "shia", "wota", "yugi", "transphobia", + "soyjakparty", "italy", "atheism", "halal", "kjvbestbible", "market", "fapioh", "en3ma12345m9", "fatego", + "feedprintersfilament", "cows", "illitterate", "inflation", "unreal", "television", "australia", "ozihcs", + "soysoy", "cozy", "choroy1111", "fpsg", "nonsensz", "bitte", "discords", "zoomerright", "meadhall", + "niggersgay", "gdpspawn", "roman", "terf", "buddhist", "atheistpol", "sonicporngeneral", "buddhism", "trihard", + "pyro", "suggestion", "aaaaaaaaaaaaaaaaaaaa", "emoff", "among", "1488", "bulkcheapammo", "spacechan", "wasp", + "scat", "witchhouse", "christianpol", "christianity", "obmedia", "darksydephil", "privacy", "operate", "long", + "leftistpol", "christianidentity", "hnhc", "lawb", "memes", "buddha", "brchan", "truecrime", "meth", "blog", + "opieandanthony", "help", "mexicali", "natsoc", "cuteboy", "1ccccccccccc", "siberia", "vice", "anarkism", + "cooking", "photo", "tobacco", "stim", "rand21", "hooch", "christ", "patch", "invaderwatch", "retro", "alogod", + "cocaine", "deutsch", "streamer", "shrek", "nootropics", "rant", "monarchy", "lbrtn", "arepa", "piripum", + "dogola", "animalcrossing", "devontracey", "bqoa", "vapor", "kush", "lolnada", "autismawareness", + "politicallyincorrect", "hockey", "randb", "traps", "vichan", "ircsecrets", "bosartest111111", "chib", + "testing1234fake", "mdma", "virgo", "homo", "scum", "anal", "gamerhatehq", "vagina", "dump", "advert", + "jueggin", "kike", "type", "robot", "goodguys", "ween", "bankfraudaccountloading", "vhsch"), + rps=1 / 10 + ), } diff --git a/chan/infinitynext_json.py b/chan/infinitynext_json.py new file mode 100644 index 0000000..c5e0867 --- /dev/null +++ b/chan/infinitynext_json.py @@ -0,0 +1,72 @@ +from json import JSONDecodeError +from urllib.parse import 
urljoin
+
+import json
+
+from chan.helper import ChanHelper
+from post_process import get_links_from_body
+from util import logger
+
+
+class JsonInfinityNextChanHelper(ChanHelper):
+    """ChanHelper that reads thread lists and threads from ``.json`` endpoints.
+
+    Relies on ``self._base_url``, ``self._thread_path`` and ``self._image_url``,
+    which are not assigned in this class (presumably set by ``ChanHelper``).
+    """
+
+    def threads_url(self, board):
+        """Return the URL of the JSON thread index of *board*."""
+        return "%s%s/index.json" % (self._base_url, board)
+
+    def posts_url(self, board, thread):
+        """Return the URL of the JSON document of *thread*, addressed by its ``board_id``."""
+        return "%s%s%s%d.json" % (self._base_url, board, self._thread_path, thread["board_id"])
+
+    @staticmethod
+    def item_type(item):
+        """Return "post" if *item* has a non-null ``reply_to``, else "thread"."""
+        return "thread" if item.get("reply_to") is None else "post"
+
+    @staticmethod
+    def item_id(item):
+        """Return the ``post_id`` of *item*."""
+        return item["post_id"]
+
+    @staticmethod
+    def item_mtime(item):
+        """Return the ``updated_at`` value of *item*."""
+        return item["updated_at"]
+
+    def item_urls(self, item, board):
+        """Return the de-duplicated URLs referenced by *item*.
+
+        Combines the links found in ``content_raw`` with the ``file_url`` of
+        every attachment, resolved against ``self._image_url``. *board* is
+        not used.
+        """
+        urls = set()
+
+        content = item.get("content_raw")
+        if content:
+            urls.update(get_links_from_body(content))
+        # "attachments" may be missing, null or empty; all mean "no files".
+        for attachment in item.get("attachments") or ():
+            urls.add(urljoin(self._image_url, attachment["file_url"]))
+
+        return list(urls)
+
+    @staticmethod
+    def thread_mtime(thread):
+        """Return the ``updated_at`` value of *thread*."""
+        return thread["updated_at"]
+
+    @staticmethod
+    def parse_threads_list(r):
+        """Decode the thread index in response *r*.
+
+        Returns a ``(threads, None)`` tuple. ``threads`` is the decoded list,
+        or ``[]`` (after logging a warning) when the body is not valid JSON or
+        is not a non-empty list whose first element carries a ``post_id``.
+        """
+        try:
+            j = json.loads(r.content.decode('utf-8', 'ignore'))
+        except JSONDecodeError:
+            logger.warning("JSONDecodeError for %s:" % (r.url,))
+            logger.warning(r.text)
+            return [], None
+
+        # Valid JSON that is not a list of objects (for instance an error
+        # object) is treated like an empty index instead of raising.
+        if not isinstance(j, list) or not j or not isinstance(j[0], dict) or "post_id" not in j[0]:
+            logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
+            return [], None
+
+        return j, None
+
+    @staticmethod
+    def parse_thread(r):
+        """Yield the items of the thread in response *r*.
+
+        The first item is the thread object without its ``replies`` key,
+        followed by every element of ``replies``. Nothing is yielded (and a
+        warning is logged) when the body is not a JSON object.
+        """
+        try:
+            j = json.loads(r.content.decode('utf-8', 'ignore'))
+        except JSONDecodeError:
+            logger.warning("JSONDecodeError for %s:" % (r.url,))
+            logger.warning(r.text)
+            return
+
+        if not isinstance(j, dict):
+            logger.warning("Unexpected thread response for %s: %s" % (r.url, r.text,))
+            return
+
+        thread = j.copy()
+        # A thread without a "replies" list (missing or null) has no replies.
+        replies = thread.pop("replies", None) or []
+        yield thread
+        yield from replies
diff --git a/docker-compose.yml b/docker-compose.yml
index aaf2998..26f23a1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -374,3 +374,11 @@ services:
       - "CF_CHAN=wizchan"
       - "CF_REDIS_HOST=redis"
       -
"CF_INFLUXDB=influxdb"
+  9chan:
+    image: simon987/chan_feed
+    restart: always
+    user: ${CURRENT_UID}
+    environment:
+      - "CF_CHAN=9chan"
+      - "CF_REDIS_HOST=redis"
+      - "CF_INFLUXDB=influxdb"
diff --git a/get_9chan_boards.py b/get_9chan_boards.py
new file mode 100644
index 0000000..bd056a3
--- /dev/null
+++ b/get_9chan_boards.py
@@ -0,0 +1,73 @@
+"""Refresh the board list configured for the "9chan" entry of CHANS.
+
+Walks the paginated board listing of 9chan.tw and prints an updated boards
+tuple: boards that are configured but no longer listed get a "_" prefix
+(masked), masked boards that are listed again lose it (unmasked), and boards
+that are not configured yet are appended.
+"""
+import json
+import time
+import requests
+from chan.chan import CHANS
+
+MASK_PREFIX = "_"
+MAX_PAGES = 50
+REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely without one
+
+existing = CHANS["9chan"]._boards
+# Drop repeated entries (keeping first-seen order) so that the printed tuple
+# never lists the same board twice.
+updated = list(dict.fromkeys(existing))
+added = set()
+
+
+def mask(board):
+    """Prefix every entry of ``updated`` equal to *board* with "_"."""
+    for i, b in enumerate(updated):
+        if b == board:
+            updated[i] = MASK_PREFIX + board
+            print("[-] " + board)
+
+
+def unmask(board):
+    """Strip the "_" prefix from every masked entry of *board* in ``updated``."""
+    for i, b in enumerate(updated):
+        if b == (MASK_PREFIX + board):
+            updated[i] = board
+            print("[*] " + board)
+
+
+for page in range(MAX_PAGES):
+    r = requests.get(
+        f"https://9chan.tw/boards.html?lang=&tags=&time={int(time.time())}&title=&sfw=0&sort=&sortBy=desc&page={page}",
+        headers={
+            "Accept": "application/json",
+        },
+        timeout=REQUEST_TIMEOUT,
+    )
+
+    j = json.loads(r.text)
+
+    if not j["boards"]:
+        break
+
+    for entry in j["boards"].values():
+        uri = entry["board_uri"]
+        if uri in added:
+            # If the listing returns the same board on more than one page,
+            # handling it again would append a duplicate entry.
+            continue
+        added.add(uri)
+
+        if (MASK_PREFIX + uri) in updated:
+            unmask(uri)
+        elif uri not in existing:
+            updated.append(uri)
+            print("[+] " + uri)
+
+for board in existing:
+    # Entries that are already masked must not gain a second "_" prefix.
+    if not board.startswith(MASK_PREFIX) and board not in added:
+        mask(board)
+
+print("(" + ",".join('"' + u + '"' for u in updated) + ")")