Update hexlib, bug fixes, refactor, migrate item IDs

simon987 2021-03-07 14:29:36 -05:00
parent 6d0e3f0f52
commit 0133c42d62
23 changed files with 245 additions and 4045 deletions

.gitmodules vendored (3 changed lines)
View File

@ -1,3 +0,0 @@
[submodule "docker_viz/feed_viz"]
path = docker_viz/feed_viz
url = https://github.com/simon987/feed_viz

View File

@ -1,15 +0,0 @@
### chan_feed
Daemon that fetches posts from compatible *chan
image boards and publishes serialised JSON to redis
for real-time ingest.
Compatible image boards: 4chan, lainchan, uboachan,
22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal,
horochan, doushio, desuchan, tgchan, lolnada, 7chan, chanon,
chan.org.li, hispachan, 8kun, nowere, iichan, 2chan and more.
Can optionally push monitoring data to InfluxDB. Below is an
example of Grafana being used to display it.
![monitoring.png](monitoring.png)
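The feed format described here is set by run.py later in this commit: each item is serialised with sorted keys and pushed to a redis list whose name encodes the chan, item type and board. A minimal sketch of a published message, with made-up field values ("no" and "resto" are 4chan API field names, not guaranteed by this commit):

import json

# routing key format from run.py: "<chan>.<type>.<board>"
routing_key = "4chan.post.g"
message = json.dumps({"no": 123456789, "resto": 123456000, "com": "example"},
                     separators=(',', ':'), ensure_ascii=False, sort_keys=True)
# run.py then does: rdb.lpush("arc.chan2." + routing_key, message)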

View File

@ -40,7 +40,6 @@ CHANS = {
"news", "out", "po", "pol", "qst", "sci", "soc", "sp",
"tg", "toy", "trv", "tv", "vp", "wsg", "wsr", "x"
),
rps=3 / 2
),
"lainchan": JsonChanHelper(
2,
@ -53,7 +52,6 @@ CHANS = {
"hum", "drg", "zzz", "layer", "q", "r", "_cult", "_psy",
"_mega",
),
rps=1 / 60
),
"uboachan": JsonChanHelper(
3,
@ -65,7 +63,6 @@ CHANS = {
"yn", "yndd", "fg", "yume", "o", "lit", "media", "og",
"ig", "2", "ot", "hikki", "cc", "x", "sugg"
),
rps=1 / 120
),
"22chan": JsonChanHelper(
4,
@ -77,7 +74,6 @@ CHANS = {
"a", "b", "f", "yu", "i", "k", "mu", "pol", "sewers",
"sg", "t", "vg"
),
rps=1 / 120
),
"wizchan": JsonChanHelper(
5,
@ -88,7 +84,6 @@ CHANS = {
(
"wiz", "dep", "hob", "lounge", "jp", "meta", "games", "music",
),
rps=1 / 60
),
# TODO
# "1chan": ChanHelper(
@ -100,7 +95,6 @@ CHANS = {
# (
# "rails"
# ),
# rps=1 / 600
# ),
"2chhk": RussianJsonChanHelper(
7,
@ -120,7 +114,6 @@ CHANS = {
"a", "fd", "ja", "ma", "vn", "fg", "fur", "gg", "ga",
"vape", "h", "ho", "hc", "e", "fet", "sex", "fag"
),
rps=1 / 5
),
"endchan": EndchanHtmlChanHelper(
8,
@ -141,7 +134,6 @@ CHANS = {
"ausneets", "qanonresearch", "polru", "yuri", "christianity",
"kc", "rapport", "news", "brit", "webm", "4chon"
),
rps=1 / 10
),
"38chan": JsonChanHelper(
9,
@ -152,7 +144,6 @@ CHANS = {
(
"a", "b", "g", "38"
),
rps=1 / 600
),
"alokal": AlokalJsonChanHelper(
10,
@ -164,7 +155,6 @@ CHANS = {
"b", "pol", "sk", "int", "slav", "s", "gv", "mda", "sp",
"fit", "had",
),
rps=1 / 60
),
"gnfos": JsonChanHelper(
11,
@ -175,7 +165,6 @@ CHANS = {
(
"jp", "drive"
),
rps=1 / 120
),
"synch": SynchJsonChanHelper(
12,
@ -187,7 +176,6 @@ CHANS = {
"b", "d", "_r", "a", "_g", "mlp", "mu", "_tv", "vg",
"_wh", "old", "test"
),
rps=1 / 120
),
"tahta": JsonChanHelper(
13,
@ -198,7 +186,6 @@ CHANS = {
(
"b", "g", "s", "v"
),
rps=1 / 300
),
"awsumchan": JsonChanHelper(
14,
@ -209,7 +196,6 @@ CHANS = {
(
"an", "aw", "cr", "fi", "ra", "au", "ga", "he", "sp"
),
rps=1 / 600
),
"horochan": MayuriChanHelper(
15,
@ -218,7 +204,6 @@ CHANS = {
(
"b",
),
rps=1 / 20
),
"doushio": DoushioHtmlChanHelper(
16,
@ -229,7 +214,6 @@ CHANS = {
(
"moe",
),
rps=1 / 20
),
"desuchan": DesuChanHtmlChanHelper(
17,
@ -245,7 +229,6 @@ CHANS = {
"arrrrr", "brocastan", "gar", "gif", "media", "ot", "r", "w",
"sandbox", "sugg"
),
rps=1 / 30
),
"aurorachan": DesuChanHtmlChanHelper(
18,
@ -257,7 +240,6 @@ CHANS = {
"_bm", "de", "ic", "rp", "rpi", "v", "w", "tg",
"alt", "b", "g", "pkmn", "yuri", "fl", "mu", "sugg"
),
rps=1 / 20
),
"tgchan": TgChanHtmlChanHelper(
19,
@ -268,7 +250,6 @@ CHANS = {
(
"draw", "meep", "quest", "questdis", "tg", "icons",
),
rps=1 / 600,
),
"lolnada": LolNadaHtmlChanHelper(
20,
@ -280,7 +261,6 @@ CHANS = {
"b", "a", "aw", "cgl", "dw", "int", "qt", "sad", "t",
"toy", "v", "x", "34", "e", "f", "h"
),
rps=1 / 60,
),
"fchan": FChanHtmlChanHelper(
21,
@ -291,7 +271,6 @@ CHANS = {
(
"f", "m", "h", "s", "toon", "a", "ah", "c", "artist", "crit", "b"
),
rps=1 / 60,
),
"0chan": ZerochanHtmlChanHelper(
22,
@ -307,7 +286,6 @@ CHANS = {
"poligon", "postach", "psih", "r", "rm", "s", "shrek", "shy", "t",
"test", "tlp", "tmp", "tv", "vg", "vipe", "wh", "xikkadvach", "ynet"
),
rps=1 / 5
),
"410chan": Chan410HtmlChanHelper(
23,
@ -318,7 +296,6 @@ CHANS = {
(
"d", "b", "cu", "dev", "r", "a", "ts", "ci"
),
rps=1 / 120
),
"7chan": Chan7HtmlChanHelper(
24,
@ -335,7 +312,6 @@ CHANS = {
"elit", "fag", "fur", "gif", "h", "men", "pco", "s",
"sm", "ss", "unf", "v",
),
rps=1 / 30
),
"chanon": ChanonHtmlChanHelper(
25,
@ -347,7 +323,6 @@ CHANS = {
"a", "int", "j", "m", "pc", "pol", "prog", "tv",
"b", "milo", "pr0n", "s", "c", "sug",
),
rps=1 / 60
),
"chanorg": JsonChanHelper(
26,
@ -358,7 +333,6 @@ CHANS = {
(
"b", "goys"
),
rps=1 / 60
),
"iichan": IichanHtmlChanHelper(
27,
@ -373,7 +347,6 @@ CHANS = {
"aa", "abe", "c", "fi", "jp", "rm", "tan", "to", "ts",
"vn", "vo", "misc"
),
rps=1 / 10
),
"nowere": NowereHtmlChanHelper(
28,
@ -384,7 +357,6 @@ CHANS = {
(
"b", "d", "tu", "a", "ph", "wa", "cg", "t", "p"
),
rps=1 / 60
),
"8kun2": JsonKunChanHelper(
35,
@ -392,67 +364,84 @@ CHANS = {
"https://media.8kun.top/",
"/res/",
"file_store/",
("1", "55chan", "_64chen", "8bantb", "8tube", "a", "_abdl2", "agdg", "amv", "aneki", "animu", "animus", "ara",
"arda", "arms", "asatru", "asmr", "aus", "ausneets", "__b", "__baka", "_baneposting", "__baseballbat",
"bcards", "bleached", "blog", "__bonehurtingjuice", "bq", "__brit", "bubblegum", "builders", "bunkers", "butt",
"cafechan", "caffe", "canada", "cath", "chori", "choroy", "christian", "christianity", "christianmeme",
"cicachan", "civicrs", "ck", "cloveros", "co", "cow", "__cuckquean", "cute", "cyber", "cyoa", "__czech",
"dadtalk", "danpu", "dao101", "degen", "delete", "dempart", "desu", "diaperfags", "diaperfetish", "dir",
"__dolphin", "dpfag", "_dpr", "druid", "_e9y", "eatme", "ebola", "eerie", "egy", "egypt", "etika", "eu",
"euskotxa", "__exit", "f1", "fa", "fairy", "fallen", "fast", "faygo", "feet", "femaledomination", "feri",
"__fightcomms", "film", "flemish", "floss", "fortnite", "freedomzine", "fukemo", "fumo", "fur", "furry", "g",
"gamergatehq", "genesis", "_gesu", "ggis", "girltalk", "greenbreeze", "gts", "haxxor", "hentai", "hentaiclub",
"__herm", "hermetics", "hgb", "hgg", "__hindu", "hisparefugio", "hissss", "hnt", "hover", "hybrids", "hydrus",
"hypno", "_hypnochan", "icup", "imperium", "in", "ipfs", "ircsecrets", "islam", "ita", "jaooo", "jewess",
"jmaatv", "joker", "jp", "k", "_kekforceusa", "kemono", "kocsog", "kohlchan", "__(komica)", "_komika", "kpop",
"lain", "_lego", "leo", "lewd", "lit", "lol", "loomis", "loroy", "luddite", "magick", "maka", "mde",
"merrychristmas", "miku", "milf", "mom", "monster", "msb", "mtb", "mtt", "mu", "n0thingness", "nanachi",
"natiofr", "nep", "newbrit", "newsplus", "nobody", "nofap", "nofur", "nogatco", "nothingness", "ntr", "_nuke8",
"oanda", "__ocb", "__ocult", "_omorashi", "opmk", "os", "otter", "p", "panconleche", "pdfs", "__peaceofmind",
"pen", "philosophy", "_pkmns", "pnd", "pokeporn", "polymath", "pone", "projectdcomms", "__pyatibrat", "_qm",
"qpatriotresearch", "__qresearch", "qrnews", "__rand21", "rec", "rmart", "rusrandom", "rzabczan", "s", "s8s",
"sag", "sapphic", "shousa", "sikhi", "sip", "sl", "_snowboarding", "socpl", "strek", "subs", "__sve", "t",
"tan", "tdt", "tech9", "techan", "techbunker", "tek", "templeos", "tenda", "teraha", "__texit", "tf2", "__tg",
"_thb", "thedickshow", "throat", "_tibby", "tikilounge", "tkr", "tr55", "__trashcollector", "truthlegion",
"tulpamancers", "turul", "tutturu", "tv", "u", "uaco", "_ucla", "underground", "__usersunion", "v", "vichan",
"vietkong", "vietnam", "vore", "vr", "_warposting", "wdsc", "webm", "wg", "__wga", "wikieat", "wis", "wmafsex",
"workrelated", "wqt", "wx", "x", "__xivl", "__xtian", "zoomerright", "zundel", "0", "55sync", "abdl",
"alleycat", "_arisu", "arisubunker", "_arp", "bane", "_bimbohypnosis", "_bluemoon", "bmn", "brains", "cats",
"_chance", "clang", "comfy", "critters", "_cursed", "_cvine", "cze", "d", "dcaco", "demonp", "_dnmd", "doomer",
"doot", "elitabla", "_empanada", "erp", "_falseflags", "fashionplus", "fata", "femdom", "fit", "_flg",
"_fr8chan", "futyitorna", "garrett", "_giantesshentai", "hentaiporn", "hmfr", "hooliedayz", "hsp", "hujszon",
"iep", "just", "k46", "kind", "_kiwc", "kukichan", "_lacajita", "_legos", "lgd", "liveanarchy",
"luciddreaming", "m", "_mapp", "mental", "_mets", "_milhis", "monarchy", "_myon", "newhomosuck", "newsci",
"_nine", "oes", "onepiece", "_other369", "otomad", "_penguware", "psyid", "qresearch2gen", "rule34",
"_satorare", "sonyeon", "split", "sunflower", "_tae", "test", "_tft", "tftg", "toy", "trap", "_vein",
"_virtualreality", "vivian", "voros", "wbr", "_weird", "wooo", "yuuka", "fringe", "random", "cuteboys", "tech",
"internatiomall", "interracial", "liberty", "htg", "mai", "komica", "cutebois", "argentina", "r", "tf",
"draftnote", "abcu", "k117", "britfeel", "liberty", "htg", "mai", "komica", "cutebois", "argentina", "r", "tf",
"draftnote", "abcu", "k117", "britfeel", "y", "an", "francofil", "portal", "royalhawk", "vdm", "bullmask",
"imouto", "tripfriend", "arepa", "rwby", "sw", "y", "an", "francofil", "portal", "royalhawk", "vdm",
"bullmask", "imouto", "tripfriend", "arepa", "rwby", "sw", "magali", "hikki", "biz", "eris", "india", "mg",
"magali", "hikki", "biz", "eris", "india", "mg", "out", "infinity", "tifa", "muslim", "out", "infinity",
"tifa", "muslim", "slackware", "archivo", "flatearth", "yaoi", "boombox", "wdp", "thedonald",
"libertedexpression", "khyber", "jsr", "slackware", "archivo", "flatearth", "yaoi", "boombox", "wdp",
"thedonald", "libertedexpression", "khyber", "jsr", "fso", "wumpawhip", "buddhismhotline", "indochinaexpats",
"ett", "redbar", "skyline350gt", "asc", "bazafx", "bestkorea", "covid19", "sokra", "bowsu", "qpatriotsunited",
"verzet", "wlctint", "cultstate", "melody", "vedic", "yhvh", "1cok", "astropolis", "fso", "wumpawhip",
"buddhismhotline", "indochinaexpats", "ett", "redbar", "skyline350gt", "asc", "bazafx", "bestkorea", "covid19",
"sokra", "bowsu", "qpatriotsunited", "verzet", "wlctint", "cultstate", "melody", "vedic", "yhvh", "1cok",
"astropolis", "earthlibfront", "pardochan", "stanislawowski", "thetrump", "yukkuri", "1825kun", "cryptobtc",
"isol", "knights", "language", "rr34", "sperg", "awaken", "belgium", "blizzard", "brain", "buddha", "dbs",
"deestevensvoice4you", "f4net", "fuckuchina", "gbtv", "hairygirls", "hallaca", "homeowner", "indo", "jersey",
"jigglypuff", "lbt", "madh4ckrs", "medcorp", "miamichan", "mrsfrisby", "mulatto", "mupro", "nhoodlink",
"p5porn", "patriotrevolution", "peko", "projectobject", "prop", "pups", "qanonspain", "qcastellano",
"earthlibfront", "pardochan", "stanislawowski", "thetrump", "yukkuri", "1825kun", "cryptobtc", "isol",
"knights", "language", "rr34", "sperg", "awaken", "belgium", "blizzard", "brain", "buddha", "dbs",
"deestevensvoice4you", "f4net", "fuckuchina", "gbtv", "hairygirls", "hallaca", "homeowner", "indo", "jersey",
"jigglypuff", "lbt", "madh4ckrs", "medcorp", "miamichan", "mrsfrisby", "mulatto", "mupro", "nhoodlink",
"p5porn", "patriotrevolution", "peko", "projectobject", "prop", "pups", "qanonspain", "qcastellano", "qsocial",
"resist", "revolu", "skemt", "sketheory", "spaceforce", "surro", "thehand", "transit", "vitaecryptocurrency",
"qsocial", "resist", "revolu", "skemt", "sketheory", "spaceforce", "surro", "thehand", "transit",
"vitaecryptocurrency"),
rps=1 / 3
("1", "55chan", "_64chen", "8bantb", "8tube", "a", "_abdl2", "agdg", "_amv", "aneki", "animu", "animus", "ara",
"arda", "_arms", "asatru", "_asmr", "aus", "ausneets", "_b", "_baka", "_baneposting", "_baseballbat",
"_bcards", "bleached", "blog", "_bonehurtingjuice", "_bq", "_brit", "bubblegum", "builders", "bunkers", "butt",
"cafechan", "caffe", "canada", "_cath", "chori", "choroy", "christian", "christianity", "_christianmeme",
"cicachan", "civicrs", "ck", "cloveros", "co", "cow", "_cuckquean", "cute", "cyber", "cyoa", "_czech",
"_dadtalk", "danpu", "dao101", "degen", "delete", "dempart", "desu", "diaperfags", "diaperfetish", "dir",
"_dolphin", "_dpfag", "_dpr", "druid", "_e9y", "_eatme", "ebola", "eerie", "egy", "egypt", "_etika", "_eu",
"_euskotxa", "_exit", "f1", "fa", "_fairy", "fallen", "fast", "faygo", "feet", "femaledomination", "feri",
"_fightcomms", "film", "flemish", "_floss", "fortnite", "freedomzine", "fukemo", "fumo", "fur", "furry", "g",
"gamergatehq", "genesis", "_gesu", "_ggis", "girltalk", "greenbreeze", "gts", "_haxxor", "hentai",
"hentaiclub", "_herm", "_hermetics", "_hgb", "hgg", "_hindu", "hisparefugio", "_hissss", "hnt", "hover",
"hybrids", "_hydrus", "hypno", "_hypnochan", "icup", "imperium", "in", "ipfs", "ircsecrets", "islam", "ita",
"_jaooo", "jewess", "_jmaatv", "_joker", "jp", "k", "_kekforceusa", "kemono", "kocsog", "kohlchan",
"_(komica)", "_komika", "kpop", "lain", "_lego", "leo", "lewd", "lit", "_lol", "loomis", "_loroy", "luddite",
"magick", "maka", "mde", "_merrychristmas", "_miku", "milf", "_mom", "monster", "_msb", "mtb", "mtt", "mu",
"_n0thingness", "_nanachi", "natiofr", "nep", "newbrit", "newsplus", "_nobody", "nofap", "_nofur", "_nogatco",
"nothingness", "ntr", "_nuke8", "_oanda", "_ocb", "_ocult", "_omorashi", "_opmk", "os", "otter", "p",
"_panconleche", "pdfs", "_peaceofmind", "pen", "philosophy", "_pkmns", "pnd", "pokeporn", "polymath", "pone",
"projectdcomms", "_pyatibrat", "_qm", "qpatriotresearch", "qresearch", "qrnews", "_rand21", "rec", "rmart",
"_rusrandom", "rzabczan", "s", "s8s", "_sag", "sapphic", "shousa", "_sikhi", "sip", "sl", "_snowboarding",
"socpl", "strek", "_subs", "_sve", "t", "tan", "tdt", "_tech9", "_techan", "techbunker", "_tek", "templeos",
"tenda", "teraha", "_texit", "tf2", "_tg", "_thb", "_thedickshow", "throat", "_tibby", "tikilounge", "tkr",
"_tr55", "_trashcollector", "truthlegion", "tulpamancers", "turul", "tutturu", "tv", "u", "_uaco", "_ucla",
"underground", "_usersunion", "v", "vichan", "_vietkong", "vietnam", "vore", "vr", "_warposting", "wdsc",
"webm", "wg", "_wga", "wikieat", "wis", "wmafsex", "_workrelated", "_wqt", "wx", "x", "_xivl", "_xtian",
"_zoomerright", "zundel", "0", "55sync", "abdl", "alleycat", "_arisu", "_arisubunker", "_arp", "_bane",
"_bimbohypnosis", "_bluemoon", "bmn", "brains", "cats", "_chance", "clang", "comfy", "_critters", "_cursed",
"_cvine", "_cze", "d", "dcaco", "_demonp", "_dnmd", "doomer", "doot", "elitabla", "_empanada", "erp",
"_falseflags", "fashionplus", "_fata", "femdom", "fit", "_flg", "_fr8chan", "futyitorna", "garrett",
"_giantesshentai", "hentaiporn", "_hmfr", "hooliedayz", "hsp", "_hujszon", "_iep", "just", "k46", "_kind",
"_kiwc", "kukichan", "_lacajita", "_legos", "_lgd", "liveanarchy", "_luciddreaming", "m", "_mapp", "mental",
"_mets", "_milhis", "monarchy", "_myon", "newhomosuck", "newsci", "_nine", "_oes", "_onepiece", "_other369",
"_otomad", "_penguware", "psyid", "qresearch2gen", "rule34", "_satorare", "sonyeon", "split", "_sunflower",
"_tae", "test", "_tft", "tftg", "toy", "trap", "_vein", "_virtualreality", "vivian", "voros", "wbr", "_weird",
"wooo", "yuuka", "fringe", "random", "cuteboys", "tech", "_internatiomall", "interracial", "liberty", "htg",
"mai", "komica", "cutebois", "argentina", "r", "tf", "draftnote", "abcu", "_k117", "britfeel", "liberty",
"htg", "mai", "komica", "cutebois", "argentina", "r", "tf", "draftnote", "abcu", "_k117", "britfeel", "y",
"an", "francofil", "portal", "_royalhawk", "_vdm", "_bullmask", "imouto", "tripfriend", "arepa", "rwby", "sw",
"y", "an", "francofil", "portal", "_royalhawk", "_vdm", "_bullmask", "imouto", "tripfriend", "arepa", "rwby",
"sw", "magali", "hikki", "biz", "eris", "india", "mg", "magali", "hikki", "biz", "eris", "india", "mg", "out",
"_infinity", "tifa", "_muslim", "out", "_infinity", "tifa", "_muslim", "slackware", "archivo", "_flatearth",
"_yaoi", "_boombox", "_wdp", "thedonald", "libertedexpression", "_khyber", "jsr", "slackware", "archivo",
"_flatearth", "_yaoi", "_boombox", "_wdp", "thedonald", "libertedexpression", "_khyber", "jsr", "fso",
"wumpawhip", "_buddhismhotline", "indochinaexpats", "_ett", "_redbar", "_skyline350gt", "_asc", "bazafx",
"bestkorea", "covid19", "_sokra", "_bowsu", "_qpatriotsunited", "_verzet", "_wlctint", "_cultstate", "_melody",
"_vedic", "yhvh", "1cok", "_astropolis", "fso", "wumpawhip", "_buddhismhotline", "indochinaexpats", "_ett",
"_redbar", "_skyline350gt", "_asc", "bazafx", "bestkorea", "covid19", "_sokra", "_bowsu", "_qpatriotsunited",
"_verzet", "_wlctint", "_cultstate", "_melody", "_vedic", "yhvh", "1cok", "_astropolis", "_earthlibfront",
"_pardochan", "_stanislawowski", "_thetrump", "yukkuri", "1825kun", "cryptobtc", "_isol", "_knights",
"language", "_rr34", "_sperg", "_awaken", "_belgium", "_blizzard", "_brain", "buddha", "_dbs",
"_deestevensvoice4you", "_f4net", "_fuckuchina", "_gbtv", "hairygirls", "_hallaca", "_homeowner", "indo",
"_jersey", "_jigglypuff", "_lbt", "_madh4ckrs", "_medcorp", "_miamichan", "mrsfrisby", "_mulatto", "_mupro",
"_nhoodlink", "_p5porn", "_patriotrevolution", "_peko", "_projectobject", "_prop", "pups", "_qanonspain",
"_qcastellano", "_earthlibfront", "_pardochan", "_stanislawowski", "_thetrump", "yukkuri", "1825kun",
"cryptobtc", "_isol", "_knights", "language", "_rr34", "_sperg", "_awaken", "_belgium", "_blizzard", "_brain",
"buddha", "_dbs", "_deestevensvoice4you", "_f4net", "_fuckuchina", "_gbtv", "hairygirls", "_hallaca",
"_homeowner", "indo", "_jersey", "_jigglypuff", "_lbt", "_madh4ckrs", "_medcorp", "_miamichan", "mrsfrisby",
"_mulatto", "_mupro", "_nhoodlink", "_p5porn", "_patriotrevolution", "_peko", "_projectobject", "_prop",
"pups", "_qanonspain", "_qcastellano", "qsocial", "_resist", "_revolu", "_skemt", "_sketheory", "_spaceforce",
"_surro", "_thehand", "_transit", "_vitaecryptocurrency", "qsocial", "_resist", "_revolu", "_skemt",
"_sketheory", "_spaceforce", "_surro", "_thehand", "_transit", "_vitaecryptocurrency", "midnightriders",
"tingles", "1cc", "prog", "ytc", "arcagayghetto", "prog", "ytc", "arcagayghetto", "2hu", "o", "warroom", "2hu",
"o", "warroom", "ebon", "xiaomicha", "ebon", "xiaomicha", "gnosticwarfare", "moldnet", "zenczan", "cosplay",
"otakus", "nohup", "frenzone", "8dixie", "hqa", "pundit", "vrgg", "uf0", "malaysia", "gnosticwarfare",
"moldnet", "zenczan", "cosplay", "otakus", "nohup", "frenzone", "8dixie", "hqa", "pundit", "vrgg", "uf0",
"malaysia", "instruments", "unlightopen", "pso2g", "jozsicsan", "komijoke", "bmsgeu", "92k", "komicaz", "pcal",
"accent", "wethepatriots", "porussia", "1a", "tarhana", "bigwomen", "maths", "instruments", "unlightopen",
"pso2g", "jozsicsan", "komijoke", "bmsgeu", "92k", "komicaz", "pcal", "accent", "wethepatriots", "porussia",
"1a", "tarhana", "bigwomen", "maths", "coffeetalk", "arcader", "kingcrimson", "moonlight", "trkey", "whogen",
"xivlgr", "amichan", "gendercritical", "inflg", "komicalol", "capcom", "coser", "cud", "feedism", "grc",
"reimuchan", "stalker2", "2020istheyear", "carib", "jumpchen", "mishmash", "qbl", "sakurachan", "satsukichan",
"taodick", "aes", "gacha", "nfl2", "redlands", "traditionalcatholics", "tsiou", "airsoft2", "animation",
"cafardchan", "chrstdis", "coffeetalk", "arcader", "kingcrimson", "moonlight", "trkey", "whogen", "xivlgr",
"amichan", "gendercritical", "inflg", "komicalol", "capcom", "coser", "cud", "feedism", "grc", "reimuchan",
"stalker2", "2020istheyear", "carib", "jumpchen", "mishmash", "qbl", "sakurachan", "satsukichan", "taodick",
"aes", "gacha", "nfl2", "redlands", "traditionalcatholics", "tsiou", "airsoft2", "animation", "cafardchan",
"chrstdis", "komicamc", "marista", "neetpride", "numis", "progmusic", "retrogaminggifs", "warcraft2004",
"komicamc", "marista", "neetpride", "numis", "progmusic", "retrogaminggifs", "warcraft2004"),
),
"hispachan": HispachanHtmlHelper(
30,
@ -466,7 +455,6 @@ CHANS = {
"cl", "co", "ec", "es", "mx", "pe", "py", "uy", "ve", "d",
"h", "o", "s", "sar", "scl", "sco", "ses", "smx", "spe", "sve",
),
rps=1 / 20
),
"sushigirl": JsonChanHelper(
31,
@ -478,7 +466,6 @@ CHANS = {
"archive", "wildcard", "lounge", "arcade", "kawaii",
"kitchen", "tunes", "culture", "silicon", "yakuza", "hell", "lewd"
),
rps=1 / 30
),
"4kev": Kev4PhpHelper(
32,
@ -491,7 +478,6 @@ CHANS = {
"politics", "programming", "random", "technology",
"television", "videogames",
),
rps=1 / 20
),
"plus4chan": Plus4ChanHelper(
33,
@ -503,7 +489,6 @@ CHANS = {
"baw", "co", "cog", "jam", "mtv",
"coc", "draw", "pco", "coq", "cod", "a"
),
rps=1 / 15
),
"2chan": Chan2Helper(
34,
@ -628,7 +613,6 @@ CHANS = {
"oe", # ??? お絵sql
"72", # ??? お絵sqlip
),
rps=1 / 3
),
"waifuist": LynxChanHelper(
36,
@ -639,7 +623,6 @@ CHANS = {
(
"w", "starlet", "etc",
),
rps=1 / 25
),
"cutiegarden": LynxChanHelper(
37,
@ -650,7 +633,6 @@ CHANS = {
(
"lg", "cozy", "meta", "test"
),
rps=1 / 25
),
"9chan": JsonInfinityNextChanHelper(
38,
@ -737,6 +719,5 @@ CHANS = {
"politicallyincorrect", "hockey", "randb", "traps", "vichan", "ircsecrets", "bosartest111111", "chib",
"testing1234fake", "mdma", "virgo", "homo", "scum", "anal", "gamerhatehq", "vagina", "dump", "advert",
"jueggin", "kike", "type", "robot", "goodguys", "ween", "bankfraudaccountloading", "vhsch"),
rps=1 / 10
),
}

View File

@ -1,9 +1,10 @@
import json
from json import JSONDecodeError
from hexlib.log import logger
from chan.helper import ChanHelper
from post_process import get_links_from_body
from util import logger
class JsonChanHelper(ChanHelper):

View File

@ -2,14 +2,13 @@ from bs4 import BeautifulSoup
class ChanHelper:
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards):
self.db_id = db_id
self._base_url = base_url
self._image_url = image_url
self._thread_path = thread_path
self._image_path = image_path
self._boards = boards
self.rps = rps
self.get_method = None
self.save_folder = None
@ -37,7 +36,7 @@ class ChanHelper:
raise NotImplementedError
def item_unique_id(self, item, board):
return int(self.board_hash(board) + str(self.item_id(item)))
return board + str(self.item_id(item))
@staticmethod
def thread_mtime(thread):
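The item_unique_id change above is what the new migrate_item_ids.py script later in this commit back-fills: ids move from a numeric value built with a board hash prefix to a plain string of board name plus post number. A tiny sketch with made-up values (for the JSON-based helpers, item_id(item) is the post's "no" field):

# old (removed): int(board_hash(board) + str(post_no))  -> purely numeric id
# new:           board + str(post_no)
board, post_no = "g", 81000000
new_id = board + str(post_no)   # -> "g81000000"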

View File

@ -3,9 +3,9 @@ import re
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from hexlib.log import logger
from chan.desuchan_html import DesuChanHtmlChanHelper
from util import logger
def _ts(text):

View File

@ -3,9 +3,10 @@ from urllib.parse import urljoin
import json
from hexlib.log import logger
from chan.helper import ChanHelper
from post_process import get_links_from_body
from util import logger
class JsonInfinityNextChanHelper(ChanHelper):

View File

@ -1,7 +1,7 @@
from vanwanet_scrape.scraper import Scraper
from chan.chan_json import JsonChanHelper
from util import logger
from hexlib.log import logger
class JsonKunChanHelper(JsonChanHelper):
@ -10,8 +10,8 @@ class JsonKunChanHelper(JsonChanHelper):
def item_type(item):
return "thread" if item["resto"] == 0 else "post"
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
super().__init__(db_id, base_url, image_url, thread_path, image_path, boards, rps)
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards):
super().__init__(db_id, base_url, image_url, thread_path, image_path, boards)
self._scraper = Scraper(
headers={

View File

@ -7,14 +7,14 @@ import cloudscraper
import sys
from chan.helper import ChanHelper
from util import logger
from hexlib.log import logger
class LynxChanHelper(ChanHelper):
"""See https://gitgud.io/LynxChan/LynxChan/blob/master/doc/Json.txt"""
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
super().__init__(db_id, base_url, image_url, thread_path, image_path, boards, rps)
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards):
super().__init__(db_id, base_url, image_url, thread_path, image_path, boards)
scraper = cloudscraper.create_scraper()
if len(sys.argv) > 3:

View File

@ -3,13 +3,13 @@ from json import JSONDecodeError
from chan.helper import ChanHelper
from post_process import get_links_from_body
from util import logger
from hexlib.log import logger
class MayuriChanHelper(ChanHelper):
def __init__(self, db_id, base_url, image_url, boards, rps):
super().__init__(db_id, base_url, image_url, None, None, boards, rps)
def __init__(self, db_id, base_url, image_url, boards):
super().__init__(db_id, base_url, image_url, None, None, boards)
@staticmethod
def item_id(item):

View File

@ -3,8 +3,7 @@ from json import JSONDecodeError
from chan.helper import ChanHelper
from post_process import get_links_from_body
from util import logger
from hexlib.log import logger
class RussianJsonChanHelper(ChanHelper):

View File

@ -36,6 +36,10 @@ class TgChanHtmlChanHelper(DesuChanHtmlChanHelper):
posts = []
for post_el in op_el.find_all("table", recursive=False):
*_, time = post_el.find("label").children
if post_el.get("class") and "userdelete" in post_el.get("class"):
continue
posts.append({
"id": int(post_el.find("td", attrs={"class", "reply"}).get("id")[5:]),
"type": "post",

View File

@ -1,384 +1,247 @@
version: "2.1"
volumes:
influxdb_data:
pg_data:
pg_data_imhash:
version: "3"
services:
influxdb:
image: influxdb:alpine
volumes:
- influxdb_data:/var/lib/influxdb
grafana:
image: grafana/grafana
ports:
- 127.0.0.1:3006:3000
environment:
- "GF_SECURITY_ADMIN_PASSWORD=changeme"
db:
image: postgres
volumes:
- pg_data:/var/lib/postgresql/data
environment:
- "POSTGRES_USER=feed_archiver"
- "POSTGRES_PASSWORD=changeme"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U feed_archiver"]
interval: 5s
timeout: 5s
retries: 5
db_imhashdb:
image: simon987/pg_hamming
volumes:
- pg_data_imhash:/var/lib/postgresql/data
environment:
- "POSTGRES_USER=imhashdb"
- "POSTGRES_PASSWORD=changeme"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U imhashdb"]
interval: 5s
timeout: 5s
retries: 5
redis:
image: redis
archiver:
image: simon987/feed_archiver
restart: always
depends_on:
db:
condition: service_healthy
environment:
- "FA_DB_HOST=db"
- "FA_DB_USER=feed_archiver"
- "FA_DB_PASSWORD=changeme"
- "FA_REDIS_ADDR=redis:6379"
- "FA_PATTERN=arc.*"
imhashdb:
image: simon987/imhashdb
restart: always
entrypoint: "/build/imhashdb/cli/cli hasher"
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "IMHASHDB_STORE=/data"
- "IMHASHDB_REDIS_ADDR=redis:6379"
- "IMHASHDB_PG_USER=imhashdb"
- "IMHASHDB_PG_PASSWORD=changeme"
- "IMHASHDB_PG_DATABASE=imhashdb"
- "IMHASHDB_PG_HOST=db_imhashdb"
- "IMHASHDB_HASH_CONCURRENCY=16"
# Image boards
4chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=4chan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
0chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=0chan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
22chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=22chan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
2chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=2chan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
2chhk:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=2chhk"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
38chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=38chan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
410chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=410chan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
4kev:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=4kev"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
7chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=7chan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
8kun:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=8kun"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
alokal:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=alokal"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
aurorachan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=aurorachan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
awsumchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=awsumchan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
chanon:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=chanon"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
chanorg:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=chanorg"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
desuchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=desuchan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
doushio:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=doushio"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
endchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=endchan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
fchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=fchan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
gnfos:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=gnfos"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
hispachan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=hispachan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
horochan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=horochan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
iichan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=iichan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
lainchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=lainchan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
lolnada:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=lolnada"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
nowere:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=nowere"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
plus4chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=plus4chan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
sushigirl:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=sushigirl"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
synch:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=synch"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
tahta:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=tahta"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
tgchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=tgchan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
uboachan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=uboachan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
waifuist:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=waifuist"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
wizchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=wizchan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"
9chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
environment:
- "CF_CHAN=9chan"
- "CF_REDIS_HOST=redis"
- "CF_INFLUXDB=influxdb"
- "REDIS_HOST=redis"

View File

@ -1,6 +0,0 @@
FROM nginx:alpine
COPY nginx.conf /etc/nginx/
COPY ["/feed_viz", "/webroot"]
EXPOSE 80

@ -1 +0,0 @@
Subproject commit c8e11a73d74e6af19cab581c94abf943daea050e

View File

@ -1,48 +0,0 @@
user nginx;
worker_processes 1;
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
keepalive_timeout 65;
upstream socket {
server ws_adapter:3090;
}
server {
listen 80;
index index.html;
root /webroot;
location / {
try_files $uri $uri/ /index.html;
}
location /socket {
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "Upgrade";
proxy_set_header Host $host;
proxy_read_timeout 86400;
proxy_pass http://socket;
}
}
}

View File

@ -1,11 +1,28 @@
import json
import requests
from hexlib.log import logger
from vanwanet_scrape.scraper import Scraper
from chan.chan import CHANS
existing = CHANS["8kun2"]._boards
updated = list(existing)
added = set()
scraper = Scraper(
headers={
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Referer": "https://8kun.top/index.html"
},
domains=[
"8kun.top",
"media.8kun.top",
"sys.8kun.net"
],
logger=logger
)
def mask(board):
for i, b in enumerate(updated):
@ -22,8 +39,7 @@ def unmask(board):
for i in range(0, 500, 50):
r = requests.get("https://sys.8kun.top/board-search.php?page=" + str(i))
r = scraper.get("https://sys.8kun.top/board-search.php?page=" + str(i))
j = json.loads(r.text)
for board in j["boards"]:
@ -36,7 +52,7 @@ for i in range(0, 500, 50):
print("[+] " + board)
for board in existing:
if board not in added:
if board not in added and not board.startswith("_"):
mask(board)
print("(" + ",".join('"' + u + '"' for u in updated) + ")")

File diff suppressed because it is too large.

migrate_item_ids.py (new file, 73 lines)
View File

@ -0,0 +1,73 @@
import itertools
import orjson
import psycopg2
from hexlib.misc import buffered
from tqdm import tqdm
from hexlib.db import pg_fetch_cursor_all
from chan.chan import CHANS
if __name__ == '__main__':
conn = psycopg2.connect(
host="192.168.1.70",
port="5432",
user="feed_archiver",
password="",
dbname="feed_archiver"
)
conn.set_client_encoding("utf8")
table = "chan_4chan_post"
new_table = "chan2_4chan_post"
print(table)
# chan_name = table.split("_")[1]
# chan = CHANS[chan_name]
cur = conn.cursor()
cur2 = conn.cursor()
cur2.execute("""
CREATE TABLE IF NOT EXISTS %s (
id TEXT PRIMARY KEY NOT NULL,
archived_on TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
data JSONB NOT NULL
);
""" % new_table)
cur.execute("SELECT COUNT(*) FROM %s" % table)
row_count = cur.fetchone()[0]
cur.execute("DECLARE cur1 CURSOR FOR SELECT * FROM %s" % table)
rows = pg_fetch_cursor_all(cur, name="cur1", batch_size=5000)
@buffered(batch_size=1000)
def pg_bulk_insert(rows):
val_count = len(rows[0])
cur2.execute(
"INSERT INTO %s VALUES %s ON CONFLICT DO NOTHING" %
(
new_table,
", ".join(("(" + ",".join("%s" for _ in range(val_count)) + ")") for _ in rows)
),
list(itertools.chain(*rows))
)
for row in tqdm(rows, total=row_count):
id_, archived_on, data = row
new_id = data["_board"] + str(data["no"])
pg_bulk_insert([
(new_id, archived_on, orjson.dumps(data).decode())
])
pg_bulk_insert(None)
conn.commit()
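The per-row transform the script applies boils down to recomputing the id from the stored JSON; a sketch with a made-up row from chan_4chan_post:

data = {"_board": "g", "no": 81000000, "com": "example"}   # fabricated sample row
new_id = data["_board"] + str(data["no"])                  # -> "g81000000"
# (new_id, archived_on, data) is then inserted into chan2_4chan_post, with rows
# batched 1000 at a time through hexlib's @buffered decorator; the trailing
# pg_bulk_insert(None) call presumably flushes whatever is left in the buffer.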

Binary image removed (366 KiB, not shown).

run.py (83 changed lines)
View File

@ -1,40 +1,24 @@
import datetime
import json
import os
import time
import traceback
from datetime import datetime
from queue import Queue
from threading import Thread
import redis
from hexlib.concurrency import queue_iter
from hexlib.db import VolatileBooleanState, VolatileState
from hexlib.monitoring import Monitoring
from hexlib.env import get_web, get_redis
from hexlib.log import logger
from chan.chan import CHANS
from post_process import post_process
from util import logger, Web
BYPASS_RPS = False
DBNAME = "chan_feed"
if os.environ.get("CF_INFLUXDB"):
influxdb = Monitoring(DBNAME, host=os.environ.get("CF_INFLUXDB"), logger=logger, batch_size=100, flush_on_exit=True)
MONITORING = True
else:
MONITORING = False
REDIS_HOST = os.environ.get("CF_REDIS_HOST", "localhost")
REDIS_PORT = os.environ.get("CF_REDIS_PORT", 6379)
CHAN = os.environ.get("CF_CHAN", None)
CF_PUBLISH = os.environ.get("CF_PUBLISH", False)
ARC_LISTS = os.environ.get("CF_ARC_LISTS", "arc").split(",")
class ChanScanner:
def __init__(self, helper, proxy):
self.web = Web(influxdb if MONITORING else None, rps=helper.rps, get_method=helper.get_method, proxy=proxy)
def __init__(self, helper):
self.web = get_web()
self.helper = helper
self.state = state
@ -83,9 +67,8 @@ def once(func):
class ChanState:
def __init__(self, prefix):
self._posts = VolatileBooleanState(prefix, host=REDIS_HOST, port=REDIS_PORT)
self._threads = VolatileState(prefix, host=REDIS_HOST, port=REDIS_PORT)
print("redis host=" + REDIS_HOST)
self._posts = VolatileBooleanState(prefix)
self._threads = VolatileState(prefix)
def mark_visited(self, item: int):
self._posts["posts"][item] = True
@ -109,18 +92,12 @@ class ChanState:
}
def publish_worker(queue: Queue, helper, p):
while True:
def publish_worker(queue: Queue, helper):
for item, board in queue_iter(queue):
try:
item, board = queue.get()
if item is None:
break
publish(item, board, helper,)
publish(item, board, helper)
except Exception as e:
logger.error(str(e) + ": " + traceback.format_exc())
finally:
queue.task_done()
@once
@ -131,23 +108,7 @@ def publish(item, board, helper):
routing_key = "%s.%s.%s" % (CHAN, item_type, board)
message = json.dumps(item, separators=(',', ':'), ensure_ascii=False, sort_keys=True)
if CF_PUBLISH:
rdb.publish("chan." + routing_key, message)
for arc in ARC_LISTS:
rdb.lpush(arc + ".chan." + routing_key, message)
if MONITORING:
distance = datetime.utcnow() - datetime.utcfromtimestamp(helper.item_mtime(item))
influxdb.log([{
"measurement": CHAN,
"time": str(datetime.utcnow()),
"tags": {
"board": board
},
"fields": {
"distance": distance.total_seconds()
}
}])
rdb.lpush("arc.chan2." + routing_key, message)
if __name__ == "__main__":
@ -157,30 +118,20 @@ if __name__ == "__main__":
if save_folder:
chan_helper.save_folder = save_folder
proxy = None
if os.environ.get("CF_PROXY"):
proxy = os.environ.get("CF_PROXY")
logger.info("Using proxy %s" % proxy)
if BYPASS_RPS:
chan_helper.rps = 10
state = ChanState(CHAN)
rdb = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)
rdb = get_redis()
publish_q = Queue()
for _ in range(3):
publish_thread = Thread(target=publish_worker, args=(publish_q, chan_helper, proxy))
publish_thread.setDaemon(True)
publish_thread.start()
publish_thread = Thread(target=publish_worker, args=(publish_q, chan_helper))
publish_thread.setDaemon(True)
publish_thread.start()
s = ChanScanner(chan_helper, proxy)
s = ChanScanner(chan_helper)
while True:
try:
for p, b in s.all_posts():
publish_q.put((p, b))
except KeyboardInterrupt as e:
print("cleanup..")
for _ in range(3):
publish_q.put((None, None))
publish_q.put(None)
break
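Nothing in this repository reads those lists back (the feed_archiver service in docker-compose.yml does, via FA_PATTERN=arc.*), but a minimal redis-py sketch of draining the queues that publish() fills might look like this (key pattern inferred from the code above):

import json
import redis

rdb = redis.Redis(host="localhost", port=6379)
for key in rdb.scan_iter("arc.chan2.*"):        # lists filled by rdb.lpush(...) above
    while (raw := rdb.rpop(key)) is not None:   # oldest items first, since lpush adds to the head
        item = json.loads(raw)
        # ... process item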

View File

@ -1,2 +0,0 @@
#!/bin/bash
CURRENT_UID=$(id -u):$(id -g) SAVE_FOLDER=$(pwd)/data docker-compose up --force-recreate

util.py (deleted, 86 lines)
View File

@ -1,86 +0,0 @@
import logging
import sys
import traceback
from datetime import datetime
from logging import FileHandler, StreamHandler
import requests
from hexlib.misc import rate_limit
from urllib3 import disable_warnings
disable_warnings()
last_time_called = dict()
logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
for h in logger.handlers:
logger.removeHandler(h)
logger.addHandler(StreamHandler(sys.stdout))
class Web:
def __init__(self, monitoring, rps=1 / 2, proxy=None, get_method=None):
self.session = requests.Session()
if proxy:
self.session.proxies = {"http": proxy, "https": proxy}
self.session.verify = False
self._rps = rps
self.monitoring = monitoring
self._get_method = get_method
@rate_limit(self._rps)
def _get(url, **kwargs):
retries = 3
while retries > 0:
retries -= 1
try:
if self._get_method:
return self._get_method(url, **kwargs)
return self.session.get(url, **kwargs)
except KeyboardInterrupt as e:
raise e
except Exception as e:
logger.warning("Error with request %s: %s" % (url, str(e)))
raise Exception("Gave up request after maximum number of retries")
self._get = _get
def get(self, url, **kwargs):
try:
r = self._get(url, **kwargs)
logger.debug("GET %s <%d>" % (url, r.status_code))
if self.monitoring:
self.monitoring.log([{
"measurement": "web",
"time": str(datetime.utcnow()),
"fields": {
"status_code": r.status_code,
"size": len(r.content),
},
"tags": {
"ok": r.status_code == 200
},
}])
return r
except KeyboardInterrupt as e:
raise e
except Exception as e:
logger.error(str(e) + traceback.format_exc())
if self.monitoring:
self.monitoring.log([{
"measurement": "web",
"time": str(datetime.utcnow()),
"fields": {
"status_code": 0,
"size": 0,
},
"tags": {
"ok": False
},
}])
return None