mirror of
https://github.com/simon987/chan_feed.git
synced 2025-04-10 14:06:42 +00:00
Update hexlib, bug fixes, refactor, migrate item IDs
This commit is contained in:
parent
6d0e3f0f52
commit
0133c42d62
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -1,3 +0,0 @@
|
|||||||
[submodule "docker_viz/feed_viz"]
|
|
||||||
path = docker_viz/feed_viz
|
|
||||||
url = https://github.com/simon987/feed_viz
|
|
15
README.md
15
README.md
@ -1,15 +0,0 @@
|
|||||||
### chan_feed
|
|
||||||
|
|
||||||
Daemon that fetches posts from compatible *chan
|
|
||||||
image boards and publishes serialised JSON to redis
|
|
||||||
for real-time ingest.
|
|
||||||
|
|
||||||
Compatible image boards: 4chan, lainchan, uboachan,
|
|
||||||
22chan, wizchan, 1chan, 2ch.hk, endchan, 38chan, alokal,
|
|
||||||
horochan, doushio, desuchan, tgchan, lolnada, 7chan, chanon,
|
|
||||||
chan.org.li, hispachan, 8kun, nowere, iichan, 2chan and more.
|
|
||||||
|
|
||||||
Can optionally push monitoring data to InfluxDB. Below is an
|
|
||||||
example of Grafana being used to display it.
|
|
||||||
|
|
||||||

|
|
175
chan/chan.py
175
chan/chan.py
@ -40,7 +40,6 @@ CHANS = {
|
|||||||
"news", "out", "po", "pol", "qst", "sci", "soc", "sp",
|
"news", "out", "po", "pol", "qst", "sci", "soc", "sp",
|
||||||
"tg", "toy", "trv", "tv", "vp", "wsg", "wsr", "x"
|
"tg", "toy", "trv", "tv", "vp", "wsg", "wsr", "x"
|
||||||
),
|
),
|
||||||
rps=3 / 2
|
|
||||||
),
|
),
|
||||||
"lainchan": JsonChanHelper(
|
"lainchan": JsonChanHelper(
|
||||||
2,
|
2,
|
||||||
@ -53,7 +52,6 @@ CHANS = {
|
|||||||
"hum", "drg", "zzz", "layer", "q", "r", "_cult", "_psy",
|
"hum", "drg", "zzz", "layer", "q", "r", "_cult", "_psy",
|
||||||
"_mega",
|
"_mega",
|
||||||
),
|
),
|
||||||
rps=1 / 60
|
|
||||||
),
|
),
|
||||||
"uboachan": JsonChanHelper(
|
"uboachan": JsonChanHelper(
|
||||||
3,
|
3,
|
||||||
@ -65,7 +63,6 @@ CHANS = {
|
|||||||
"yn", "yndd", "fg", "yume", "o", "lit", "media", "og",
|
"yn", "yndd", "fg", "yume", "o", "lit", "media", "og",
|
||||||
"ig", "2", "ot", "hikki", "cc", "x", "sugg"
|
"ig", "2", "ot", "hikki", "cc", "x", "sugg"
|
||||||
),
|
),
|
||||||
rps=1 / 120
|
|
||||||
),
|
),
|
||||||
"22chan": JsonChanHelper(
|
"22chan": JsonChanHelper(
|
||||||
4,
|
4,
|
||||||
@ -77,7 +74,6 @@ CHANS = {
|
|||||||
"a", "b", "f", "yu", "i", "k", "mu", "pol", "sewers",
|
"a", "b", "f", "yu", "i", "k", "mu", "pol", "sewers",
|
||||||
"sg", "t", "vg"
|
"sg", "t", "vg"
|
||||||
),
|
),
|
||||||
rps=1 / 120
|
|
||||||
),
|
),
|
||||||
"wizchan": JsonChanHelper(
|
"wizchan": JsonChanHelper(
|
||||||
5,
|
5,
|
||||||
@ -88,7 +84,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"wiz", "dep", "hob", "lounge", "jp", "meta", "games", "music",
|
"wiz", "dep", "hob", "lounge", "jp", "meta", "games", "music",
|
||||||
),
|
),
|
||||||
rps=1 / 60
|
|
||||||
),
|
),
|
||||||
# TODO
|
# TODO
|
||||||
# "1chan": ChanHelper(
|
# "1chan": ChanHelper(
|
||||||
@ -100,7 +95,6 @@ CHANS = {
|
|||||||
# (
|
# (
|
||||||
# "rails"
|
# "rails"
|
||||||
# ),
|
# ),
|
||||||
# rps=1 / 600
|
|
||||||
# ),
|
# ),
|
||||||
"2chhk": RussianJsonChanHelper(
|
"2chhk": RussianJsonChanHelper(
|
||||||
7,
|
7,
|
||||||
@ -120,7 +114,6 @@ CHANS = {
|
|||||||
"a", "fd", "ja", "ma", "vn", "fg", "fur", "gg", "ga",
|
"a", "fd", "ja", "ma", "vn", "fg", "fur", "gg", "ga",
|
||||||
"vape", "h", "ho", "hc", "e", "fet", "sex", "fag"
|
"vape", "h", "ho", "hc", "e", "fet", "sex", "fag"
|
||||||
),
|
),
|
||||||
rps=1 / 5
|
|
||||||
),
|
),
|
||||||
"endchan": EndchanHtmlChanHelper(
|
"endchan": EndchanHtmlChanHelper(
|
||||||
8,
|
8,
|
||||||
@ -141,7 +134,6 @@ CHANS = {
|
|||||||
"ausneets", "qanonresearch", "polru", "yuri", "christianity",
|
"ausneets", "qanonresearch", "polru", "yuri", "christianity",
|
||||||
"kc", "rapport", "news", "brit", "webm", "4chon"
|
"kc", "rapport", "news", "brit", "webm", "4chon"
|
||||||
),
|
),
|
||||||
rps=1 / 10
|
|
||||||
),
|
),
|
||||||
"38chan": JsonChanHelper(
|
"38chan": JsonChanHelper(
|
||||||
9,
|
9,
|
||||||
@ -152,7 +144,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"a", "b", "g", "38"
|
"a", "b", "g", "38"
|
||||||
),
|
),
|
||||||
rps=1 / 600
|
|
||||||
),
|
),
|
||||||
"alokal": AlokalJsonChanHelper(
|
"alokal": AlokalJsonChanHelper(
|
||||||
10,
|
10,
|
||||||
@ -164,7 +155,6 @@ CHANS = {
|
|||||||
"b", "pol", "sk", "int", "slav", "s", "gv", "mda", "sp",
|
"b", "pol", "sk", "int", "slav", "s", "gv", "mda", "sp",
|
||||||
"fit", "had",
|
"fit", "had",
|
||||||
),
|
),
|
||||||
rps=1 / 60
|
|
||||||
),
|
),
|
||||||
"gnfos": JsonChanHelper(
|
"gnfos": JsonChanHelper(
|
||||||
11,
|
11,
|
||||||
@ -175,7 +165,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"jp", "drive"
|
"jp", "drive"
|
||||||
),
|
),
|
||||||
rps=1 / 120
|
|
||||||
),
|
),
|
||||||
"synch": SynchJsonChanHelper(
|
"synch": SynchJsonChanHelper(
|
||||||
12,
|
12,
|
||||||
@ -187,7 +176,6 @@ CHANS = {
|
|||||||
"b", "d", "_r", "a", "_g", "mlp", "mu", "_tv", "vg",
|
"b", "d", "_r", "a", "_g", "mlp", "mu", "_tv", "vg",
|
||||||
"_wh", "old", "test"
|
"_wh", "old", "test"
|
||||||
),
|
),
|
||||||
rps=1 / 120
|
|
||||||
),
|
),
|
||||||
"tahta": JsonChanHelper(
|
"tahta": JsonChanHelper(
|
||||||
13,
|
13,
|
||||||
@ -198,7 +186,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"b", "g", "s", "v"
|
"b", "g", "s", "v"
|
||||||
),
|
),
|
||||||
rps=1 / 300
|
|
||||||
),
|
),
|
||||||
"awsumchan": JsonChanHelper(
|
"awsumchan": JsonChanHelper(
|
||||||
14,
|
14,
|
||||||
@ -209,7 +196,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"an", "aw", "cr", "fi", "ra", "au", "ga", "he", "sp"
|
"an", "aw", "cr", "fi", "ra", "au", "ga", "he", "sp"
|
||||||
),
|
),
|
||||||
rps=1 / 600
|
|
||||||
),
|
),
|
||||||
"horochan": MayuriChanHelper(
|
"horochan": MayuriChanHelper(
|
||||||
15,
|
15,
|
||||||
@ -218,7 +204,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"b",
|
"b",
|
||||||
),
|
),
|
||||||
rps=1 / 20
|
|
||||||
),
|
),
|
||||||
"doushio": DoushioHtmlChanHelper(
|
"doushio": DoushioHtmlChanHelper(
|
||||||
16,
|
16,
|
||||||
@ -229,7 +214,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"moe",
|
"moe",
|
||||||
),
|
),
|
||||||
rps=1 / 20
|
|
||||||
),
|
),
|
||||||
"desuchan": DesuChanHtmlChanHelper(
|
"desuchan": DesuChanHtmlChanHelper(
|
||||||
17,
|
17,
|
||||||
@ -245,7 +229,6 @@ CHANS = {
|
|||||||
"arrrrr", "brocastan", "gar", "gif", "media", "ot", "r", "w",
|
"arrrrr", "brocastan", "gar", "gif", "media", "ot", "r", "w",
|
||||||
"sandbox", "sugg"
|
"sandbox", "sugg"
|
||||||
),
|
),
|
||||||
rps=1 / 30
|
|
||||||
),
|
),
|
||||||
"aurorachan": DesuChanHtmlChanHelper(
|
"aurorachan": DesuChanHtmlChanHelper(
|
||||||
18,
|
18,
|
||||||
@ -257,7 +240,6 @@ CHANS = {
|
|||||||
"_bm", "de", "ic", "rp", "rpi", "v", "w", "tg",
|
"_bm", "de", "ic", "rp", "rpi", "v", "w", "tg",
|
||||||
"alt", "b", "g", "pkmn", "yuri", "fl", "mu", "sugg"
|
"alt", "b", "g", "pkmn", "yuri", "fl", "mu", "sugg"
|
||||||
),
|
),
|
||||||
rps=1 / 20
|
|
||||||
),
|
),
|
||||||
"tgchan": TgChanHtmlChanHelper(
|
"tgchan": TgChanHtmlChanHelper(
|
||||||
19,
|
19,
|
||||||
@ -268,7 +250,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"draw", "meep", "quest", "questdis", "tg", "icons",
|
"draw", "meep", "quest", "questdis", "tg", "icons",
|
||||||
),
|
),
|
||||||
rps=1 / 600,
|
|
||||||
),
|
),
|
||||||
"lolnada": LolNadaHtmlChanHelper(
|
"lolnada": LolNadaHtmlChanHelper(
|
||||||
20,
|
20,
|
||||||
@ -280,7 +261,6 @@ CHANS = {
|
|||||||
"b", "a", "aw", "cgl", "dw", "int", "qt", "sad", "t",
|
"b", "a", "aw", "cgl", "dw", "int", "qt", "sad", "t",
|
||||||
"toy", "v", "x", "34", "e", "f", "h"
|
"toy", "v", "x", "34", "e", "f", "h"
|
||||||
),
|
),
|
||||||
rps=1 / 60,
|
|
||||||
),
|
),
|
||||||
"fchan": FChanHtmlChanHelper(
|
"fchan": FChanHtmlChanHelper(
|
||||||
21,
|
21,
|
||||||
@ -291,7 +271,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"f", "m", "h", "s", "toon", "a", "ah", "c", "artist", "crit", "b"
|
"f", "m", "h", "s", "toon", "a", "ah", "c", "artist", "crit", "b"
|
||||||
),
|
),
|
||||||
rps=1 / 60,
|
|
||||||
),
|
),
|
||||||
"0chan": ZerochanHtmlChanHelper(
|
"0chan": ZerochanHtmlChanHelper(
|
||||||
22,
|
22,
|
||||||
@ -307,7 +286,6 @@ CHANS = {
|
|||||||
"poligon", "postach", "psih", "r", "rm", "s", "shrek", "shy", "t",
|
"poligon", "postach", "psih", "r", "rm", "s", "shrek", "shy", "t",
|
||||||
"test", "tlp", "tmp", "tv", "vg", "vipe", "wh", "xikkadvach", "ynet"
|
"test", "tlp", "tmp", "tv", "vg", "vipe", "wh", "xikkadvach", "ynet"
|
||||||
),
|
),
|
||||||
rps=1 / 5
|
|
||||||
),
|
),
|
||||||
"410chan": Chan410HtmlChanHelper(
|
"410chan": Chan410HtmlChanHelper(
|
||||||
23,
|
23,
|
||||||
@ -318,7 +296,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"d", "b", "cu", "dev", "r", "a", "ts", "ci"
|
"d", "b", "cu", "dev", "r", "a", "ts", "ci"
|
||||||
),
|
),
|
||||||
rps=1 / 120
|
|
||||||
),
|
),
|
||||||
"7chan": Chan7HtmlChanHelper(
|
"7chan": Chan7HtmlChanHelper(
|
||||||
24,
|
24,
|
||||||
@ -335,7 +312,6 @@ CHANS = {
|
|||||||
"elit", "fag", "fur", "gif", "h", "men", "pco", "s",
|
"elit", "fag", "fur", "gif", "h", "men", "pco", "s",
|
||||||
"sm", "ss", "unf", "v",
|
"sm", "ss", "unf", "v",
|
||||||
),
|
),
|
||||||
rps=1 / 30
|
|
||||||
),
|
),
|
||||||
"chanon": ChanonHtmlChanHelper(
|
"chanon": ChanonHtmlChanHelper(
|
||||||
25,
|
25,
|
||||||
@ -347,7 +323,6 @@ CHANS = {
|
|||||||
"a", "int", "j", "m", "pc", "pol", "prog", "tv",
|
"a", "int", "j", "m", "pc", "pol", "prog", "tv",
|
||||||
"b", "milo", "pr0n", "s", "c", "sug",
|
"b", "milo", "pr0n", "s", "c", "sug",
|
||||||
),
|
),
|
||||||
rps=1 / 60
|
|
||||||
),
|
),
|
||||||
"chanorg": JsonChanHelper(
|
"chanorg": JsonChanHelper(
|
||||||
26,
|
26,
|
||||||
@ -358,7 +333,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"b", "goys"
|
"b", "goys"
|
||||||
),
|
),
|
||||||
rps=1 / 60
|
|
||||||
),
|
),
|
||||||
"iichan": IichanHtmlChanHelper(
|
"iichan": IichanHtmlChanHelper(
|
||||||
27,
|
27,
|
||||||
@ -373,7 +347,6 @@ CHANS = {
|
|||||||
"aa", "abe", "c", "fi", "jp", "rm", "tan", "to", "ts",
|
"aa", "abe", "c", "fi", "jp", "rm", "tan", "to", "ts",
|
||||||
"vn", "vo", "misc"
|
"vn", "vo", "misc"
|
||||||
),
|
),
|
||||||
rps=1 / 10
|
|
||||||
),
|
),
|
||||||
"nowere": NowereHtmlChanHelper(
|
"nowere": NowereHtmlChanHelper(
|
||||||
28,
|
28,
|
||||||
@ -384,7 +357,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"b", "d", "tu", "a", "ph", "wa", "cg", "t", "p"
|
"b", "d", "tu", "a", "ph", "wa", "cg", "t", "p"
|
||||||
),
|
),
|
||||||
rps=1 / 60
|
|
||||||
),
|
),
|
||||||
"8kun2": JsonKunChanHelper(
|
"8kun2": JsonKunChanHelper(
|
||||||
35,
|
35,
|
||||||
@ -392,67 +364,84 @@ CHANS = {
|
|||||||
"https://media.8kun.top/",
|
"https://media.8kun.top/",
|
||||||
"/res/",
|
"/res/",
|
||||||
"file_store/",
|
"file_store/",
|
||||||
("1", "55chan", "_64chen", "8bantb", "8tube", "a", "_abdl2", "agdg", "amv", "aneki", "animu", "animus", "ara",
|
("1", "55chan", "_64chen", "8bantb", "8tube", "a", "_abdl2", "agdg", "_amv", "aneki", "animu", "animus", "ara",
|
||||||
"arda", "arms", "asatru", "asmr", "aus", "ausneets", "__b", "__baka", "_baneposting", "__baseballbat",
|
"arda", "_arms", "asatru", "_asmr", "aus", "ausneets", "_b", "_baka", "_baneposting", "_baseballbat",
|
||||||
"bcards", "bleached", "blog", "__bonehurtingjuice", "bq", "__brit", "bubblegum", "builders", "bunkers", "butt",
|
"_bcards", "bleached", "blog", "_bonehurtingjuice", "_bq", "_brit", "bubblegum", "builders", "bunkers", "butt",
|
||||||
"cafechan", "caffe", "canada", "cath", "chori", "choroy", "christian", "christianity", "christianmeme",
|
"cafechan", "caffe", "canada", "_cath", "chori", "choroy", "christian", "christianity", "_christianmeme",
|
||||||
"cicachan", "civicrs", "ck", "cloveros", "co", "cow", "__cuckquean", "cute", "cyber", "cyoa", "__czech",
|
"cicachan", "civicrs", "ck", "cloveros", "co", "cow", "_cuckquean", "cute", "cyber", "cyoa", "_czech",
|
||||||
"dadtalk", "danpu", "dao101", "degen", "delete", "dempart", "desu", "diaperfags", "diaperfetish", "dir",
|
"_dadtalk", "danpu", "dao101", "degen", "delete", "dempart", "desu", "diaperfags", "diaperfetish", "dir",
|
||||||
"__dolphin", "dpfag", "_dpr", "druid", "_e9y", "eatme", "ebola", "eerie", "egy", "egypt", "etika", "eu",
|
"_dolphin", "_dpfag", "_dpr", "druid", "_e9y", "_eatme", "ebola", "eerie", "egy", "egypt", "_etika", "_eu",
|
||||||
"euskotxa", "__exit", "f1", "fa", "fairy", "fallen", "fast", "faygo", "feet", "femaledomination", "feri",
|
"_euskotxa", "_exit", "f1", "fa", "_fairy", "fallen", "fast", "faygo", "feet", "femaledomination", "feri",
|
||||||
"__fightcomms", "film", "flemish", "floss", "fortnite", "freedomzine", "fukemo", "fumo", "fur", "furry", "g",
|
"_fightcomms", "film", "flemish", "_floss", "fortnite", "freedomzine", "fukemo", "fumo", "fur", "furry", "g",
|
||||||
"gamergatehq", "genesis", "_gesu", "ggis", "girltalk", "greenbreeze", "gts", "haxxor", "hentai", "hentaiclub",
|
"gamergatehq", "genesis", "_gesu", "_ggis", "girltalk", "greenbreeze", "gts", "_haxxor", "hentai",
|
||||||
"__herm", "hermetics", "hgb", "hgg", "__hindu", "hisparefugio", "hissss", "hnt", "hover", "hybrids", "hydrus",
|
"hentaiclub", "_herm", "_hermetics", "_hgb", "hgg", "_hindu", "hisparefugio", "_hissss", "hnt", "hover",
|
||||||
"hypno", "_hypnochan", "icup", "imperium", "in", "ipfs", "ircsecrets", "islam", "ita", "jaooo", "jewess",
|
"hybrids", "_hydrus", "hypno", "_hypnochan", "icup", "imperium", "in", "ipfs", "ircsecrets", "islam", "ita",
|
||||||
"jmaatv", "joker", "jp", "k", "_kekforceusa", "kemono", "kocsog", "kohlchan", "__(komica)", "_komika", "kpop",
|
"_jaooo", "jewess", "_jmaatv", "_joker", "jp", "k", "_kekforceusa", "kemono", "kocsog", "kohlchan",
|
||||||
"lain", "_lego", "leo", "lewd", "lit", "lol", "loomis", "loroy", "luddite", "magick", "maka", "mde",
|
"_(komica)", "_komika", "kpop", "lain", "_lego", "leo", "lewd", "lit", "_lol", "loomis", "_loroy", "luddite",
|
||||||
"merrychristmas", "miku", "milf", "mom", "monster", "msb", "mtb", "mtt", "mu", "n0thingness", "nanachi",
|
"magick", "maka", "mde", "_merrychristmas", "_miku", "milf", "_mom", "monster", "_msb", "mtb", "mtt", "mu",
|
||||||
"natiofr", "nep", "newbrit", "newsplus", "nobody", "nofap", "nofur", "nogatco", "nothingness", "ntr", "_nuke8",
|
"_n0thingness", "_nanachi", "natiofr", "nep", "newbrit", "newsplus", "_nobody", "nofap", "_nofur", "_nogatco",
|
||||||
"oanda", "__ocb", "__ocult", "_omorashi", "opmk", "os", "otter", "p", "panconleche", "pdfs", "__peaceofmind",
|
"nothingness", "ntr", "_nuke8", "_oanda", "_ocb", "_ocult", "_omorashi", "_opmk", "os", "otter", "p",
|
||||||
"pen", "philosophy", "_pkmns", "pnd", "pokeporn", "polymath", "pone", "projectdcomms", "__pyatibrat", "_qm",
|
"_panconleche", "pdfs", "_peaceofmind", "pen", "philosophy", "_pkmns", "pnd", "pokeporn", "polymath", "pone",
|
||||||
"qpatriotresearch", "__qresearch", "qrnews", "__rand21", "rec", "rmart", "rusrandom", "rzabczan", "s", "s8s",
|
"projectdcomms", "_pyatibrat", "_qm", "qpatriotresearch", "qresearch", "qrnews", "_rand21", "rec", "rmart",
|
||||||
"sag", "sapphic", "shousa", "sikhi", "sip", "sl", "_snowboarding", "socpl", "strek", "subs", "__sve", "t",
|
"_rusrandom", "rzabczan", "s", "s8s", "_sag", "sapphic", "shousa", "_sikhi", "sip", "sl", "_snowboarding",
|
||||||
"tan", "tdt", "tech9", "techan", "techbunker", "tek", "templeos", "tenda", "teraha", "__texit", "tf2", "__tg",
|
"socpl", "strek", "_subs", "_sve", "t", "tan", "tdt", "_tech9", "_techan", "techbunker", "_tek", "templeos",
|
||||||
"_thb", "thedickshow", "throat", "_tibby", "tikilounge", "tkr", "tr55", "__trashcollector", "truthlegion",
|
"tenda", "teraha", "_texit", "tf2", "_tg", "_thb", "_thedickshow", "throat", "_tibby", "tikilounge", "tkr",
|
||||||
"tulpamancers", "turul", "tutturu", "tv", "u", "uaco", "_ucla", "underground", "__usersunion", "v", "vichan",
|
"_tr55", "_trashcollector", "truthlegion", "tulpamancers", "turul", "tutturu", "tv", "u", "_uaco", "_ucla",
|
||||||
"vietkong", "vietnam", "vore", "vr", "_warposting", "wdsc", "webm", "wg", "__wga", "wikieat", "wis", "wmafsex",
|
"underground", "_usersunion", "v", "vichan", "_vietkong", "vietnam", "vore", "vr", "_warposting", "wdsc",
|
||||||
"workrelated", "wqt", "wx", "x", "__xivl", "__xtian", "zoomerright", "zundel", "0", "55sync", "abdl",
|
"webm", "wg", "_wga", "wikieat", "wis", "wmafsex", "_workrelated", "_wqt", "wx", "x", "_xivl", "_xtian",
|
||||||
"alleycat", "_arisu", "arisubunker", "_arp", "bane", "_bimbohypnosis", "_bluemoon", "bmn", "brains", "cats",
|
"_zoomerright", "zundel", "0", "55sync", "abdl", "alleycat", "_arisu", "_arisubunker", "_arp", "_bane",
|
||||||
"_chance", "clang", "comfy", "critters", "_cursed", "_cvine", "cze", "d", "dcaco", "demonp", "_dnmd", "doomer",
|
"_bimbohypnosis", "_bluemoon", "bmn", "brains", "cats", "_chance", "clang", "comfy", "_critters", "_cursed",
|
||||||
"doot", "elitabla", "_empanada", "erp", "_falseflags", "fashionplus", "fata", "femdom", "fit", "_flg",
|
"_cvine", "_cze", "d", "dcaco", "_demonp", "_dnmd", "doomer", "doot", "elitabla", "_empanada", "erp",
|
||||||
"_fr8chan", "futyitorna", "garrett", "_giantesshentai", "hentaiporn", "hmfr", "hooliedayz", "hsp", "hujszon",
|
"_falseflags", "fashionplus", "_fata", "femdom", "fit", "_flg", "_fr8chan", "futyitorna", "garrett",
|
||||||
"iep", "just", "k46", "kind", "_kiwc", "kukichan", "_lacajita", "_legos", "lgd", "liveanarchy",
|
"_giantesshentai", "hentaiporn", "_hmfr", "hooliedayz", "hsp", "_hujszon", "_iep", "just", "k46", "_kind",
|
||||||
"luciddreaming", "m", "_mapp", "mental", "_mets", "_milhis", "monarchy", "_myon", "newhomosuck", "newsci",
|
"_kiwc", "kukichan", "_lacajita", "_legos", "_lgd", "liveanarchy", "_luciddreaming", "m", "_mapp", "mental",
|
||||||
"_nine", "oes", "onepiece", "_other369", "otomad", "_penguware", "psyid", "qresearch2gen", "rule34",
|
"_mets", "_milhis", "monarchy", "_myon", "newhomosuck", "newsci", "_nine", "_oes", "_onepiece", "_other369",
|
||||||
"_satorare", "sonyeon", "split", "sunflower", "_tae", "test", "_tft", "tftg", "toy", "trap", "_vein",
|
"_otomad", "_penguware", "psyid", "qresearch2gen", "rule34", "_satorare", "sonyeon", "split", "_sunflower",
|
||||||
"_virtualreality", "vivian", "voros", "wbr", "_weird", "wooo", "yuuka", "fringe", "random", "cuteboys", "tech",
|
"_tae", "test", "_tft", "tftg", "toy", "trap", "_vein", "_virtualreality", "vivian", "voros", "wbr", "_weird",
|
||||||
"internatiomall", "interracial", "liberty", "htg", "mai", "komica", "cutebois", "argentina", "r", "tf",
|
"wooo", "yuuka", "fringe", "random", "cuteboys", "tech", "_internatiomall", "interracial", "liberty", "htg",
|
||||||
"draftnote", "abcu", "k117", "britfeel", "liberty", "htg", "mai", "komica", "cutebois", "argentina", "r", "tf",
|
"mai", "komica", "cutebois", "argentina", "r", "tf", "draftnote", "abcu", "_k117", "britfeel", "liberty",
|
||||||
"draftnote", "abcu", "k117", "britfeel", "y", "an", "francofil", "portal", "royalhawk", "vdm", "bullmask",
|
"htg", "mai", "komica", "cutebois", "argentina", "r", "tf", "draftnote", "abcu", "_k117", "britfeel", "y",
|
||||||
"imouto", "tripfriend", "arepa", "rwby", "sw", "y", "an", "francofil", "portal", "royalhawk", "vdm",
|
"an", "francofil", "portal", "_royalhawk", "_vdm", "_bullmask", "imouto", "tripfriend", "arepa", "rwby", "sw",
|
||||||
"bullmask", "imouto", "tripfriend", "arepa", "rwby", "sw", "magali", "hikki", "biz", "eris", "india", "mg",
|
"y", "an", "francofil", "portal", "_royalhawk", "_vdm", "_bullmask", "imouto", "tripfriend", "arepa", "rwby",
|
||||||
"magali", "hikki", "biz", "eris", "india", "mg", "out", "infinity", "tifa", "muslim", "out", "infinity",
|
"sw", "magali", "hikki", "biz", "eris", "india", "mg", "magali", "hikki", "biz", "eris", "india", "mg", "out",
|
||||||
"tifa", "muslim", "slackware", "archivo", "flatearth", "yaoi", "boombox", "wdp", "thedonald",
|
"_infinity", "tifa", "_muslim", "out", "_infinity", "tifa", "_muslim", "slackware", "archivo", "_flatearth",
|
||||||
"libertedexpression", "khyber", "jsr", "slackware", "archivo", "flatearth", "yaoi", "boombox", "wdp",
|
"_yaoi", "_boombox", "_wdp", "thedonald", "libertedexpression", "_khyber", "jsr", "slackware", "archivo",
|
||||||
"thedonald", "libertedexpression", "khyber", "jsr", "fso", "wumpawhip", "buddhismhotline", "indochinaexpats",
|
"_flatearth", "_yaoi", "_boombox", "_wdp", "thedonald", "libertedexpression", "_khyber", "jsr", "fso",
|
||||||
"ett", "redbar", "skyline350gt", "asc", "bazafx", "bestkorea", "covid19", "sokra", "bowsu", "qpatriotsunited",
|
"wumpawhip", "_buddhismhotline", "indochinaexpats", "_ett", "_redbar", "_skyline350gt", "_asc", "bazafx",
|
||||||
"verzet", "wlctint", "cultstate", "melody", "vedic", "yhvh", "1cok", "astropolis", "fso", "wumpawhip",
|
"bestkorea", "covid19", "_sokra", "_bowsu", "_qpatriotsunited", "_verzet", "_wlctint", "_cultstate", "_melody",
|
||||||
"buddhismhotline", "indochinaexpats", "ett", "redbar", "skyline350gt", "asc", "bazafx", "bestkorea", "covid19",
|
"_vedic", "yhvh", "1cok", "_astropolis", "fso", "wumpawhip", "_buddhismhotline", "indochinaexpats", "_ett",
|
||||||
"sokra", "bowsu", "qpatriotsunited", "verzet", "wlctint", "cultstate", "melody", "vedic", "yhvh", "1cok",
|
"_redbar", "_skyline350gt", "_asc", "bazafx", "bestkorea", "covid19", "_sokra", "_bowsu", "_qpatriotsunited",
|
||||||
"astropolis", "earthlibfront", "pardochan", "stanislawowski", "thetrump", "yukkuri", "1825kun", "cryptobtc",
|
"_verzet", "_wlctint", "_cultstate", "_melody", "_vedic", "yhvh", "1cok", "_astropolis", "_earthlibfront",
|
||||||
"isol", "knights", "language", "rr34", "sperg", "awaken", "belgium", "blizzard", "brain", "buddha", "dbs",
|
"_pardochan", "_stanislawowski", "_thetrump", "yukkuri", "1825kun", "cryptobtc", "_isol", "_knights",
|
||||||
"deestevensvoice4you", "f4net", "fuckuchina", "gbtv", "hairygirls", "hallaca", "homeowner", "indo", "jersey",
|
"language", "_rr34", "_sperg", "_awaken", "_belgium", "_blizzard", "_brain", "buddha", "_dbs",
|
||||||
"jigglypuff", "lbt", "madh4ckrs", "medcorp", "miamichan", "mrsfrisby", "mulatto", "mupro", "nhoodlink",
|
"_deestevensvoice4you", "_f4net", "_fuckuchina", "_gbtv", "hairygirls", "_hallaca", "_homeowner", "indo",
|
||||||
"p5porn", "patriotrevolution", "peko", "projectobject", "prop", "pups", "qanonspain", "qcastellano",
|
"_jersey", "_jigglypuff", "_lbt", "_madh4ckrs", "_medcorp", "_miamichan", "mrsfrisby", "_mulatto", "_mupro",
|
||||||
"earthlibfront", "pardochan", "stanislawowski", "thetrump", "yukkuri", "1825kun", "cryptobtc", "isol",
|
"_nhoodlink", "_p5porn", "_patriotrevolution", "_peko", "_projectobject", "_prop", "pups", "_qanonspain",
|
||||||
"knights", "language", "rr34", "sperg", "awaken", "belgium", "blizzard", "brain", "buddha", "dbs",
|
"_qcastellano", "_earthlibfront", "_pardochan", "_stanislawowski", "_thetrump", "yukkuri", "1825kun",
|
||||||
"deestevensvoice4you", "f4net", "fuckuchina", "gbtv", "hairygirls", "hallaca", "homeowner", "indo", "jersey",
|
"cryptobtc", "_isol", "_knights", "language", "_rr34", "_sperg", "_awaken", "_belgium", "_blizzard", "_brain",
|
||||||
"jigglypuff", "lbt", "madh4ckrs", "medcorp", "miamichan", "mrsfrisby", "mulatto", "mupro", "nhoodlink",
|
"buddha", "_dbs", "_deestevensvoice4you", "_f4net", "_fuckuchina", "_gbtv", "hairygirls", "_hallaca",
|
||||||
"p5porn", "patriotrevolution", "peko", "projectobject", "prop", "pups", "qanonspain", "qcastellano", "qsocial",
|
"_homeowner", "indo", "_jersey", "_jigglypuff", "_lbt", "_madh4ckrs", "_medcorp", "_miamichan", "mrsfrisby",
|
||||||
"resist", "revolu", "skemt", "sketheory", "spaceforce", "surro", "thehand", "transit", "vitaecryptocurrency",
|
"_mulatto", "_mupro", "_nhoodlink", "_p5porn", "_patriotrevolution", "_peko", "_projectobject", "_prop",
|
||||||
"qsocial", "resist", "revolu", "skemt", "sketheory", "spaceforce", "surro", "thehand", "transit",
|
"pups", "_qanonspain", "_qcastellano", "qsocial", "_resist", "_revolu", "_skemt", "_sketheory", "_spaceforce",
|
||||||
"vitaecryptocurrency"),
|
"_surro", "_thehand", "_transit", "_vitaecryptocurrency", "qsocial", "_resist", "_revolu", "_skemt",
|
||||||
rps=1 / 3
|
"_sketheory", "_spaceforce", "_surro", "_thehand", "_transit", "_vitaecryptocurrency", "midnightriders",
|
||||||
|
"tingles", "1cc", "prog", "ytc", "arcagayghetto", "prog", "ytc", "arcagayghetto", "2hu", "o", "warroom", "2hu",
|
||||||
|
"o", "warroom", "ebon", "xiaomicha", "ebon", "xiaomicha", "gnosticwarfare", "moldnet", "zenczan", "cosplay",
|
||||||
|
"otakus", "nohup", "frenzone", "8dixie", "hqa", "pundit", "vrgg", "uf0", "malaysia", "gnosticwarfare",
|
||||||
|
"moldnet", "zenczan", "cosplay", "otakus", "nohup", "frenzone", "8dixie", "hqa", "pundit", "vrgg", "uf0",
|
||||||
|
"malaysia", "instruments", "unlightopen", "pso2g", "jozsicsan", "komijoke", "bmsgeu", "92k", "komicaz", "pcal",
|
||||||
|
"accent", "wethepatriots", "porussia", "1a", "tarhana", "bigwomen", "maths", "instruments", "unlightopen",
|
||||||
|
"pso2g", "jozsicsan", "komijoke", "bmsgeu", "92k", "komicaz", "pcal", "accent", "wethepatriots", "porussia",
|
||||||
|
"1a", "tarhana", "bigwomen", "maths", "coffeetalk", "arcader", "kingcrimson", "moonlight", "trkey", "whogen",
|
||||||
|
"xivlgr", "amichan", "gendercritical", "inflg", "komicalol", "capcom", "coser", "cud", "feedism", "grc",
|
||||||
|
"reimuchan", "stalker2", "2020istheyear", "carib", "jumpchen", "mishmash", "qbl", "sakurachan", "satsukichan",
|
||||||
|
"taodick", "aes", "gacha", "nfl2", "redlands", "traditionalcatholics", "tsiou", "airsoft2", "animation",
|
||||||
|
"cafardchan", "chrstdis", "coffeetalk", "arcader", "kingcrimson", "moonlight", "trkey", "whogen", "xivlgr",
|
||||||
|
"amichan", "gendercritical", "inflg", "komicalol", "capcom", "coser", "cud", "feedism", "grc", "reimuchan",
|
||||||
|
"stalker2", "2020istheyear", "carib", "jumpchen", "mishmash", "qbl", "sakurachan", "satsukichan", "taodick",
|
||||||
|
"aes", "gacha", "nfl2", "redlands", "traditionalcatholics", "tsiou", "airsoft2", "animation", "cafardchan",
|
||||||
|
"chrstdis", "komicamc", "marista", "neetpride", "numis", "progmusic", "retrogaminggifs", "warcraft2004",
|
||||||
|
"komicamc", "marista", "neetpride", "numis", "progmusic", "retrogaminggifs", "warcraft2004"),
|
||||||
),
|
),
|
||||||
"hispachan": HispachanHtmlHelper(
|
"hispachan": HispachanHtmlHelper(
|
||||||
30,
|
30,
|
||||||
@ -466,7 +455,6 @@ CHANS = {
|
|||||||
"cl", "co", "ec", "es", "mx", "pe", "py", "uy", "ve", "d",
|
"cl", "co", "ec", "es", "mx", "pe", "py", "uy", "ve", "d",
|
||||||
"h", "o", "s", "sar", "scl", "sco", "ses", "smx", "spe", "sve",
|
"h", "o", "s", "sar", "scl", "sco", "ses", "smx", "spe", "sve",
|
||||||
),
|
),
|
||||||
rps=1 / 20
|
|
||||||
),
|
),
|
||||||
"sushigirl": JsonChanHelper(
|
"sushigirl": JsonChanHelper(
|
||||||
31,
|
31,
|
||||||
@ -478,7 +466,6 @@ CHANS = {
|
|||||||
"archive", "wildcard", "lounge", "arcade", "kawaii",
|
"archive", "wildcard", "lounge", "arcade", "kawaii",
|
||||||
"kitchen", "tunes", "culture", "silicon", "yakuza", "hell", "lewd"
|
"kitchen", "tunes", "culture", "silicon", "yakuza", "hell", "lewd"
|
||||||
),
|
),
|
||||||
rps=1 / 30
|
|
||||||
),
|
),
|
||||||
"4kev": Kev4PhpHelper(
|
"4kev": Kev4PhpHelper(
|
||||||
32,
|
32,
|
||||||
@ -491,7 +478,6 @@ CHANS = {
|
|||||||
"politics", "programming", "random", "technology",
|
"politics", "programming", "random", "technology",
|
||||||
"television", "videogames",
|
"television", "videogames",
|
||||||
),
|
),
|
||||||
rps=1 / 20
|
|
||||||
),
|
),
|
||||||
"plus4chan": Plus4ChanHelper(
|
"plus4chan": Plus4ChanHelper(
|
||||||
33,
|
33,
|
||||||
@ -503,7 +489,6 @@ CHANS = {
|
|||||||
"baw", "co", "cog", "jam", "mtv",
|
"baw", "co", "cog", "jam", "mtv",
|
||||||
"coc", "draw", "pco", "coq", "cod", "a"
|
"coc", "draw", "pco", "coq", "cod", "a"
|
||||||
),
|
),
|
||||||
rps=1 / 15
|
|
||||||
),
|
),
|
||||||
"2chan": Chan2Helper(
|
"2chan": Chan2Helper(
|
||||||
34,
|
34,
|
||||||
@ -628,7 +613,6 @@ CHANS = {
|
|||||||
"oe", # ??? お絵sql
|
"oe", # ??? お絵sql
|
||||||
"72", # ??? お絵sqlip
|
"72", # ??? お絵sqlip
|
||||||
),
|
),
|
||||||
rps=1 / 3
|
|
||||||
),
|
),
|
||||||
"waifuist": LynxChanHelper(
|
"waifuist": LynxChanHelper(
|
||||||
36,
|
36,
|
||||||
@ -639,7 +623,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"w", "starlet", "etc",
|
"w", "starlet", "etc",
|
||||||
),
|
),
|
||||||
rps=1 / 25
|
|
||||||
),
|
),
|
||||||
"cutiegarden": LynxChanHelper(
|
"cutiegarden": LynxChanHelper(
|
||||||
37,
|
37,
|
||||||
@ -650,7 +633,6 @@ CHANS = {
|
|||||||
(
|
(
|
||||||
"lg", "cozy", "meta", "test"
|
"lg", "cozy", "meta", "test"
|
||||||
),
|
),
|
||||||
rps=1 / 25
|
|
||||||
),
|
),
|
||||||
"9chan": JsonInfinityNextChanHelper(
|
"9chan": JsonInfinityNextChanHelper(
|
||||||
38,
|
38,
|
||||||
@ -737,6 +719,5 @@ CHANS = {
|
|||||||
"politicallyincorrect", "hockey", "randb", "traps", "vichan", "ircsecrets", "bosartest111111", "chib",
|
"politicallyincorrect", "hockey", "randb", "traps", "vichan", "ircsecrets", "bosartest111111", "chib",
|
||||||
"testing1234fake", "mdma", "virgo", "homo", "scum", "anal", "gamerhatehq", "vagina", "dump", "advert",
|
"testing1234fake", "mdma", "virgo", "homo", "scum", "anal", "gamerhatehq", "vagina", "dump", "advert",
|
||||||
"jueggin", "kike", "type", "robot", "goodguys", "ween", "bankfraudaccountloading", "vhsch"),
|
"jueggin", "kike", "type", "robot", "goodguys", "ween", "bankfraudaccountloading", "vhsch"),
|
||||||
rps=1 / 10
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
import json
|
import json
|
||||||
from json import JSONDecodeError
|
from json import JSONDecodeError
|
||||||
|
|
||||||
|
from hexlib.log import logger
|
||||||
|
|
||||||
from chan.helper import ChanHelper
|
from chan.helper import ChanHelper
|
||||||
from post_process import get_links_from_body
|
from post_process import get_links_from_body
|
||||||
from util import logger
|
|
||||||
|
|
||||||
|
|
||||||
class JsonChanHelper(ChanHelper):
|
class JsonChanHelper(ChanHelper):
|
||||||
|
@ -2,14 +2,13 @@ from bs4 import BeautifulSoup
|
|||||||
|
|
||||||
|
|
||||||
class ChanHelper:
|
class ChanHelper:
|
||||||
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
|
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards):
|
||||||
self.db_id = db_id
|
self.db_id = db_id
|
||||||
self._base_url = base_url
|
self._base_url = base_url
|
||||||
self._image_url = image_url
|
self._image_url = image_url
|
||||||
self._thread_path = thread_path
|
self._thread_path = thread_path
|
||||||
self._image_path = image_path
|
self._image_path = image_path
|
||||||
self._boards = boards
|
self._boards = boards
|
||||||
self.rps = rps
|
|
||||||
self.get_method = None
|
self.get_method = None
|
||||||
self.save_folder = None
|
self.save_folder = None
|
||||||
|
|
||||||
@ -37,7 +36,7 @@ class ChanHelper:
|
|||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def item_unique_id(self, item, board):
|
def item_unique_id(self, item, board):
|
||||||
return int(self.board_hash(board) + str(self.item_id(item)))
|
return board + str(self.item_id(item))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def thread_mtime(thread):
|
def thread_mtime(thread):
|
||||||
|
@ -3,9 +3,9 @@ import re
|
|||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from hexlib.log import logger
|
||||||
|
|
||||||
from chan.desuchan_html import DesuChanHtmlChanHelper
|
from chan.desuchan_html import DesuChanHtmlChanHelper
|
||||||
from util import logger
|
|
||||||
|
|
||||||
|
|
||||||
def _ts(text):
|
def _ts(text):
|
||||||
|
@ -3,9 +3,10 @@ from urllib.parse import urljoin
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
from hexlib.log import logger
|
||||||
|
|
||||||
from chan.helper import ChanHelper
|
from chan.helper import ChanHelper
|
||||||
from post_process import get_links_from_body
|
from post_process import get_links_from_body
|
||||||
from util import logger
|
|
||||||
|
|
||||||
|
|
||||||
class JsonInfinityNextChanHelper(ChanHelper):
|
class JsonInfinityNextChanHelper(ChanHelper):
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from vanwanet_scrape.scraper import Scraper
|
from vanwanet_scrape.scraper import Scraper
|
||||||
|
|
||||||
from chan.chan_json import JsonChanHelper
|
from chan.chan_json import JsonChanHelper
|
||||||
from util import logger
|
from hexlib.log import logger
|
||||||
|
|
||||||
|
|
||||||
class JsonKunChanHelper(JsonChanHelper):
|
class JsonKunChanHelper(JsonChanHelper):
|
||||||
@ -10,8 +10,8 @@ class JsonKunChanHelper(JsonChanHelper):
|
|||||||
def item_type(item):
|
def item_type(item):
|
||||||
return "thread" if item["resto"] == 0 else "post"
|
return "thread" if item["resto"] == 0 else "post"
|
||||||
|
|
||||||
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
|
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards):
|
||||||
super().__init__(db_id, base_url, image_url, thread_path, image_path, boards, rps)
|
super().__init__(db_id, base_url, image_url, thread_path, image_path, boards)
|
||||||
|
|
||||||
self._scraper = Scraper(
|
self._scraper = Scraper(
|
||||||
headers={
|
headers={
|
||||||
|
@ -7,14 +7,14 @@ import cloudscraper
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
from chan.helper import ChanHelper
|
from chan.helper import ChanHelper
|
||||||
from util import logger
|
from hexlib.log import logger
|
||||||
|
|
||||||
|
|
||||||
class LynxChanHelper(ChanHelper):
|
class LynxChanHelper(ChanHelper):
|
||||||
"""See https://gitgud.io/LynxChan/LynxChan/blob/master/doc/Json.txt"""
|
"""See https://gitgud.io/LynxChan/LynxChan/blob/master/doc/Json.txt"""
|
||||||
|
|
||||||
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards, rps):
|
def __init__(self, db_id, base_url, image_url, thread_path, image_path, boards):
|
||||||
super().__init__(db_id, base_url, image_url, thread_path, image_path, boards, rps)
|
super().__init__(db_id, base_url, image_url, thread_path, image_path, boards)
|
||||||
|
|
||||||
scraper = cloudscraper.create_scraper()
|
scraper = cloudscraper.create_scraper()
|
||||||
if len(sys.argv) > 3:
|
if len(sys.argv) > 3:
|
||||||
|
@ -3,13 +3,13 @@ from json import JSONDecodeError
|
|||||||
|
|
||||||
from chan.helper import ChanHelper
|
from chan.helper import ChanHelper
|
||||||
from post_process import get_links_from_body
|
from post_process import get_links_from_body
|
||||||
from util import logger
|
from hexlib.log import logger
|
||||||
|
|
||||||
|
|
||||||
class MayuriChanHelper(ChanHelper):
|
class MayuriChanHelper(ChanHelper):
|
||||||
|
|
||||||
def __init__(self, db_id, base_url, image_url, boards, rps):
|
def __init__(self, db_id, base_url, image_url, boards):
|
||||||
super().__init__(db_id, base_url, image_url, None, None, boards, rps)
|
super().__init__(db_id, base_url, image_url, None, None, boards)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def item_id(item):
|
def item_id(item):
|
||||||
|
@ -3,8 +3,7 @@ from json import JSONDecodeError
|
|||||||
|
|
||||||
from chan.helper import ChanHelper
|
from chan.helper import ChanHelper
|
||||||
from post_process import get_links_from_body
|
from post_process import get_links_from_body
|
||||||
from util import logger
|
from hexlib.log import logger
|
||||||
|
|
||||||
|
|
||||||
class RussianJsonChanHelper(ChanHelper):
|
class RussianJsonChanHelper(ChanHelper):
|
||||||
|
|
||||||
|
@ -36,6 +36,10 @@ class TgChanHtmlChanHelper(DesuChanHtmlChanHelper):
|
|||||||
posts = []
|
posts = []
|
||||||
for post_el in op_el.find_all("table", recursive=False):
|
for post_el in op_el.find_all("table", recursive=False):
|
||||||
*_, time = post_el.find("label").children
|
*_, time = post_el.find("label").children
|
||||||
|
|
||||||
|
if post_el.get("class") and "userdelete" in post_el.get("class"):
|
||||||
|
continue
|
||||||
|
|
||||||
posts.append({
|
posts.append({
|
||||||
"id": int(post_el.find("td", attrs={"class", "reply"}).get("id")[5:]),
|
"id": int(post_el.find("td", attrs={"class", "reply"}).get("id")[5:]),
|
||||||
"type": "post",
|
"type": "post",
|
||||||
|
@ -1,384 +1,247 @@
|
|||||||
version: "2.1"
|
version: "3"
|
||||||
volumes:
|
|
||||||
influxdb_data:
|
|
||||||
pg_data:
|
|
||||||
pg_data_imhash:
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
influxdb:
|
|
||||||
image: influxdb:alpine
|
|
||||||
volumes:
|
|
||||||
- influxdb_data:/var/lib/influxdb
|
|
||||||
grafana:
|
|
||||||
image: grafana/grafana
|
|
||||||
ports:
|
|
||||||
- 127.0.0.1:3006:3000
|
|
||||||
environment:
|
|
||||||
- "GF_SECURITY_ADMIN_PASSWORD=changeme"
|
|
||||||
db:
|
|
||||||
image: postgres
|
|
||||||
volumes:
|
|
||||||
- pg_data:/var/lib/postgresql/data
|
|
||||||
environment:
|
|
||||||
- "POSTGRES_USER=feed_archiver"
|
|
||||||
- "POSTGRES_PASSWORD=changeme"
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "pg_isready -U feed_archiver"]
|
|
||||||
interval: 5s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 5
|
|
||||||
db_imhashdb:
|
|
||||||
image: simon987/pg_hamming
|
|
||||||
volumes:
|
|
||||||
- pg_data_imhash:/var/lib/postgresql/data
|
|
||||||
environment:
|
|
||||||
- "POSTGRES_USER=imhashdb"
|
|
||||||
- "POSTGRES_PASSWORD=changeme"
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "pg_isready -U imhashdb"]
|
|
||||||
interval: 5s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 5
|
|
||||||
redis:
|
|
||||||
image: redis
|
|
||||||
archiver:
|
|
||||||
image: simon987/feed_archiver
|
|
||||||
restart: always
|
|
||||||
depends_on:
|
|
||||||
db:
|
|
||||||
condition: service_healthy
|
|
||||||
environment:
|
|
||||||
- "FA_DB_HOST=db"
|
|
||||||
- "FA_DB_USER=feed_archiver"
|
|
||||||
- "FA_DB_PASSWORD=changeme"
|
|
||||||
- "FA_REDIS_ADDR=redis:6379"
|
|
||||||
- "FA_PATTERN=arc.*"
|
|
||||||
imhashdb:
|
|
||||||
image: simon987/imhashdb
|
|
||||||
restart: always
|
|
||||||
entrypoint: "/build/imhashdb/cli/cli hasher"
|
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
|
||||||
- "IMHASHDB_STORE=/data"
|
|
||||||
- "IMHASHDB_REDIS_ADDR=redis:6379"
|
|
||||||
- "IMHASHDB_PG_USER=imhashdb"
|
|
||||||
- "IMHASHDB_PG_PASSWORD=changeme"
|
|
||||||
- "IMHASHDB_PG_DATABASE=imhashdb"
|
|
||||||
- "IMHASHDB_PG_HOST=db_imhashdb"
|
|
||||||
- "IMHASHDB_HASH_CONCURRENCY=16"
|
|
||||||
# Image boards
|
|
||||||
4chan:
|
4chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=4chan"
|
- "CF_CHAN=4chan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
0chan:
|
0chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=0chan"
|
- "CF_CHAN=0chan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
22chan:
|
22chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=22chan"
|
- "CF_CHAN=22chan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
2chan:
|
2chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=2chan"
|
- "CF_CHAN=2chan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
|
|
||||||
2chhk:
|
2chhk:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=2chhk"
|
- "CF_CHAN=2chhk"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
38chan:
|
38chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=38chan"
|
- "CF_CHAN=38chan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
410chan:
|
410chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=410chan"
|
- "CF_CHAN=410chan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
4kev:
|
4kev:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=4kev"
|
- "CF_CHAN=4kev"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
7chan:
|
7chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=7chan"
|
- "CF_CHAN=7chan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
8kun:
|
8kun:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=8kun"
|
- "CF_CHAN=8kun"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
alokal:
|
alokal:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=alokal"
|
- "CF_CHAN=alokal"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
aurorachan:
|
aurorachan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=aurorachan"
|
- "CF_CHAN=aurorachan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
awsumchan:
|
awsumchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=awsumchan"
|
- "CF_CHAN=awsumchan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
chanon:
|
chanon:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=chanon"
|
- "CF_CHAN=chanon"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
chanorg:
|
chanorg:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=chanorg"
|
- "CF_CHAN=chanorg"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
desuchan:
|
desuchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=desuchan"
|
- "CF_CHAN=desuchan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
doushio:
|
doushio:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=doushio"
|
- "CF_CHAN=doushio"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
endchan:
|
endchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=endchan"
|
- "CF_CHAN=endchan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
fchan:
|
fchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=fchan"
|
- "CF_CHAN=fchan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
gnfos:
|
gnfos:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=gnfos"
|
- "CF_CHAN=gnfos"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
hispachan:
|
hispachan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=hispachan"
|
- "CF_CHAN=hispachan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
horochan:
|
horochan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=horochan"
|
- "CF_CHAN=horochan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
iichan:
|
iichan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=iichan"
|
- "CF_CHAN=iichan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
lainchan:
|
lainchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=lainchan"
|
- "CF_CHAN=lainchan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
lolnada:
|
lolnada:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=lolnada"
|
- "CF_CHAN=lolnada"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
nowere:
|
nowere:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=nowere"
|
- "CF_CHAN=nowere"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
plus4chan:
|
plus4chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=plus4chan"
|
- "CF_CHAN=plus4chan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
sushigirl:
|
sushigirl:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=sushigirl"
|
- "CF_CHAN=sushigirl"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
synch:
|
synch:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=synch"
|
- "CF_CHAN=synch"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
tahta:
|
tahta:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=tahta"
|
- "CF_CHAN=tahta"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
tgchan:
|
tgchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=tgchan"
|
- "CF_CHAN=tgchan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
uboachan:
|
uboachan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=uboachan"
|
- "CF_CHAN=uboachan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
waifuist:
|
waifuist:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=waifuist"
|
- "CF_CHAN=waifuist"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
|
||||||
wizchan:
|
wizchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=wizchan"
|
- "CF_CHAN=wizchan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
9chan:
|
9chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=9chan"
|
- "CF_CHAN=9chan"
|
||||||
- "CF_REDIS_HOST=redis"
|
- "REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
FROM nginx:alpine
|
|
||||||
|
|
||||||
COPY nginx.conf /etc/nginx/
|
|
||||||
COPY ["/feed_viz", "/webroot"]
|
|
||||||
|
|
||||||
EXPOSE 80
|
|
@ -1 +0,0 @@
|
|||||||
Subproject commit c8e11a73d74e6af19cab581c94abf943daea050e
|
|
@ -1,48 +0,0 @@
|
|||||||
user nginx;
|
|
||||||
worker_processes 1;
|
|
||||||
|
|
||||||
error_log /var/log/nginx/error.log warn;
|
|
||||||
pid /var/run/nginx.pid;
|
|
||||||
|
|
||||||
events {
|
|
||||||
worker_connections 1024;
|
|
||||||
}
|
|
||||||
|
|
||||||
http {
|
|
||||||
include /etc/nginx/mime.types;
|
|
||||||
default_type application/octet-stream;
|
|
||||||
|
|
||||||
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
|
|
||||||
'$status $body_bytes_sent "$http_referer" '
|
|
||||||
'"$http_user_agent" "$http_x_forwarded_for"';
|
|
||||||
|
|
||||||
access_log /var/log/nginx/access.log main;
|
|
||||||
|
|
||||||
sendfile on;
|
|
||||||
|
|
||||||
keepalive_timeout 65;
|
|
||||||
|
|
||||||
upstream socket {
|
|
||||||
server ws_adapter:3090;
|
|
||||||
}
|
|
||||||
|
|
||||||
server {
|
|
||||||
listen 80;
|
|
||||||
|
|
||||||
index index.html;
|
|
||||||
root /webroot;
|
|
||||||
|
|
||||||
location / {
|
|
||||||
try_files $uri $uri/ /index.html;
|
|
||||||
}
|
|
||||||
|
|
||||||
location /socket {
|
|
||||||
proxy_http_version 1.1;
|
|
||||||
proxy_set_header Upgrade $http_upgrade;
|
|
||||||
proxy_set_header Connection "Upgrade";
|
|
||||||
proxy_set_header Host $host;
|
|
||||||
proxy_read_timeout 86400;
|
|
||||||
proxy_pass http://socket;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,11 +1,28 @@
|
|||||||
import json
|
import json
|
||||||
import requests
|
|
||||||
|
from hexlib.log import logger
|
||||||
|
from vanwanet_scrape.scraper import Scraper
|
||||||
|
|
||||||
from chan.chan import CHANS
|
from chan.chan import CHANS
|
||||||
|
|
||||||
existing = CHANS["8kun2"]._boards
|
existing = CHANS["8kun2"]._boards
|
||||||
updated = list(existing)
|
updated = list(existing)
|
||||||
added = set()
|
added = set()
|
||||||
|
|
||||||
|
scraper = Scraper(
|
||||||
|
headers={
|
||||||
|
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0",
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
|
"Referer": "https://8kun.top/index.html"
|
||||||
|
},
|
||||||
|
domains=[
|
||||||
|
"8kun.top",
|
||||||
|
"media.8kun.top",
|
||||||
|
"sys.8kun.net"
|
||||||
|
],
|
||||||
|
logger=logger
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def mask(board):
|
def mask(board):
|
||||||
for i, b in enumerate(updated):
|
for i, b in enumerate(updated):
|
||||||
@ -22,8 +39,7 @@ def unmask(board):
|
|||||||
|
|
||||||
|
|
||||||
for i in range(0, 500, 50):
|
for i in range(0, 500, 50):
|
||||||
r = requests.get("https://sys.8kun.top/board-search.php?page=" + str(i))
|
r = scraper.get("https://sys.8kun.top/board-search.php?page=" + str(i))
|
||||||
|
|
||||||
j = json.loads(r.text)
|
j = json.loads(r.text)
|
||||||
|
|
||||||
for board in j["boards"]:
|
for board in j["boards"]:
|
||||||
@ -36,7 +52,7 @@ for i in range(0, 500, 50):
|
|||||||
print("[+] " + board)
|
print("[+] " + board)
|
||||||
|
|
||||||
for board in existing:
|
for board in existing:
|
||||||
if board not in added:
|
if board not in added and not board.startswith("_"):
|
||||||
mask(board)
|
mask(board)
|
||||||
|
|
||||||
print("(" + ",".join('"' + u + '"' for u in updated) + ")")
|
print("(" + ",".join('"' + u + '"' for u in updated) + ")")
|
||||||
|
3527
grafana/model.json
3527
grafana/model.json
File diff suppressed because it is too large
Load Diff
73
migrate_item_ids.py
Normal file
73
migrate_item_ids.py
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
import itertools
|
||||||
|
|
||||||
|
import orjson
|
||||||
|
import psycopg2
|
||||||
|
from hexlib.misc import buffered
|
||||||
|
from tqdm import tqdm
|
||||||
|
from hexlib.db import pg_fetch_cursor_all
|
||||||
|
|
||||||
|
from chan.chan import CHANS
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
conn = psycopg2.connect(
|
||||||
|
host="192.168.1.70",
|
||||||
|
port="5432",
|
||||||
|
user="feed_archiver",
|
||||||
|
password="",
|
||||||
|
dbname="feed_archiver"
|
||||||
|
)
|
||||||
|
|
||||||
|
conn.set_client_encoding("utf8")
|
||||||
|
|
||||||
|
table = "chan_4chan_post"
|
||||||
|
new_table = "chan2_4chan_post"
|
||||||
|
|
||||||
|
print(table)
|
||||||
|
|
||||||
|
# chan_name = table.split("_")[1]
|
||||||
|
# chan = CHANS[chan_name]
|
||||||
|
|
||||||
|
cur = conn.cursor()
|
||||||
|
cur2 = conn.cursor()
|
||||||
|
|
||||||
|
cur2.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS %s (
|
||||||
|
id TEXT PRIMARY KEY NOT NULL,
|
||||||
|
archived_on TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
|
||||||
|
data JSONB NOT NULL
|
||||||
|
);
|
||||||
|
""" % new_table)
|
||||||
|
|
||||||
|
cur.execute("SELECT COUNT(*) FROM %s" % table)
|
||||||
|
row_count = cur.fetchone()[0]
|
||||||
|
|
||||||
|
cur.execute("DECLARE cur1 CURSOR FOR SELECT * FROM %s" % table)
|
||||||
|
|
||||||
|
rows = pg_fetch_cursor_all(cur, name="cur1", batch_size=5000)
|
||||||
|
|
||||||
|
|
||||||
|
@buffered(batch_size=1000)
|
||||||
|
def pg_bulk_insert(rows):
|
||||||
|
val_count = len(rows[0])
|
||||||
|
|
||||||
|
cur2.execute(
|
||||||
|
"INSERT INTO %s VALUES %s ON CONFLICT DO NOTHING" %
|
||||||
|
(
|
||||||
|
new_table,
|
||||||
|
", ".join(("(" + ",".join("%s" for _ in range(val_count)) + ")") for _ in rows)
|
||||||
|
),
|
||||||
|
list(itertools.chain(*rows))
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
for row in tqdm(rows, total=row_count):
|
||||||
|
id_, archived_on, data = row
|
||||||
|
|
||||||
|
new_id = data["_board"] + str(data["no"])
|
||||||
|
|
||||||
|
pg_bulk_insert([
|
||||||
|
(new_id, archived_on, orjson.dumps(data).decode())
|
||||||
|
])
|
||||||
|
pg_bulk_insert(None)
|
||||||
|
conn.commit()
|
BIN
monitoring.png
BIN
monitoring.png
Binary file not shown.
Before Width: | Height: | Size: 366 KiB |
83
run.py
83
run.py
@ -1,40 +1,24 @@
|
|||||||
import datetime
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
from datetime import datetime
|
|
||||||
from queue import Queue
|
from queue import Queue
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
import redis
|
|
||||||
|
|
||||||
|
from hexlib.concurrency import queue_iter
|
||||||
from hexlib.db import VolatileBooleanState, VolatileState
|
from hexlib.db import VolatileBooleanState, VolatileState
|
||||||
from hexlib.monitoring import Monitoring
|
from hexlib.env import get_web, get_redis
|
||||||
|
from hexlib.log import logger
|
||||||
|
|
||||||
from chan.chan import CHANS
|
from chan.chan import CHANS
|
||||||
from post_process import post_process
|
from post_process import post_process
|
||||||
from util import logger, Web
|
|
||||||
|
|
||||||
BYPASS_RPS = False
|
|
||||||
|
|
||||||
DBNAME = "chan_feed"
|
|
||||||
if os.environ.get("CF_INFLUXDB"):
|
|
||||||
influxdb = Monitoring(DBNAME, host=os.environ.get("CF_INFLUXDB"), logger=logger, batch_size=100, flush_on_exit=True)
|
|
||||||
MONITORING = True
|
|
||||||
else:
|
|
||||||
MONITORING = False
|
|
||||||
|
|
||||||
REDIS_HOST = os.environ.get("CF_REDIS_HOST", "localhost")
|
|
||||||
REDIS_PORT = os.environ.get("CF_REDIS_PORT", 6379)
|
|
||||||
CHAN = os.environ.get("CF_CHAN", None)
|
CHAN = os.environ.get("CF_CHAN", None)
|
||||||
CF_PUBLISH = os.environ.get("CF_PUBLISH", False)
|
|
||||||
|
|
||||||
ARC_LISTS = os.environ.get("CF_ARC_LISTS", "arc").split(",")
|
|
||||||
|
|
||||||
|
|
||||||
class ChanScanner:
|
class ChanScanner:
|
||||||
def __init__(self, helper, proxy):
|
def __init__(self, helper):
|
||||||
self.web = Web(influxdb if MONITORING else None, rps=helper.rps, get_method=helper.get_method, proxy=proxy)
|
self.web = get_web()
|
||||||
self.helper = helper
|
self.helper = helper
|
||||||
self.state = state
|
self.state = state
|
||||||
|
|
||||||
@ -83,9 +67,8 @@ def once(func):
|
|||||||
|
|
||||||
class ChanState:
|
class ChanState:
|
||||||
def __init__(self, prefix):
|
def __init__(self, prefix):
|
||||||
self._posts = VolatileBooleanState(prefix, host=REDIS_HOST, port=REDIS_PORT)
|
self._posts = VolatileBooleanState(prefix)
|
||||||
self._threads = VolatileState(prefix, host=REDIS_HOST, port=REDIS_PORT)
|
self._threads = VolatileState(prefix)
|
||||||
print("redis host=" + REDIS_HOST)
|
|
||||||
|
|
||||||
def mark_visited(self, item: int):
|
def mark_visited(self, item: int):
|
||||||
self._posts["posts"][item] = True
|
self._posts["posts"][item] = True
|
||||||
@ -109,18 +92,12 @@ class ChanState:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def publish_worker(queue: Queue, helper, p):
|
def publish_worker(queue: Queue, helper):
|
||||||
while True:
|
for item, board in queue_iter(queue):
|
||||||
try:
|
try:
|
||||||
item, board = queue.get()
|
publish(item, board, helper)
|
||||||
if item is None:
|
|
||||||
break
|
|
||||||
publish(item, board, helper,)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(str(e) + ": " + traceback.format_exc())
|
logger.error(str(e) + ": " + traceback.format_exc())
|
||||||
finally:
|
|
||||||
queue.task_done()
|
|
||||||
|
|
||||||
|
|
||||||
@once
|
@once
|
||||||
@ -131,23 +108,7 @@ def publish(item, board, helper):
|
|||||||
routing_key = "%s.%s.%s" % (CHAN, item_type, board)
|
routing_key = "%s.%s.%s" % (CHAN, item_type, board)
|
||||||
|
|
||||||
message = json.dumps(item, separators=(',', ':'), ensure_ascii=False, sort_keys=True)
|
message = json.dumps(item, separators=(',', ':'), ensure_ascii=False, sort_keys=True)
|
||||||
if CF_PUBLISH:
|
rdb.lpush("arc.chan2." + routing_key, message)
|
||||||
rdb.publish("chan." + routing_key, message)
|
|
||||||
for arc in ARC_LISTS:
|
|
||||||
rdb.lpush(arc + ".chan." + routing_key, message)
|
|
||||||
|
|
||||||
if MONITORING:
|
|
||||||
distance = datetime.utcnow() - datetime.utcfromtimestamp(helper.item_mtime(item))
|
|
||||||
influxdb.log([{
|
|
||||||
"measurement": CHAN,
|
|
||||||
"time": str(datetime.utcnow()),
|
|
||||||
"tags": {
|
|
||||||
"board": board
|
|
||||||
},
|
|
||||||
"fields": {
|
|
||||||
"distance": distance.total_seconds()
|
|
||||||
}
|
|
||||||
}])
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@ -157,30 +118,20 @@ if __name__ == "__main__":
|
|||||||
if save_folder:
|
if save_folder:
|
||||||
chan_helper.save_folder = save_folder
|
chan_helper.save_folder = save_folder
|
||||||
|
|
||||||
proxy = None
|
|
||||||
if os.environ.get("CF_PROXY"):
|
|
||||||
proxy = os.environ.get("CF_PROXY")
|
|
||||||
logger.info("Using proxy %s" % proxy)
|
|
||||||
|
|
||||||
if BYPASS_RPS:
|
|
||||||
chan_helper.rps = 10
|
|
||||||
|
|
||||||
state = ChanState(CHAN)
|
state = ChanState(CHAN)
|
||||||
rdb = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)
|
rdb = get_redis()
|
||||||
|
|
||||||
publish_q = Queue()
|
publish_q = Queue()
|
||||||
for _ in range(3):
|
publish_thread = Thread(target=publish_worker, args=(publish_q, chan_helper))
|
||||||
publish_thread = Thread(target=publish_worker, args=(publish_q, chan_helper, proxy))
|
publish_thread.setDaemon(True)
|
||||||
publish_thread.setDaemon(True)
|
publish_thread.start()
|
||||||
publish_thread.start()
|
|
||||||
|
|
||||||
s = ChanScanner(chan_helper, proxy)
|
s = ChanScanner(chan_helper)
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
for p, b in s.all_posts():
|
for p, b in s.all_posts():
|
||||||
publish_q.put((p, b))
|
publish_q.put((p, b))
|
||||||
except KeyboardInterrupt as e:
|
except KeyboardInterrupt as e:
|
||||||
print("cleanup..")
|
print("cleanup..")
|
||||||
for _ in range(3):
|
publish_q.put(None)
|
||||||
publish_q.put((None, None))
|
|
||||||
break
|
break
|
||||||
|
2
start.sh
2
start.sh
@ -1,2 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
CURRENT_UID=$(id -u):$(id -g) SAVE_FOLDER=$(pwd)/data docker-compose up --force-recreate
|
|
86
util.py
86
util.py
@ -1,86 +0,0 @@
|
|||||||
import logging
|
|
||||||
import sys
|
|
||||||
import traceback
|
|
||||||
from datetime import datetime
|
|
||||||
from logging import FileHandler, StreamHandler
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from hexlib.misc import rate_limit
|
|
||||||
from urllib3 import disable_warnings
|
|
||||||
|
|
||||||
disable_warnings()
|
|
||||||
|
|
||||||
last_time_called = dict()
|
|
||||||
|
|
||||||
logger = logging.getLogger("default")
|
|
||||||
logger.setLevel(logging.DEBUG)
|
|
||||||
|
|
||||||
formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
|
|
||||||
for h in logger.handlers:
|
|
||||||
logger.removeHandler(h)
|
|
||||||
logger.addHandler(StreamHandler(sys.stdout))
|
|
||||||
|
|
||||||
|
|
||||||
class Web:
    """HTTP GET helper with retries and optional monitoring.

    Each instance owns a ``requests.Session`` (optionally routed through a
    proxy) and a private ``_get`` callable that retries failed requests.
    ``get`` wraps ``_get``, logs the outcome, and reports one "web"
    measurement per call to ``monitoring`` when a monitoring object is given.
    """

    # Number of attempts made by ``_get`` before giving up.
    _MAX_ATTEMPTS = 3

    def __init__(self, monitoring, rps=1 / 2, proxy=None, get_method=None):
        """Create the session and build the rate-limited fetch function.

        :param monitoring: object exposing ``log(points)``, or a falsy value
            to disable reporting.
        :param rps: value handed to ``rate_limit``; presumably the maximum
            number of requests per second -- confirm against hexlib.
        :param proxy: proxy URL used for both http and https, or None.
        :param get_method: optional callable ``(url, **kwargs)`` used instead
            of ``session.get``.
        """
        self.session = requests.Session()
        if proxy:
            self.session.proxies = {"http": proxy, "https": proxy}
        # NOTE(review): TLS certificate verification is disabled for every
        # request made through this session; kept as in the original.
        self.session.verify = False
        self._rps = rps
        self.monitoring = monitoring
        self._get_method = get_method

        # Defined here rather than as a method because the decorator needs
        # the per-instance ``rps`` value. The decorator wraps the whole retry
        # loop, so retries happen inside a single decorated call.
        @rate_limit(self._rps)
        def _get(url, **kwargs):
            last_error = None
            for _ in range(self._MAX_ATTEMPTS):
                try:
                    if self._get_method:
                        return self._get_method(url, **kwargs)
                    return self.session.get(url, **kwargs)
                # KeyboardInterrupt is not an Exception subclass, so it
                # propagates untouched without an explicit re-raise.
                except Exception as e:
                    last_error = e
                    logger.warning("Error with request %s: %s", url, e)
            # Chain the last failure so the root cause stays in the traceback.
            raise Exception(
                "Gave up request after maximum number of retries"
            ) from last_error

        self._get = _get

    def _report(self, response):
        """Send one "web" point to monitoring; ``None`` marks a failed request."""
        if not self.monitoring:
            return
        if response is None:
            status_code, size = 0, 0
        else:
            # Read the body length only when monitoring is enabled, exactly
            # as the original did.
            status_code, size = response.status_code, len(response.content)
        self.monitoring.log([{
            "measurement": "web",
            # NOTE(review): utcnow() is deprecated since Python 3.12; kept so
            # the emitted timestamp string does not change.
            "time": str(datetime.utcnow()),
            "fields": {
                "status_code": status_code,
                "size": size,
            },
            "tags": {
                "ok": status_code == 200
            },
        }])

    def get(self, url, **kwargs):
        """Fetch ``url`` and return the response, or None on failure.

        Any exception raised while fetching or reporting is logged with its
        traceback and turned into a ``None`` return value; a failure point
        (status 0, size 0) is then reported to monitoring.
        KeyboardInterrupt is not intercepted.
        """
        try:
            r = self._get(url, **kwargs)

            logger.debug("GET %s <%d>", url, r.status_code)
            self._report(r)
            return r
        except Exception as e:
            logger.error(str(e) + traceback.format_exc())
            self._report(None)
            return None
|
|
Loading…
x
Reference in New Issue
Block a user