mirror of
https://github.com/simon987/chan_feed.git
synced 2025-04-10 14:06:42 +00:00
replace rabbitMQ with redis
This commit is contained in:
parent
5eccee69cf
commit
984388acc7
@ -1,7 +1,7 @@
|
|||||||
### chan_feed
|
### chan_feed
|
||||||
|
|
||||||
Daemon that fetches posts from compatible *chan
|
Daemon that fetches posts from compatible *chan
|
||||||
image boards and publishes serialised JSON to RabbitMQ
|
image boards and publishes serialised JSON to redis
|
||||||
for real-time ingest.
|
for real-time ingest.
|
||||||
|
|
||||||
Compatible image boards: 4chan, lainchan, uboachan,
|
Compatible image boards: 4chan, lainchan, uboachan,
|
||||||
|
101
chan/chan.py
101
chan/chan.py
@ -391,46 +391,67 @@ CHANS = {
|
|||||||
"https://media.8kun.top/",
|
"https://media.8kun.top/",
|
||||||
"/res/",
|
"/res/",
|
||||||
"file_store/",
|
"file_store/",
|
||||||
(
|
("1", "55chan", "_64chen", "8bantb", "8tube", "a", "_abdl2", "agdg", "amv", "aneki", "animu", "animus", "ara",
|
||||||
"_1", "55chan", "64chen", "8bantb", "8tube", "a", "abdl2", "agdg", "amv", "aneki", "animu", "animus",
|
"arda", "arms", "asatru", "asmr", "aus", "ausneets", "__b", "__baka", "_baneposting", "__baseballbat",
|
||||||
"ara", "arda", "arms", "asatru", "asmr", "aus", "ausneets", "_b", "_baka", "baneposting", "_baseballbat",
|
"bcards", "bleached", "blog", "__bonehurtingjuice", "bq", "__brit", "bubblegum", "builders", "bunkers", "butt",
|
||||||
"bcards", "bleached", "blog", "_bonehurtingjuice", "bq", "_brit", "bubblegum", "builders", "bunkers",
|
"cafechan", "caffe", "canada", "cath", "chori", "choroy", "christian", "christianity", "christianmeme",
|
||||||
"butt",
|
"cicachan", "civicrs", "ck", "cloveros", "co", "cow", "__cuckquean", "cute", "cyber", "cyoa", "__czech",
|
||||||
"cafechan", "caffe", "canada", "cath", "chori", "choroy", "christian", "christianity", "christianmeme",
|
"dadtalk", "danpu", "dao101", "degen", "delete", "dempart", "desu", "diaperfags", "diaperfetish", "dir",
|
||||||
"cicachan", "civicrs", "ck", "cloveros", "co", "cow", "_cuckquean", "cute", "cyber", "cyoa", "_czech",
|
"__dolphin", "dpfag", "_dpr", "druid", "_e9y", "eatme", "ebola", "eerie", "egy", "egypt", "etika", "eu",
|
||||||
"dadtalk", "danpu", "dao101", "degen", "delete", "dempart", "desu", "diaperfags", "diaperfetish", "dir",
|
"euskotxa", "__exit", "f1", "fa", "fairy", "fallen", "fast", "faygo", "feet", "femaledomination", "feri",
|
||||||
"_dolphin", "dpfag", "dpr", "druid", "e9y", "eatme", "ebola", "eerie", "egy", "egypt", "etika", "eu",
|
"__fightcomms", "film", "flemish", "floss", "fortnite", "freedomzine", "fukemo", "fumo", "fur", "furry", "g",
|
||||||
"euskotxa", "_exit", "f1", "fa", "fairy", "fallen", "fast", "faygo", "feet", "femaledomination", "feri",
|
"gamergatehq", "genesis", "_gesu", "ggis", "girltalk", "greenbreeze", "gts", "haxxor", "hentai", "hentaiclub",
|
||||||
"_fightcomms", "film", "flemish", "floss", "fortnite", "freedomzine", "fukemo", "fumo", "fur", "furry", "g",
|
"__herm", "hermetics", "hgb", "hgg", "__hindu", "hisparefugio", "hissss", "hnt", "hover", "hybrids", "hydrus",
|
||||||
"gamergatehq", "genesis", "gesu", "ggis", "girltalk", "greenbreeze", "gts", "haxxor", "hentai",
|
"hypno", "_hypnochan", "icup", "imperium", "in", "ipfs", "ircsecrets", "islam", "ita", "jaooo", "jewess",
|
||||||
"hentaiclub", "_herm", "hermetics", "hgb", "hgg", "_hindu", "hisparefugio", "hissss", "hnt", "hover",
|
"jmaatv", "joker", "jp", "k", "_kekforceusa", "kemono", "kocsog", "kohlchan", "__(komica)", "_komika", "kpop",
|
||||||
"hybrids", "hydrus", "hypno", "hypnochan", "icup", "imperium", "in", "ipfs", "ircsecrets", "islam", "ita",
|
"lain", "_lego", "leo", "lewd", "lit", "lol", "loomis", "loroy", "luddite", "magick", "maka", "mde",
|
||||||
"jaooo", "jewess", "jmaatv", "joker", "jp", "k", "kekforceusa", "kemono", "kocsog", "kohlchan", "_komica",
|
"merrychristmas", "miku", "milf", "mom", "monster", "msb", "mtb", "mtt", "mu", "n0thingness", "nanachi",
|
||||||
"komika", "kpop", "lain", "lego", "leo", "lewd", "lit", "lol", "loomis", "loroy", "luddite", "magick",
|
"natiofr", "nep", "newbrit", "newsplus", "nobody", "nofap", "nofur", "nogatco", "nothingness", "ntr", "_nuke8",
|
||||||
"maka", "mde", "merrychristmas", "miku", "milf", "mom", "monster", "msb", "mtb", "mtt", "mu", "n0thingness",
|
"oanda", "__ocb", "__ocult", "_omorashi", "opmk", "os", "otter", "p", "panconleche", "pdfs", "__peaceofmind",
|
||||||
"nanachi", "natiofr", "nep", "newbrit", "newsplus", "nobody", "nofap", "nofur", "nogatco", "nothingness",
|
"pen", "philosophy", "_pkmns", "pnd", "pokeporn", "polymath", "pone", "projectdcomms", "__pyatibrat", "_qm",
|
||||||
"ntr", "nuke8", "oanda", "_ocb", "_ocult", "omorashi", "opmk", "os", "otter", "p", "panconleche", "pdfs",
|
"qpatriotresearch", "__qresearch", "qrnews", "__rand21", "rec", "rmart", "rusrandom", "rzabczan", "s", "s8s",
|
||||||
"_peaceofmind", "pen", "philosophy", "pkmns", "pnd", "pokeporn", "polymath", "pone", "projectdcomms",
|
"sag", "sapphic", "shousa", "sikhi", "sip", "sl", "_snowboarding", "socpl", "strek", "subs", "__sve", "t",
|
||||||
"_pyatibrat", "qm", "qpatriotresearch", "_qresearch", "qrnews", "_rand21", "rec", "rmart", "rusrandom",
|
"tan", "tdt", "tech9", "techan", "techbunker", "tek", "templeos", "tenda", "teraha", "__texit", "tf2", "__tg",
|
||||||
"rzabczan", "s", "s8s", "sag", "sapphic", "shousa", "sikhi", "sip", "sl", "snowboarding", "socpl", "strek",
|
"_thb", "thedickshow", "throat", "_tibby", "tikilounge", "tkr", "tr55", "__trashcollector", "truthlegion",
|
||||||
"subs", "_sve", "t", "tan", "tdt", "tech9", "techan", "techbunker", "tek", "templeos", "tenda", "teraha",
|
"tulpamancers", "turul", "tutturu", "tv", "u", "uaco", "_ucla", "underground", "__usersunion", "v", "vichan",
|
||||||
"_texit", "tf2", "_tg", "thb", "thedickshow", "throat", "tibby", "tikilounge", "tkr", "tr55",
|
"vietkong", "vietnam", "vore", "vr", "_warposting", "wdsc", "webm", "wg", "__wga", "wikieat", "wis", "wmafsex",
|
||||||
"_trashcollector", "truthlegion", "tulpamancers", "turul", "tutturu", "tv", "u", "uaco", "ucla",
|
"workrelated", "wqt", "wx", "x", "__xivl", "__xtian", "zoomerright", "zundel", "0", "55sync", "abdl",
|
||||||
"underground", "_usersunion", "v", "vichan", "vietkong", "vietnam", "vore", "vr", "warposting", "wdsc",
|
"alleycat", "_arisu", "arisubunker", "_arp", "bane", "_bimbohypnosis", "_bluemoon", "bmn", "brains", "cats",
|
||||||
"webm", "wg", "_wga", "wikieat", "wis", "wmafsex", "workrelated", "wqt", "wx", "x", "_xivl", "_xtian",
|
"_chance", "clang", "comfy", "critters", "_cursed", "_cvine", "cze", "d", "dcaco", "demonp", "_dnmd", "doomer",
|
||||||
"zoomerright", "zundel",
|
"doot", "elitabla", "_empanada", "erp", "_falseflags", "fashionplus", "fata", "femdom", "fit", "_flg",
|
||||||
|
"_fr8chan", "futyitorna", "garrett", "_giantesshentai", "hentaiporn", "hmfr", "hooliedayz", "hsp", "hujszon",
|
||||||
"0", "55sync", "abdl", "alleycat", "arisu", "arisubunker", "arp", "bane", "bimbohypnosis", "bluemoon",
|
"iep", "just", "k46", "kind", "_kiwc", "kukichan", "_lacajita", "_legos", "lgd", "liveanarchy",
|
||||||
"bmn", "brains", "cats", "chance", "clang", "comfy", "critters", "cursed", "cvine", "cze", "d", "dcaco",
|
"luciddreaming", "m", "_mapp", "mental", "_mets", "_milhis", "monarchy", "_myon", "newhomosuck", "newsci",
|
||||||
"demonp", "dnmd", "doomer", "doot", "elitabla", "empanada", "erp", "falseflags", "fashionplus", "fata",
|
"_nine", "oes", "onepiece", "_other369", "otomad", "_penguware", "psyid", "qresearch2gen", "rule34",
|
||||||
"femdom", "fit", "flg", "fr8chan", "futyitorna", "garrett", "giantesshentai", "hentaiporn", "hmfr",
|
"_satorare", "sonyeon", "split", "sunflower", "_tae", "test", "_tft", "tftg", "toy", "trap", "_vein",
|
||||||
"hooliedayz", "hsp", "hujszon", "iep", "just", "k46", "kind", "kiwc", "kukichan", "lacajita", "legos",
|
"_virtualreality", "vivian", "voros", "wbr", "_weird", "wooo", "yuuka", "fringe", "random", "cuteboys", "tech",
|
||||||
"lgd", "liveanarchy", "luciddreaming", "m", "mapp", "mental", "mets", "milhis", "monarchy", "myon",
|
"internatiomall", "interracial", "liberty", "htg", "mai", "komica", "cutebois", "argentina", "r", "tf",
|
||||||
"newhomosuck", "newsci", "nine", "oes", "onepiece", "other369", "otomad", "penguware", "psyid",
|
"draftnote", "abcu", "k117", "britfeel", "liberty", "htg", "mai", "komica", "cutebois", "argentina", "r", "tf",
|
||||||
"qresearch2gen", "rule34", "satorare", "sonyeon", "split", "sunflower", "tae", "test", "tft", "tftg", "toy",
|
"draftnote", "abcu", "k117", "britfeel", "y", "an", "francofil", "portal", "royalhawk", "vdm", "bullmask",
|
||||||
"trap", "vein", "virtualreality", "vivian", "voros", "wbr", "weird", "wooo", "yuuka",
|
"imouto", "tripfriend", "arepa", "rwby", "sw", "y", "an", "francofil", "portal", "royalhawk", "vdm",
|
||||||
),
|
"bullmask", "imouto", "tripfriend", "arepa", "rwby", "sw", "magali", "hikki", "biz", "eris", "india", "mg",
|
||||||
rps=1
|
"magali", "hikki", "biz", "eris", "india", "mg", "out", "infinity", "tifa", "muslim", "out", "infinity",
|
||||||
|
"tifa", "muslim", "slackware", "archivo", "flatearth", "yaoi", "boombox", "wdp", "thedonald",
|
||||||
|
"libertedexpression", "khyber", "jsr", "slackware", "archivo", "flatearth", "yaoi", "boombox", "wdp",
|
||||||
|
"thedonald", "libertedexpression", "khyber", "jsr", "fso", "wumpawhip", "buddhismhotline", "indochinaexpats",
|
||||||
|
"ett", "redbar", "skyline350gt", "asc", "bazafx", "bestkorea", "covid19", "sokra", "bowsu", "qpatriotsunited",
|
||||||
|
"verzet", "wlctint", "cultstate", "melody", "vedic", "yhvh", "1cok", "astropolis", "fso", "wumpawhip",
|
||||||
|
"buddhismhotline", "indochinaexpats", "ett", "redbar", "skyline350gt", "asc", "bazafx", "bestkorea", "covid19",
|
||||||
|
"sokra", "bowsu", "qpatriotsunited", "verzet", "wlctint", "cultstate", "melody", "vedic", "yhvh", "1cok",
|
||||||
|
"astropolis", "earthlibfront", "pardochan", "stanislawowski", "thetrump", "yukkuri", "1825kun", "cryptobtc",
|
||||||
|
"isol", "knights", "language", "rr34", "sperg", "awaken", "belgium", "blizzard", "brain", "buddha", "dbs",
|
||||||
|
"deestevensvoice4you", "f4net", "fuckuchina", "gbtv", "hairygirls", "hallaca", "homeowner", "indo", "jersey",
|
||||||
|
"jigglypuff", "lbt", "madh4ckrs", "medcorp", "miamichan", "mrsfrisby", "mulatto", "mupro", "nhoodlink",
|
||||||
|
"p5porn", "patriotrevolution", "peko", "projectobject", "prop", "pups", "qanonspain", "qcastellano",
|
||||||
|
"earthlibfront", "pardochan", "stanislawowski", "thetrump", "yukkuri", "1825kun", "cryptobtc", "isol",
|
||||||
|
"knights", "language", "rr34", "sperg", "awaken", "belgium", "blizzard", "brain", "buddha", "dbs",
|
||||||
|
"deestevensvoice4you", "f4net", "fuckuchina", "gbtv", "hairygirls", "hallaca", "homeowner", "indo", "jersey",
|
||||||
|
"jigglypuff", "lbt", "madh4ckrs", "medcorp", "miamichan", "mrsfrisby", "mulatto", "mupro", "nhoodlink",
|
||||||
|
"p5porn", "patriotrevolution", "peko", "projectobject", "prop", "pups", "qanonspain", "qcastellano", "qsocial",
|
||||||
|
"resist", "revolu", "skemt", "sketheory", "spaceforce", "surro", "thehand", "transit", "vitaecryptocurrency",
|
||||||
|
"qsocial", "resist", "revolu", "skemt", "sketheory", "spaceforce", "surro", "thehand", "transit",
|
||||||
|
"vitaecryptocurrency"),
|
||||||
|
rps=2
|
||||||
),
|
),
|
||||||
"hispachan": HispachanHtmlHelper(
|
"hispachan": HispachanHtmlHelper(
|
||||||
30,
|
30,
|
||||||
|
@ -2,6 +2,7 @@ version: "2.1"
|
|||||||
volumes:
|
volumes:
|
||||||
influxdb_data:
|
influxdb_data:
|
||||||
pg_data:
|
pg_data:
|
||||||
|
pg_data_imhash:
|
||||||
|
|
||||||
services:
|
services:
|
||||||
influxdb:
|
influxdb:
|
||||||
@ -26,8 +27,20 @@ services:
|
|||||||
interval: 5s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
rabbitmq:
|
db_imhashdb:
|
||||||
image: rabbitmq:alpine
|
image: simon987/pg_hamming
|
||||||
|
volumes:
|
||||||
|
- pg_data_imhash:/var/lib/postgresql/data
|
||||||
|
environment:
|
||||||
|
- "POSTGRES_USER=imhashdb"
|
||||||
|
- "POSTGRES_PASSWORD=changeme"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U imhashdb"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
redis:
|
||||||
|
image: redis
|
||||||
archiver:
|
archiver:
|
||||||
image: simon987/feed_archiver
|
image: simon987/feed_archiver
|
||||||
restart: always
|
restart: always
|
||||||
@ -38,422 +51,326 @@ services:
|
|||||||
- "FA_DB_HOST=db"
|
- "FA_DB_HOST=db"
|
||||||
- "FA_DB_USER=feed_archiver"
|
- "FA_DB_USER=feed_archiver"
|
||||||
- "FA_DB_PASSWORD=changeme"
|
- "FA_DB_PASSWORD=changeme"
|
||||||
- "FA_MQ_CONNSTR=amqp://guest:guest@rabbitmq:5672/"
|
- "FA_REDIS_ADDR=redis:6379"
|
||||||
- "FA_EXCHANGES=chan"
|
- "FA_PATTERN=arc.*"
|
||||||
ws_adapter:
|
imhashdb:
|
||||||
image: simon987/ws_feed_adapter
|
image: simon987/imhashdb
|
||||||
|
restart: always
|
||||||
|
entrypoint: "/build/imhashdb/cli/cli hasher"
|
||||||
|
volumes:
|
||||||
|
- ${SAVE_FOLDER}:/data/
|
||||||
environment:
|
environment:
|
||||||
- "WSA_MQ_CONNSTR=amqp://guest:guest@rabbitmq:5672/"
|
- "IMHASHDB_STORE=/data"
|
||||||
feed_viz_frontend:
|
- "IMHASHDB_REDIS_ADDR=redis:6379"
|
||||||
build: ./docker_viz/
|
- "IMHASHDB_PG_USER=imhashdb"
|
||||||
ports:
|
- "IMHASHDB_PG_PASSWORD=changeme"
|
||||||
- 127.0.0.1:3005:80
|
- "IMHASHDB_PG_DATABASE=imhashdb"
|
||||||
|
- "IMHASHDB_PG_HOST=db_imhashdb"
|
||||||
|
- "IMHASHDB_HASH_CONCURRENCY=16"
|
||||||
# Image boards
|
# Image boards
|
||||||
|
4chan:
|
||||||
|
image: simon987/chan_feed
|
||||||
|
restart: always
|
||||||
|
user: ${CURRENT_UID}
|
||||||
|
environment:
|
||||||
|
- "CF_CHAN=4chan"
|
||||||
|
- "CF_REDIS_HOST=redis"
|
||||||
|
- "CF_INFLUXDB=influxdb"
|
||||||
|
|
||||||
0chan:
|
0chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=0chan"
|
- "CF_CHAN=0chan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
22chan:
|
22chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=22chan"
|
- "CF_CHAN=22chan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
2chan:
|
2chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=2chan"
|
- "CF_CHAN=2chan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
|
|
||||||
2chhk:
|
2chhk:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=2chhk"
|
- "CF_CHAN=2chhk"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
38chan:
|
38chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=38chan"
|
- "CF_CHAN=38chan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
410chan:
|
410chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=410chan"
|
- "CF_CHAN=410chan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
4chan:
|
|
||||||
image: simon987/chan_feed
|
|
||||||
restart: always
|
|
||||||
user: ${CURRENT_UID}
|
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
|
||||||
- "CF_CHAN=4chan"
|
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
|
||||||
- "CF_INFLUXDB=influxdb"
|
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
4kev:
|
4kev:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=4kev"
|
- "CF_CHAN=4kev"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
7chan:
|
7chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=7chan"
|
- "CF_CHAN=7chan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
8kun:
|
8kun:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=8kun"
|
- "CF_CHAN=8kun"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
alokal:
|
alokal:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=alokal"
|
- "CF_CHAN=alokal"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
aurorachan:
|
aurorachan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=aurorachan"
|
- "CF_CHAN=aurorachan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
awsumchan:
|
awsumchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=awsumchan"
|
- "CF_CHAN=awsumchan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
chanon:
|
chanon:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=chanon"
|
- "CF_CHAN=chanon"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
chanorg:
|
chanorg:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=chanorg"
|
- "CF_CHAN=chanorg"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
desuchan:
|
desuchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=desuchan"
|
- "CF_CHAN=desuchan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
doushio:
|
doushio:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=doushio"
|
- "CF_CHAN=doushio"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
endchan:
|
endchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=endchan"
|
- "CF_CHAN=endchan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
fchan:
|
fchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=fchan"
|
- "CF_CHAN=fchan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
gnfos:
|
gnfos:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=gnfos"
|
- "CF_CHAN=gnfos"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
hispachan:
|
hispachan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=hispachan"
|
- "CF_CHAN=hispachan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
horochan:
|
horochan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=horochan"
|
- "CF_CHAN=horochan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
iichan:
|
iichan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=iichan"
|
- "CF_CHAN=iichan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
lainchan:
|
lainchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=lainchan"
|
- "CF_CHAN=lainchan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
lolnada:
|
lolnada:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=lolnada"
|
- "CF_CHAN=lolnada"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
nowere:
|
nowere:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=nowere"
|
- "CF_CHAN=nowere"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
plus4chan:
|
plus4chan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=plus4chan"
|
- "CF_CHAN=plus4chan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
sushigirl:
|
sushigirl:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=sushigirl"
|
- "CF_CHAN=sushigirl"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
synch:
|
synch:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=synch"
|
- "CF_CHAN=synch"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
tahta:
|
tahta:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=tahta"
|
- "CF_CHAN=tahta"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
tgchan:
|
tgchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=tgchan"
|
- "CF_CHAN=tgchan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
uboachan:
|
uboachan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=uboachan"
|
- "CF_CHAN=uboachan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
waifuist:
|
waifuist:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=waifuist"
|
- "CF_CHAN=waifuist"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
|
||||||
wizchan:
|
wizchan:
|
||||||
image: simon987/chan_feed
|
image: simon987/chan_feed
|
||||||
restart: always
|
restart: always
|
||||||
user: ${CURRENT_UID}
|
user: ${CURRENT_UID}
|
||||||
volumes:
|
|
||||||
- ${SAVE_FOLDER}:/data/
|
|
||||||
environment:
|
environment:
|
||||||
- "CF_CHAN=wizchan"
|
- "CF_CHAN=wizchan"
|
||||||
- "CF_MQ_HOST=rabbitmq"
|
- "CF_REDIS_HOST=redis"
|
||||||
- "CF_INFLUXDB=influxdb"
|
- "CF_INFLUXDB=influxdb"
|
||||||
- "CF_SAVE_FOLDER=/data/"
|
|
||||||
|
42
get_8kun_boards.py
Normal file
42
get_8kun_boards.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import json
|
||||||
|
import requests
|
||||||
|
from chan.chan import CHANS
|
||||||
|
|
||||||
|
# Board names the remote listing reports during this run.
added = set()

# Board names currently configured for the "8kun2" entry of CHANS.
existing = CHANS["8kun2"]._boards

# Mutable working copy of `existing`; mask()/unmask() edit it in place and
# new boards are appended to it.
updated = [*existing]
|
||||||
|
|
||||||
|
|
||||||
|
def mask(board):
    """Mark ``board`` as masked by prefixing its entry in ``updated`` with "_".

    Every entry of the module-level ``updated`` list equal to ``board`` is
    replaced by ``"_" + board`` and a "[-]" line is printed for it.

    A name that already starts with "_" is left untouched. Without this
    guard a masked entry carried over from a previous run would be prefixed
    again ("_b" -> "__b"), and unmask() only recognises a single "_", so the
    entry could never be restored.

    :param board: board name exactly as stored in ``updated``
    """
    if board.startswith("_"):
        # Already masked; masking twice would only stack underscores.
        return

    for i, b in enumerate(updated):
        if b == board:
            updated[i] = "_" + board
            print("[-] " + board)
|
||||||
|
|
||||||
|
|
||||||
|
def unmask(board):
    """Restore ``board`` in ``updated`` by dropping its "_" mask prefix.

    Every entry of the module-level ``updated`` list equal to
    ``"_" + board`` is replaced by ``board`` and a "[*]" line is printed
    for it. Entries that do not match are left as they are.

    :param board: board name without the "_" prefix
    """
    masked_name = "_" + board
    for position, entry in enumerate(updated):
        if entry != masked_name:
            continue
        updated[position] = board
        print("[*] " + board)
|
||||||
|
|
||||||
|
|
||||||
|
# Query the board-search endpoint with page=0, 50, ..., 450.
for i in range(0, 500, 50):
    r = requests.get("https://sys.8kun.top/board-search.php?page=" + str(i))

    j = json.loads(r.text)

    for board in j["boards"]:
        if board in added:
            # The same name can be reported more than once in a run. It was
            # handled the first time, and appending it again would put
            # duplicate entries in the generated tuple.
            continue
        added.add(board)

        if ("_" + board) in updated:
            # Previously masked board is listed again: restore it.
            unmask(board)
        elif board not in existing:
            # Board not configured yet: add it at the end.
            updated.append(board)
            print("[+] " + board)

# Configured boards the listing did not report are masked, not removed.
for board in existing:
    if board not in added:
        mask(board)

# Print the result as a parenthesised, comma-separated list of quoted names
# (presumably pasted back into the board tuple in chan/chan.py -- confirm).
print("(" + ",".join('"' + u + '"' for u in updated) + ")")
|
@ -1,14 +1,4 @@
|
|||||||
import hashlib
|
from urllib.parse import urljoin
|
||||||
import os
|
|
||||||
import zlib
|
|
||||||
from io import BytesIO
|
|
||||||
from urllib.parse import urljoin, urlparse
|
|
||||||
|
|
||||||
import imagehash
|
|
||||||
from PIL import Image
|
|
||||||
from hexlib.imhash import b64hash
|
|
||||||
|
|
||||||
from util import logger
|
|
||||||
|
|
||||||
from hexlib.regex import HTML_HREF_RE, LINK_RE
|
from hexlib.regex import HTML_HREF_RE, LINK_RE
|
||||||
|
|
||||||
@ -32,51 +22,8 @@ def _is_image(url):
|
|||||||
return url.lower().endswith(IMAGE_FILETYPES)
|
return url.lower().endswith(IMAGE_FILETYPES)
|
||||||
|
|
||||||
|
|
||||||
def image_meta(url, url_idx, web, helper, board):
|
def post_process(item, board, helper):
|
||||||
r = web.get(url)
|
item["_v"] = 1.7
|
||||||
if not r:
|
|
||||||
logger.warning("Could not download image")
|
|
||||||
return None
|
|
||||||
buf = r.content
|
|
||||||
|
|
||||||
sha1 = hashlib.sha1(buf).hexdigest()
|
|
||||||
|
|
||||||
if helper.save_folder:
|
|
||||||
path = os.path.join(helper.save_folder, str(helper.db_id), board)
|
|
||||||
path += "/" + sha1[0]
|
|
||||||
path += "/" + sha1[1:3]
|
|
||||||
os.makedirs(path, exist_ok=True)
|
|
||||||
with open(os.path.join(path, sha1 + os.path.splitext(url)[1]), "wb") as out:
|
|
||||||
out.write(buf)
|
|
||||||
|
|
||||||
try:
|
|
||||||
f = BytesIO(buf)
|
|
||||||
im = Image.open(f)
|
|
||||||
|
|
||||||
meta = {
|
|
||||||
"url": url_idx,
|
|
||||||
"size": len(buf),
|
|
||||||
"width": im.width,
|
|
||||||
"height": im.height,
|
|
||||||
"sha1": sha1,
|
|
||||||
"md5": hashlib.md5(buf).hexdigest(),
|
|
||||||
"crc32": format(zlib.crc32(buf), "x"),
|
|
||||||
"dhash": b64hash(imagehash.dhash(im, hash_size=12), 18),
|
|
||||||
"phash": b64hash(imagehash.phash(im, hash_size=12), 18),
|
|
||||||
"ahash": b64hash(imagehash.average_hash(im, hash_size=12), 18),
|
|
||||||
"whash": b64hash(imagehash.whash(im, hash_size=8), 8),
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("exception during image post processing: " + str(e))
|
|
||||||
return None
|
|
||||||
|
|
||||||
del im, r, buf
|
|
||||||
|
|
||||||
return meta
|
|
||||||
|
|
||||||
|
|
||||||
def post_process(item, board, helper, web):
|
|
||||||
item["_v"] = 1.6
|
|
||||||
item["_id"] = helper.item_unique_id(item, board)
|
item["_id"] = helper.item_unique_id(item, board)
|
||||||
|
|
||||||
item["_board"] = board
|
item["_board"] = board
|
||||||
@ -84,8 +31,6 @@ def post_process(item, board, helper, web):
|
|||||||
|
|
||||||
item["_urls"] = helper.item_urls(item, board)
|
item["_urls"] = helper.item_urls(item, board)
|
||||||
|
|
||||||
item["_img"] = [image_meta(url, i, web, helper, board) for i, url in enumerate(item["_urls"]) if _is_image(url)]
|
|
||||||
|
|
||||||
return item
|
return item
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,12 +1,10 @@
|
|||||||
imagehash
|
|
||||||
Pillow
|
|
||||||
requests
|
requests
|
||||||
requests[socks]
|
requests[socks]
|
||||||
stem
|
stem
|
||||||
influxdb
|
influxdb
|
||||||
pika
|
|
||||||
bs4
|
bs4
|
||||||
urllib3
|
urllib3
|
||||||
git+git://github.com/simon987/hexlib.git
|
git+git://github.com/simon987/hexlib.git
|
||||||
git+git://github.com/simon987/vanwanet_scrape.git
|
git+git://github.com/simon987/vanwanet_scrape.git
|
||||||
cloudscraper
|
cloudscraper
|
||||||
|
redis
|
174
run.py
174
run.py
@ -1,16 +1,14 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
from collections import defaultdict
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from queue import Queue
|
from queue import Queue
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
import redis
|
||||||
|
|
||||||
import pika
|
from hexlib.db import VolatileState
|
||||||
from hexlib.misc import buffered
|
|
||||||
from hexlib.monitoring import Monitoring
|
from hexlib.monitoring import Monitoring
|
||||||
|
|
||||||
from chan.chan import CHANS
|
from chan.chan import CHANS
|
||||||
@ -23,13 +21,23 @@ DBNAME = "chan_feed"
|
|||||||
if os.environ.get("CF_INFLUXDB"):
|
if os.environ.get("CF_INFLUXDB"):
|
||||||
influxdb = Monitoring(DBNAME, host=os.environ.get("CF_INFLUXDB"), logger=logger, batch_size=100, flush_on_exit=True)
|
influxdb = Monitoring(DBNAME, host=os.environ.get("CF_INFLUXDB"), logger=logger, batch_size=100, flush_on_exit=True)
|
||||||
MONITORING = True
|
MONITORING = True
|
||||||
|
else:
|
||||||
|
MONITORING = False
|
||||||
|
|
||||||
|
REDIS_HOST = os.environ.get("CF_REDIS_HOST", "localhost")
|
||||||
|
REDIS_PORT = os.environ.get("CF_REDIS_PORT", 6379)
|
||||||
|
CHAN = os.environ.get("CF_CHAN", None)
|
||||||
|
|
||||||
|
ARC_LISTS = os.environ.get("CF_ARC_LISTS", "arc,imhash").split(",")
|
||||||
|
|
||||||
|
PUB_CHANNEL = os.environ.get("CF_PUB_CHANNEL", "chan_feed")
|
||||||
|
|
||||||
|
|
||||||
class ChanScanner:
|
class ChanScanner:
|
||||||
def __init__(self, helper, proxy):
|
def __init__(self, helper, proxy):
|
||||||
self.web = Web(influxdb if MONITORING else None, rps=helper.rps, get_method=helper.get_method, proxy=proxy)
|
self.web = Web(influxdb if MONITORING else None, rps=helper.rps, get_method=helper.get_method, proxy=proxy)
|
||||||
self.helper = helper
|
self.helper = helper
|
||||||
self.state = ChanState()
|
self.state = state
|
||||||
|
|
||||||
def _threads(self, board):
|
def _threads(self, board):
|
||||||
r = self.web.get(self.helper.threads_url(board))
|
r = self.web.get(self.helper.threads_url(board))
|
||||||
@ -66,96 +74,50 @@ class ChanScanner:
|
|||||||
|
|
||||||
|
|
||||||
def once(func):
|
def once(func):
|
||||||
def wrapper(item, board, helper, channel, web):
|
def wrapper(item, board, helper):
|
||||||
if not state.has_visited(helper.item_unique_id(item, board), helper):
|
if not state.has_visited(helper.item_unique_id(item, board)):
|
||||||
func(item, board, helper, channel, web)
|
func(item, board, helper)
|
||||||
state.mark_visited(helper.item_unique_id(item, board), helper)
|
state.mark_visited(helper.item_unique_id(item, board))
|
||||||
|
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
class ChanState:
|
class ChanState:
|
||||||
def __init__(self):
|
def __init__(self, prefix):
|
||||||
self._db = "state.db"
|
self._state = VolatileState(prefix, 86400 * 7, host=REDIS_HOST, port=REDIS_PORT)
|
||||||
|
print("redis host=" + REDIS_HOST)
|
||||||
|
|
||||||
with sqlite3.connect(self._db) as conn:
|
def mark_visited(self, item: int):
|
||||||
conn.execute(
|
self._state["posts"][item] = 1
|
||||||
"CREATE TABLE IF NOT EXISTS posts "
|
|
||||||
"("
|
|
||||||
" post INT,"
|
|
||||||
" ts INT DEFAULT (strftime('%s','now')),"
|
|
||||||
" chan INT,"
|
|
||||||
" PRIMARY KEY(post, chan)"
|
|
||||||
")"
|
|
||||||
)
|
|
||||||
conn.execute(
|
|
||||||
"CREATE TABLE IF NOT EXISTS threads "
|
|
||||||
"("
|
|
||||||
" thread INT,"
|
|
||||||
" last_modified INT,"
|
|
||||||
" ts INT DEFAULT (strftime('%s','now')),"
|
|
||||||
" chan INT,"
|
|
||||||
" PRIMARY KEY(thread, chan)"
|
|
||||||
")"
|
|
||||||
)
|
|
||||||
conn.execute("PRAGMA journal_mode=wal")
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
def mark_visited(self, item: int, helper):
|
def has_visited(self, item: int):
|
||||||
with sqlite3.connect(self._db, timeout=10000) as conn:
|
return self._state["posts"][item] is not None
|
||||||
conn.execute(
|
|
||||||
"INSERT INTO posts (post, chan) VALUES (?,?)",
|
|
||||||
(item, helper.db_id)
|
|
||||||
)
|
|
||||||
|
|
||||||
def has_visited(self, item: int, helper):
|
|
||||||
with sqlite3.connect(self._db, timeout=10000) as conn:
|
|
||||||
cur = conn.cursor()
|
|
||||||
cur.execute(
|
|
||||||
"SELECT post FROM posts WHERE post=? AND chan=?",
|
|
||||||
(item, helper.db_id)
|
|
||||||
)
|
|
||||||
return cur.fetchone() is not None
|
|
||||||
|
|
||||||
def has_new_posts(self, thread, helper, board):
|
def has_new_posts(self, thread, helper, board):
|
||||||
mtime = helper.thread_mtime(thread)
|
mtime = helper.thread_mtime(thread)
|
||||||
if mtime == -1:
|
if mtime == -1:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
with sqlite3.connect(self._db, timeout=10000) as conn:
|
t = self._state["threads"][helper.item_unique_id(thread, board)]
|
||||||
cur = conn.cursor()
|
|
||||||
cur.execute(
|
if not t or helper.thread_mtime(thread) != t["last_modified"] or t["ts"] + 86400 < int(time.time()):
|
||||||
"SELECT last_modified, ts FROM threads WHERE thread=? AND chan=?",
|
return True
|
||||||
(helper.item_unique_id(thread, board), helper.db_id)
|
return False
|
||||||
)
|
|
||||||
row = cur.fetchone()
|
|
||||||
if not row or helper.thread_mtime(thread) != row[0] or row[1] + 86400 < int(time.time()):
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
def mark_thread_as_visited(self, thread, helper, board):
|
def mark_thread_as_visited(self, thread, helper, board):
|
||||||
with sqlite3.connect(self._db, timeout=10000) as conn:
|
self._state["threads"][helper.item_unique_id(thread, board)] = {
|
||||||
conn.execute(
|
"ts": time.time(),
|
||||||
"INSERT INTO threads (thread, last_modified, chan) "
|
"last_modified": helper.thread_mtime(thread)
|
||||||
"VALUES (?,?,?) "
|
}
|
||||||
"ON CONFLICT (thread, chan) "
|
|
||||||
"DO UPDATE SET last_modified=?, ts=(strftime('%s','now'))",
|
|
||||||
(helper.item_unique_id(thread, board), helper.thread_mtime(thread), helper.db_id,
|
|
||||||
helper.thread_mtime(thread))
|
|
||||||
)
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
|
|
||||||
def publish_worker(queue: Queue, helper, p):
|
def publish_worker(queue: Queue, helper, p):
|
||||||
channel = connect()
|
|
||||||
web = Web(influxdb if MONITORING else None, rps=helper.rps, get_method=helper.get_method, proxy=p)
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
item, board = queue.get()
|
item, board = queue.get()
|
||||||
if item is None:
|
if item is None:
|
||||||
break
|
break
|
||||||
publish(item, board, helper, channel, web)
|
publish(item, board, helper,)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(str(e) + ": " + traceback.format_exc())
|
logger.error(str(e) + ": " + traceback.format_exc())
|
||||||
@ -163,48 +125,22 @@ def publish_worker(queue: Queue, helper, p):
|
|||||||
queue.task_done()
|
queue.task_done()
|
||||||
|
|
||||||
|
|
||||||
@buffered(batch_size=150, flush_on_exit=True)
|
|
||||||
def _publish_buffered(items):
|
|
||||||
if not items:
|
|
||||||
return
|
|
||||||
|
|
||||||
buckets = defaultdict(list)
|
|
||||||
for item in items:
|
|
||||||
buckets[item[1]].append(item)
|
|
||||||
|
|
||||||
for bucket in buckets.values():
|
|
||||||
channel, routing_key, _ = bucket[0]
|
|
||||||
body = [item[2] for item in bucket]
|
|
||||||
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
channel.basic_publish(
|
|
||||||
exchange='chan',
|
|
||||||
routing_key=routing_key,
|
|
||||||
body=json.dumps(body, separators=(',', ':'), ensure_ascii=False, sort_keys=True)
|
|
||||||
)
|
|
||||||
logger.debug("RabbitMQ: published %d items" % len(body))
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
# logger.debug(traceback.format_exc())
|
|
||||||
logger.error(str(e))
|
|
||||||
time.sleep(0.5)
|
|
||||||
channel = connect()
|
|
||||||
|
|
||||||
|
|
||||||
@once
|
@once
|
||||||
def publish(item, board, helper, channel, web):
|
def publish(item, board, helper):
|
||||||
post_process(item, board, helper, web)
|
post_process(item, board, helper)
|
||||||
|
|
||||||
item_type = helper.item_type(item)
|
item_type = helper.item_type(item)
|
||||||
routing_key = "%s.%s.%s" % (chan, item_type, board)
|
routing_key = "%s.%s.%s" % (CHAN, item_type, board)
|
||||||
|
|
||||||
_publish_buffered([(channel, routing_key, item)])
|
message = json.dumps(item, separators=(',', ':'), ensure_ascii=False, sort_keys=True)
|
||||||
|
rdb.publish("chan." + routing_key, message)
|
||||||
|
for arc in ARC_LISTS:
|
||||||
|
rdb.lpush(arc + ".chan." + routing_key, message)
|
||||||
|
|
||||||
if MONITORING:
|
if MONITORING:
|
||||||
distance = datetime.utcnow() - datetime.utcfromtimestamp(helper.item_mtime(item))
|
distance = datetime.utcnow() - datetime.utcfromtimestamp(helper.item_mtime(item))
|
||||||
influxdb.log([{
|
influxdb.log([{
|
||||||
"measurement": chan,
|
"measurement": CHAN,
|
||||||
"time": str(datetime.utcnow()),
|
"time": str(datetime.utcnow()),
|
||||||
"tags": {
|
"tags": {
|
||||||
"board": board
|
"board": board
|
||||||
@ -215,24 +151,8 @@ def publish(item, board, helper, channel, web):
|
|||||||
}])
|
}])
|
||||||
|
|
||||||
|
|
||||||
def connect():
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
rabbit = pika.BlockingConnection(pika.ConnectionParameters(host=rabbitmq_host))
|
|
||||||
channel = rabbit.channel()
|
|
||||||
channel.exchange_declare(exchange="chan", exchange_type="topic")
|
|
||||||
return channel
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(str(e))
|
|
||||||
time.sleep(0.5)
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
chan_helper = CHANS[CHAN]
|
||||||
rabbitmq_host = os.environ.get("CF_MQ_HOST", "localhost")
|
|
||||||
chan = os.environ.get("CF_CHAN", None)
|
|
||||||
chan_helper = CHANS[chan]
|
|
||||||
save_folder = os.environ.get("CF_SAVE_FOLDER", "")
|
save_folder = os.environ.get("CF_SAVE_FOLDER", "")
|
||||||
|
|
||||||
if save_folder:
|
if save_folder:
|
||||||
@ -246,10 +166,11 @@ if __name__ == "__main__":
|
|||||||
if BYPASS_RPS:
|
if BYPASS_RPS:
|
||||||
chan_helper.rps = 10
|
chan_helper.rps = 10
|
||||||
|
|
||||||
state = ChanState()
|
state = ChanState(CHAN)
|
||||||
|
rdb = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)
|
||||||
|
|
||||||
publish_q = Queue()
|
publish_q = Queue()
|
||||||
for _ in range(10):
|
for _ in range(3):
|
||||||
publish_thread = Thread(target=publish_worker, args=(publish_q, chan_helper, proxy))
|
publish_thread = Thread(target=publish_worker, args=(publish_q, chan_helper, proxy))
|
||||||
publish_thread.setDaemon(True)
|
publish_thread.setDaemon(True)
|
||||||
publish_thread.start()
|
publish_thread.start()
|
||||||
@ -260,6 +181,7 @@ if __name__ == "__main__":
|
|||||||
for p, b in s.all_posts():
|
for p, b in s.all_posts():
|
||||||
publish_q.put((p, b))
|
publish_q.put((p, b))
|
||||||
except KeyboardInterrupt as e:
|
except KeyboardInterrupt as e:
|
||||||
for _ in range(5):
|
print("cleanup..")
|
||||||
|
for _ in range(3):
|
||||||
publish_q.put((None, None))
|
publish_q.put((None, None))
|
||||||
break
|
break
|
||||||
|
Loading…
x
Reference in New Issue
Block a user