mirror of
https://github.com/simon987/chan_feed.git
synced 2025-04-04 08:23:05 +00:00
docker tweaks
This commit is contained in:
parent
408a252368
commit
5eccee69cf
11
Dockerfile
11
Dockerfile
@ -1,10 +1,11 @@
|
||||
FROM python:3.8
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ADD requirements.txt /app/requirements.txt
|
||||
ADD requirements.txt /requirements.txt
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
ENTRYPOINT ["python", "run.py"]
|
||||
|
||||
COPY . /app
|
||||
|
||||
RUN chmod 777 -R /app
|
||||
|
||||
WORKDIR /app
|
||||
ENTRYPOINT ["python", "run.py"]
|
||||
|
@ -11,6 +11,7 @@ class ChanHelper:
|
||||
self._boards = boards
|
||||
self.rps = rps
|
||||
self.get_method = None
|
||||
self.save_folder = None
|
||||
|
||||
def boards(self):
|
||||
return [b.replace("\\_", "_") for b in self._boards if not b.startswith("_")]
|
||||
|
@ -1,9 +1,13 @@
|
||||
version: "2.1"
|
||||
volumes:
|
||||
influxdb_data:
|
||||
pg_data:
|
||||
|
||||
services:
|
||||
influxdb:
|
||||
image: influxdb:alpine
|
||||
volumes:
|
||||
- ./influxdb_data:/var/lib/influxdb
|
||||
- influxdb_data:/var/lib/influxdb
|
||||
grafana:
|
||||
image: grafana/grafana
|
||||
ports:
|
||||
@ -13,7 +17,7 @@ services:
|
||||
db:
|
||||
image: postgres
|
||||
volumes:
|
||||
- ./pg_data:/var/lib/postgresql/data
|
||||
- pg_data:/var/lib/postgresql/data
|
||||
environment:
|
||||
- "POSTGRES_USER=feed_archiver"
|
||||
- "POSTGRES_PASSWORD=changeme"
|
||||
@ -44,11 +48,412 @@ services:
|
||||
build: ./docker_viz/
|
||||
ports:
|
||||
- 127.0.0.1:3005:80
|
||||
chan_4chan:
|
||||
# Image boards
|
||||
0chan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=0chan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
22chan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=22chan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
2chan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=2chan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
|
||||
2chhk:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=2chhk"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
38chan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=38chan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
410chan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=410chan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
4chan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=4chan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
4kev:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=4kev"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
7chan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=7chan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
8kun:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=8kun"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
alokal:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=alokal"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
aurorachan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=aurorachan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
awsumchan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=awsumchan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
chanon:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=chanon"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
chanorg:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=chanorg"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
desuchan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=desuchan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
doushio:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=doushio"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
endchan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=endchan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
fchan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=fchan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
gnfos:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=gnfos"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
hispachan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=hispachan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
horochan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=horochan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
iichan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=iichan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
lainchan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=lainchan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
lolnada:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=lolnada"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
nowere:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=nowere"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
plus4chan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=plus4chan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
sushigirl:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=sushigirl"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
synch:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=synch"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
tahta:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=tahta"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
tgchan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=tgchan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
uboachan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=uboachan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
waifuist:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=waifuist"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
||||
wizchan:
|
||||
image: simon987/chan_feed
|
||||
restart: always
|
||||
user: ${CURRENT_UID}
|
||||
volumes:
|
||||
- ${SAVE_FOLDER}:/data/
|
||||
environment:
|
||||
- "CF_CHAN=wizchan"
|
||||
- "CF_MQ_HOST=rabbitmq"
|
||||
- "CF_INFLUXDB=influxdb"
|
||||
- "CF_SAVE_FOLDER=/data/"
|
||||
|
@ -1,7 +1,8 @@
|
||||
import hashlib
|
||||
import os
|
||||
import zlib
|
||||
from io import BytesIO
|
||||
from urllib.parse import urljoin
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import imagehash
|
||||
from PIL import Image
|
||||
@ -31,13 +32,23 @@ def _is_image(url):
|
||||
return url.lower().endswith(IMAGE_FILETYPES)
|
||||
|
||||
|
||||
def image_meta(url, url_idx, web):
|
||||
def image_meta(url, url_idx, web, helper, board):
|
||||
r = web.get(url)
|
||||
if not r:
|
||||
logger.warning("Could not download image")
|
||||
return None
|
||||
buf = r.content
|
||||
|
||||
sha1 = hashlib.sha1(buf).hexdigest()
|
||||
|
||||
if helper.save_folder:
|
||||
path = os.path.join(helper.save_folder, str(helper.db_id), board)
|
||||
path += "/" + sha1[0]
|
||||
path += "/" + sha1[1:3]
|
||||
os.makedirs(path, exist_ok=True)
|
||||
with open(os.path.join(path, sha1 + os.path.splitext(url)[1]), "wb") as out:
|
||||
out.write(buf)
|
||||
|
||||
try:
|
||||
f = BytesIO(buf)
|
||||
im = Image.open(f)
|
||||
@ -47,7 +58,7 @@ def image_meta(url, url_idx, web):
|
||||
"size": len(buf),
|
||||
"width": im.width,
|
||||
"height": im.height,
|
||||
"sha1": hashlib.sha1(buf).hexdigest(),
|
||||
"sha1": sha1,
|
||||
"md5": hashlib.md5(buf).hexdigest(),
|
||||
"crc32": format(zlib.crc32(buf), "x"),
|
||||
"dhash": b64hash(imagehash.dhash(im, hash_size=12), 18),
|
||||
@ -73,7 +84,7 @@ def post_process(item, board, helper, web):
|
||||
|
||||
item["_urls"] = helper.item_urls(item, board)
|
||||
|
||||
item["_img"] = [image_meta(url, i, web) for i, url in enumerate(item["_urls"]) if _is_image(url)]
|
||||
item["_img"] = [image_meta(url, i, web, helper, board) for i, url in enumerate(item["_urls"]) if _is_image(url)]
|
||||
|
||||
return item
|
||||
|
||||
|
4
run.py
4
run.py
@ -233,6 +233,10 @@ if __name__ == "__main__":
|
||||
rabbitmq_host = os.environ.get("CF_MQ_HOST", "localhost")
|
||||
chan = os.environ.get("CF_CHAN", None)
|
||||
chan_helper = CHANS[chan]
|
||||
save_folder = os.environ.get("CF_SAVE_FOLDER", "")
|
||||
|
||||
if save_folder:
|
||||
chan_helper.save_folder = save_folder
|
||||
|
||||
proxy = None
|
||||
if os.environ.get("CF_PROXY"):
|
||||
|
3
util.py
3
util.py
@ -16,11 +16,8 @@ logger = logging.getLogger("default")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
|
||||
file_handler = FileHandler("chan_feed.log")
|
||||
file_handler.setFormatter(formatter)
|
||||
for h in logger.handlers:
|
||||
logger.removeHandler(h)
|
||||
logger.addHandler(file_handler)
|
||||
logger.addHandler(StreamHandler(sys.stdout))
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user