docker tweaks

This commit is contained in:
simon987 2020-03-19 13:47:21 -04:00
parent 408a252368
commit 5eccee69cf
6 changed files with 434 additions and 15 deletions

View File

@ -1,10 +1,11 @@
FROM python:3.8 FROM python:3.8
WORKDIR /app ADD requirements.txt /requirements.txt
ADD requirements.txt /app/requirements.txt
RUN pip install -r requirements.txt RUN pip install -r requirements.txt
ENTRYPOINT ["python", "run.py"]
COPY . /app COPY . /app
RUN chmod 777 -R /app
WORKDIR /app
ENTRYPOINT ["python", "run.py"]

View File

@ -11,6 +11,7 @@ class ChanHelper:
self._boards = boards self._boards = boards
self.rps = rps self.rps = rps
self.get_method = None self.get_method = None
self.save_folder = None
def boards(self): def boards(self):
return [b.replace("\\_", "_") for b in self._boards if not b.startswith("_")] return [b.replace("\\_", "_") for b in self._boards if not b.startswith("_")]

View File

@ -1,9 +1,13 @@
version: "2.1" version: "2.1"
volumes:
influxdb_data:
pg_data:
services: services:
influxdb: influxdb:
image: influxdb:alpine image: influxdb:alpine
volumes: volumes:
- ./influxdb_data:/var/lib/influxdb - influxdb_data:/var/lib/influxdb
grafana: grafana:
image: grafana/grafana image: grafana/grafana
ports: ports:
@ -13,7 +17,7 @@ services:
db: db:
image: postgres image: postgres
volumes: volumes:
- ./pg_data:/var/lib/postgresql/data - pg_data:/var/lib/postgresql/data
environment: environment:
- "POSTGRES_USER=feed_archiver" - "POSTGRES_USER=feed_archiver"
- "POSTGRES_PASSWORD=changeme" - "POSTGRES_PASSWORD=changeme"
@ -44,11 +48,412 @@ services:
build: ./docker_viz/ build: ./docker_viz/
ports: ports:
- 127.0.0.1:3005:80 - 127.0.0.1:3005:80
chan_4chan: # Image boards
0chan:
image: simon987/chan_feed image: simon987/chan_feed
restart: always restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=0chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
22chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=22chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
2chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=2chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
2chhk:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=2chhk"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
38chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=38chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
410chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=410chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
4chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment: environment:
- "CF_CHAN=4chan" - "CF_CHAN=4chan"
- "CF_MQ_HOST=rabbitmq" - "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb" - "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
4kev:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=4kev"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
7chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=7chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
8kun:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=8kun"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
alokal:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=alokal"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
aurorachan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=aurorachan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
awsumchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=awsumchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
chanon:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=chanon"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
chanorg:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=chanorg"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
desuchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=desuchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
doushio:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=doushio"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
endchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=endchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
fchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=fchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
gnfos:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=gnfos"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
hispachan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=hispachan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
horochan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=horochan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
iichan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=iichan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
lainchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=lainchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
lolnada:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=lolnada"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
nowere:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=nowere"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
plus4chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=plus4chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
sushigirl:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=sushigirl"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
synch:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=synch"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
tahta:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=tahta"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
tgchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=tgchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
uboachan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=uboachan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
waifuist:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=waifuist"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
wizchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=wizchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"

View File

@ -1,7 +1,8 @@
import hashlib import hashlib
import os
import zlib import zlib
from io import BytesIO from io import BytesIO
from urllib.parse import urljoin from urllib.parse import urljoin, urlparse
import imagehash import imagehash
from PIL import Image from PIL import Image
@ -31,13 +32,23 @@ def _is_image(url):
return url.lower().endswith(IMAGE_FILETYPES) return url.lower().endswith(IMAGE_FILETYPES)
def image_meta(url, url_idx, web): def image_meta(url, url_idx, web, helper, board):
r = web.get(url) r = web.get(url)
if not r: if not r:
logger.warning("Could not download image") logger.warning("Could not download image")
return None return None
buf = r.content buf = r.content
sha1 = hashlib.sha1(buf).hexdigest()
if helper.save_folder:
path = os.path.join(helper.save_folder, str(helper.db_id), board)
path += "/" + sha1[0]
path += "/" + sha1[1:3]
os.makedirs(path, exist_ok=True)
with open(os.path.join(path, sha1 + os.path.splitext(url)[1]), "wb") as out:
out.write(buf)
try: try:
f = BytesIO(buf) f = BytesIO(buf)
im = Image.open(f) im = Image.open(f)
@ -47,7 +58,7 @@ def image_meta(url, url_idx, web):
"size": len(buf), "size": len(buf),
"width": im.width, "width": im.width,
"height": im.height, "height": im.height,
"sha1": hashlib.sha1(buf).hexdigest(), "sha1": sha1,
"md5": hashlib.md5(buf).hexdigest(), "md5": hashlib.md5(buf).hexdigest(),
"crc32": format(zlib.crc32(buf), "x"), "crc32": format(zlib.crc32(buf), "x"),
"dhash": b64hash(imagehash.dhash(im, hash_size=12), 18), "dhash": b64hash(imagehash.dhash(im, hash_size=12), 18),
@ -73,7 +84,7 @@ def post_process(item, board, helper, web):
item["_urls"] = helper.item_urls(item, board) item["_urls"] = helper.item_urls(item, board)
item["_img"] = [image_meta(url, i, web) for i, url in enumerate(item["_urls"]) if _is_image(url)] item["_img"] = [image_meta(url, i, web, helper, board) for i, url in enumerate(item["_urls"]) if _is_image(url)]
return item return item

4
run.py
View File

@ -233,6 +233,10 @@ if __name__ == "__main__":
rabbitmq_host = os.environ.get("CF_MQ_HOST", "localhost") rabbitmq_host = os.environ.get("CF_MQ_HOST", "localhost")
chan = os.environ.get("CF_CHAN", None) chan = os.environ.get("CF_CHAN", None)
chan_helper = CHANS[chan] chan_helper = CHANS[chan]
save_folder = os.environ.get("CF_SAVE_FOLDER", "")
if save_folder:
chan_helper.save_folder = save_folder
proxy = None proxy = None
if os.environ.get("CF_PROXY"): if os.environ.get("CF_PROXY"):

View File

@ -16,11 +16,8 @@ logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s') formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
file_handler = FileHandler("chan_feed.log")
file_handler.setFormatter(formatter)
for h in logger.handlers: for h in logger.handlers:
logger.removeHandler(h) logger.removeHandler(h)
logger.addHandler(file_handler)
logger.addHandler(StreamHandler(sys.stdout)) logger.addHandler(StreamHandler(sys.stdout))