docker tweaks

This commit is contained in:
simon987 2020-03-19 13:47:21 -04:00
parent 408a252368
commit 5eccee69cf
6 changed files with 434 additions and 15 deletions

View File

@ -1,10 +1,11 @@
FROM python:3.8
WORKDIR /app
ADD requirements.txt /app/requirements.txt
ADD requirements.txt /requirements.txt
RUN pip install -r requirements.txt
ENTRYPOINT ["python", "run.py"]
COPY . /app
RUN chmod 777 -R /app
WORKDIR /app
ENTRYPOINT ["python", "run.py"]

View File

@ -11,6 +11,7 @@ class ChanHelper:
self._boards = boards
self.rps = rps
self.get_method = None
self.save_folder = None
def boards(self):
return [b.replace("\\_", "_") for b in self._boards if not b.startswith("_")]

View File

@ -1,9 +1,13 @@
version: "2.1"
volumes:
influxdb_data:
pg_data:
services:
influxdb:
image: influxdb:alpine
volumes:
- ./influxdb_data:/var/lib/influxdb
- influxdb_data:/var/lib/influxdb
grafana:
image: grafana/grafana
ports:
@ -13,7 +17,7 @@ services:
db:
image: postgres
volumes:
- ./pg_data:/var/lib/postgresql/data
- pg_data:/var/lib/postgresql/data
environment:
- "POSTGRES_USER=feed_archiver"
- "POSTGRES_PASSWORD=changeme"
@ -44,11 +48,412 @@ services:
build: ./docker_viz/
ports:
- 127.0.0.1:3005:80
chan_4chan:
# Image boards
0chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=0chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
22chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=22chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
2chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=2chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
2chhk:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=2chhk"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
38chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=38chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
410chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=410chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
4chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=4chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
4kev:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=4kev"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
7chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=7chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
8kun:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=8kun"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
alokal:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=alokal"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
aurorachan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=aurorachan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
awsumchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=awsumchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
chanon:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=chanon"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
chanorg:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=chanorg"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
desuchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=desuchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
doushio:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=doushio"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
endchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=endchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
fchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=fchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
gnfos:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=gnfos"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
hispachan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=hispachan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
horochan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=horochan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
iichan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=iichan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
lainchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=lainchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
lolnada:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=lolnada"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
nowere:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=nowere"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
plus4chan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=plus4chan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
sushigirl:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=sushigirl"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
synch:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=synch"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
tahta:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=tahta"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
tgchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=tgchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
uboachan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=uboachan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
waifuist:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=waifuist"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"
wizchan:
image: simon987/chan_feed
restart: always
user: ${CURRENT_UID}
volumes:
- ${SAVE_FOLDER}:/data/
environment:
- "CF_CHAN=wizchan"
- "CF_MQ_HOST=rabbitmq"
- "CF_INFLUXDB=influxdb"
- "CF_SAVE_FOLDER=/data/"

View File

@ -1,7 +1,8 @@
import hashlib
import os
import zlib
from io import BytesIO
from urllib.parse import urljoin
from urllib.parse import urljoin, urlparse
import imagehash
from PIL import Image
@ -31,13 +32,23 @@ def _is_image(url):
return url.lower().endswith(IMAGE_FILETYPES)
def image_meta(url, url_idx, web):
def image_meta(url, url_idx, web, helper, board):
r = web.get(url)
if not r:
logger.warning("Could not download image")
return None
buf = r.content
sha1 = hashlib.sha1(buf).hexdigest()
if helper.save_folder:
path = os.path.join(helper.save_folder, str(helper.db_id), board)
path += "/" + sha1[0]
path += "/" + sha1[1:3]
os.makedirs(path, exist_ok=True)
with open(os.path.join(path, sha1 + os.path.splitext(url)[1]), "wb") as out:
out.write(buf)
try:
f = BytesIO(buf)
im = Image.open(f)
@ -47,7 +58,7 @@ def image_meta(url, url_idx, web):
"size": len(buf),
"width": im.width,
"height": im.height,
"sha1": hashlib.sha1(buf).hexdigest(),
"sha1": sha1,
"md5": hashlib.md5(buf).hexdigest(),
"crc32": format(zlib.crc32(buf), "x"),
"dhash": b64hash(imagehash.dhash(im, hash_size=12), 18),
@ -73,7 +84,7 @@ def post_process(item, board, helper, web):
item["_urls"] = helper.item_urls(item, board)
item["_img"] = [image_meta(url, i, web) for i, url in enumerate(item["_urls"]) if _is_image(url)]
item["_img"] = [image_meta(url, i, web, helper, board) for i, url in enumerate(item["_urls"]) if _is_image(url)]
return item

4
run.py
View File

@ -233,6 +233,10 @@ if __name__ == "__main__":
rabbitmq_host = os.environ.get("CF_MQ_HOST", "localhost")
chan = os.environ.get("CF_CHAN", None)
chan_helper = CHANS[chan]
save_folder = os.environ.get("CF_SAVE_FOLDER", "")
if save_folder:
chan_helper.save_folder = save_folder
proxy = None
if os.environ.get("CF_PROXY"):

View File

@ -16,11 +16,8 @@ logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
file_handler = FileHandler("chan_feed.log")
file_handler.setFormatter(formatter)
for h in logger.handlers:
logger.removeHandler(h)
logger.addHandler(file_handler)
logger.addHandler(StreamHandler(sys.stdout))