mirror of
https://github.com/simon987/chan_feed.git
synced 2025-12-15 13:29:03 +00:00
download & save image metadata
This commit is contained in:
@@ -1,10 +1,77 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import re
|
||||
import zlib
|
||||
from io import BytesIO
|
||||
|
||||
import imagehash
|
||||
from PIL import Image
|
||||
|
||||
from util import logger
|
||||
|
||||
LINK_RE = re.compile(r"(https?://[\w\-_.]+\.[a-z]{2,4}([^\s<'\"]*|$))")
|
||||
|
||||
IMAGE_FILETYPES = (
|
||||
# :orig for twitter cdn
|
||||
'.jpg',
|
||||
'.jpg:orig',
|
||||
'.jpeg',
|
||||
'.jpeg:orig',
|
||||
'.png',
|
||||
'.png:orig',
|
||||
'.gif',
|
||||
'.gif:orig',
|
||||
'.tiff',
|
||||
'.bmp',
|
||||
'.webp'
|
||||
)
|
||||
|
||||
def post_process(item, board, helper):
|
||||
item["_v"] = 1.3
|
||||
|
||||
def _is_image(url):
    """Return True when *url* ends with one of the IMAGE_FILETYPES suffixes.

    The comparison is case-insensitive: the URL is lower-cased before the
    suffix test, and ``str.endswith`` is given the whole tuple at once.
    """
    lowered = url.lower()
    return lowered.endswith(IMAGE_FILETYPES)
|
||||
|
||||
|
||||
def b64hash(imhash, bcount=None):
    """Encode a perceptual hash's bit matrix as a base64 ASCII string.

    The flattened ``imhash.hash`` array is packed into one integer where
    element ``i`` of the flattened array becomes bit ``i`` (so the first
    element is the least-significant bit). That integer is serialized
    big-endian into ``bcount`` bytes and base64-encoded.

    :param imhash: object exposing a ``hash`` attribute with a ``flatten()``
        method that yields truthy/falsy bit values (presumably an
        ``imagehash.ImageHash`` -- confirm against callers).
    :param bcount: number of bytes in the serialized hash. When omitted,
        the smallest byte count able to hold every bit is used.
    :return: base64 text (ASCII ``str``) of the packed hash.
    :raises OverflowError: if an explicit ``bcount`` is too small for the
        packed value.
    """
    bits = imhash.hash.flatten()

    if bcount is None:
        # Round the bit count up to whole bytes.
        bcount = (len(bits) + 7) // 8

    packed = 0
    for position, bit in enumerate(bits):
        if bit:
            packed |= 1 << position

    return base64.b64encode(packed.to_bytes(bcount, "big")).decode("ascii")
|
||||
|
||||
|
||||
def image_meta(url, url_idx, web):
    """Download an image and return its size, checksums and perceptual hashes.

    :param url: address of the image to fetch.
    :param url_idx: value stored under the ``"url"`` key of the result (the
        caller passes the index of ``url`` within the item's URL list).
    :param web: client whose ``get(url)`` returns a response exposing
        ``content`` as bytes and evaluating falsy on failure (presumably a
        requests-style wrapper -- confirm against the caller).
    :return: dict with keys ``url``, ``size``, ``width``, ``height``,
        ``sha1``, ``md5``, ``crc32``, ``dhash``, ``phash``, ``ahash`` and
        ``whash``; or ``None`` when the download, decoding or hashing fails.
    """
    r = web.get(url)
    if not r:
        # Say which URL failed; an empty warning is useless in the logs.
        logger.warning("failed to download image: " + str(url))
        return None
    buf = r.content

    try:
        im = Image.open(BytesIO(buf))
    except Exception as e:
        logger.warning("exception during image open: " + str(e))
        return None

    try:
        # Image.open only parses the header; pixel data is decoded lazily by
        # the hash functions, so truncated/corrupt files fail here. Treat it
        # as a best-effort failure instead of crashing the caller.
        return {
            "url": url_idx,
            "size": len(buf),
            "width": im.width,
            "height": im.height,
            "sha1": hashlib.sha1(buf).hexdigest(),
            "md5": hashlib.md5(buf).hexdigest(),
            "crc32": format(zlib.crc32(buf), "x"),
            # hash_size=12 -> 12 * 12 = 144 bits = 18 bytes
            "dhash": b64hash(imagehash.dhash(im, hash_size=12), 18),
            "phash": b64hash(imagehash.phash(im, hash_size=12), 18),
            "ahash": b64hash(imagehash.average_hash(im, hash_size=12), 18),
            # hash_size=8 -> 8 * 8 = 64 bits = 8 bytes
            "whash": b64hash(imagehash.whash(im, hash_size=8), 8),
        }
    except Exception as e:
        logger.warning("exception during image hashing: " + str(e))
        return None
    finally:
        # Release the decoder and its underlying buffer promptly.
        im.close()
|
||||
|
||||
|
||||
def post_process(item, board, helper, web):
|
||||
item["_v"] = 1.4
|
||||
item["_id"] = helper.item_unique_id(item, board)
|
||||
|
||||
item["_board"] = board
|
||||
@@ -12,6 +79,8 @@ def post_process(item, board, helper):
|
||||
|
||||
item["_urls"] = helper.item_urls(item, board)
|
||||
|
||||
item["_img"] = [image_meta(url, i, web) for i, url in enumerate(item["_urls"]) if _is_image(url)]
|
||||
|
||||
return item
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user