mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-04-24 12:35:51 +00:00
64 lines
1.6 KiB
Python
64 lines
1.6 KiB
Python
import os
|
|
import hashlib
|
|
import mimetypes
|
|
from PIL import Image
|
|
import simplejson
|
|
|
|
rootDir = "/home/simon/Documents"
|
|
|
|
|
|
# https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
|
|
def md5sum(filename, block_size=65536):
|
|
hash = hashlib.md5()
|
|
with open(filename, "rb") as f:
|
|
for block in iter(lambda: f.read(block_size), b""):
|
|
hash.update(block)
|
|
return hash.hexdigest()
|
|
|
|
|
|
def crawl(root_dir):
|
|
|
|
docs = []
|
|
|
|
for root, subdirs, files in os.walk(root_dir):
|
|
|
|
print(root)
|
|
|
|
for filename in files:
|
|
full_path = os.path.join(root, filename)
|
|
|
|
doc = dict()
|
|
|
|
doc["md5"] = md5sum(os.path.join(root, filename))
|
|
doc["path"] = root
|
|
doc["name"] = filename
|
|
doc["size"] = os.path.getsize(full_path)
|
|
doc["mtime"] = int(os.path.getmtime(full_path))
|
|
|
|
mime_type = mimetypes.guess_type(full_path)[0]
|
|
|
|
if mime_type is not None:
|
|
|
|
doc["mime"] = mime_type
|
|
|
|
if mime_type.startswith("image"):
|
|
try:
|
|
width, height = Image.open(full_path).size
|
|
|
|
doc["width"] = width
|
|
doc["height"] = height
|
|
except OSError:
|
|
doc.pop('mime', None)
|
|
pass
|
|
except ValueError:
|
|
doc.pop('mime', None)
|
|
pass
|
|
|
|
docs.append(doc)
|
|
|
|
file = open("crawler.json", "w")
|
|
file.write(simplejson.dumps(docs))
|
|
file.close()
|
|
|
|
|
|
crawl(rootDir) |