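"""Manual test helpers for od-database: dump file listings, generate random
documents, index them, and generate search traffic for stress testing.
Uncomment the calls at the bottom to run individual tests."""
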
import json
import os
import random
import shutil
from concurrent.futures import ThreadPoolExecutor

import requests

from search.search import ElasticSearchEngine

# Dictionary words and file extensions used to generate random test data
terms = requests.get("https://svnweb.freebsd.org/csrg/share/dict/words?view=co&content-type=text/plain") \
    .text.splitlines()
exts = [
    "zip", "exe", "mp3", "avi", "mp4", "rar", "7zip", "ogg", "m4a", "flac", "doc", "docx", "aac", "xls",
    "cab", "txt", "c", "java", "class", "jar", "py", "cpp", "h", "png", "jpg", "jpeg", "ttf", "torrent",
    "part", "blend", "3ds", "obj", "ico", "html", "css", "js", "ts", "ape", "asm", "nasm", "fasm", "o",
    "so", "dll", "tar", "gz", "bin", "cad", "cmd", "bat", "sh", "md"
]


def dump_local_filesystem(root_dir: str):
    # Walk root_dir and write one JSON document per file, one per line
    docs = []

    for root, dirs, files in os.walk(root_dir):

        for filename in files:
            full_path = os.path.join(root, filename)
            stats = os.stat(full_path)

            doc = dict()
            doc["name"] = filename
            doc["path"] = root
            doc["mtime"] = stats.st_mtime
            doc["size"] = stats.st_size

            docs.append(doc)

    with open("local_filesystem.json", "w") as f:
        f.writelines(json.dumps(doc) + "\n" for doc in docs)


def random_path():
    # 1 to 5 dictionary words joined into a pseudo-path
    return "/".join(random.choices(terms, k=random.randint(1, 5)))


def random_file_name():
    # 1 to 3 dictionary words joined by a random separator, plus a random extension
    return random.choice(["_", " ", "-", ".", "#", ""]).\
        join(random.choices(terms, k=random.randint(1, 3))) + "." + random.choice(exts)


def get_random_file():
    # Fake file document with random metadata
    doc = dict()
    doc["name"] = random_file_name()
    doc["path"] = random_path()
    doc["mtime"] = random.randint(0, 1000000000000)
    doc["size"] = random.randint(-1, 1000000000)

    return doc


def dump_random_files(count=10):
    # Same JSON-lines format as dump_local_filesystem()
    with open("random_dump.json", "w") as f:
        f.writelines(json.dumps(get_random_file()) + "\n" for _ in range(count))


def index_file_list(path: str, website_id):
    # Import a JSON-lines dump into the od-database Elasticsearch index
    es = ElasticSearchEngine("od-database")
    with open(path, "r") as f:
        es.import_json(f.readlines(), website_id)


def search(term=""):
    requests.get("http://localhost/search?q=" + term, verify=False)
    print(term)


def random_searches(count=10000000, max_workers=1000):
    # Fire off `count` random search queries from a thread pool
    pool = ThreadPoolExecutor(max_workers=max_workers)
    pool.map(search, random.choices(terms, k=count))


def make_wide_filesystem(count=100000):
    # Recreate stress_test/ with up to `count` randomly named directories
    # (ignore_errors so the first run does not fail when the directory is missing)
    shutil.rmtree("stress_test", ignore_errors=True)
    os.mkdir("stress_test")
    for _ in range(count):
        new_path = "stress_test/" + random.choice(terms)
        if not os.path.exists(new_path):
            os.mkdir(new_path)


# Uncomment to run individual tests:
# dump_local_filesystem("/mnt/")
# index_file_list("local_filesystem.json", 4)
# random_searches(100000)
# dump_random_files(20000 * 100000)
# make_wide_filesystem(10000)