Indexing after crawling is a bit more efficient

Simon
2018-06-14 16:41:43 -04:00
parent 83ca579ec7
commit dffd032659
5 changed files with 21 additions and 30 deletions


@@ -51,8 +51,8 @@ def get_random_file():
 doc = dict()
 doc["name"] = random_file_name()
 doc["path"] = random_path()
-doc["mtime"] = random.randint(0, 10000000)
-doc["size"] = random.randint(-1, 100000000000000)
+doc["mtime"] = random.randint(0, 1000000000000)
+doc["size"] = random.randint(-1, 1000000000)
 return doc
@@ -80,8 +80,7 @@ def random_searches(count=10000000, max_workers=1000):
 pool.map(search, random.choices(terms, k=count))
 # dump_local_filesystem("/mnt/")
 # index_file_list("crawl_server/crawled/123.json", 10)
 # index_file_list("random_dump.json", 1000)
 # random_searches(100000)
-dump_random_files(20000 * 100000)
+# dump_random_files(20000 * 100000)
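
For reference, a minimal, self-contained sketch of the test-data generator as it reads after this change. random_file_name() and random_path() are helpers from the same module that are not shown in this diff, so the stand-in implementations below are assumptions for illustration only.

import random

def random_file_name():
    # Stand-in for the project's helper (not shown in this diff)
    return "file_%d.txt" % random.randint(0, 999999)

def random_path():
    # Stand-in for the project's helper (not shown in this diff)
    return "/".join("dir%d" % random.randint(0, 99) for _ in range(3))

def get_random_file():
    doc = dict()
    doc["name"] = random_file_name()
    doc["path"] = random_path()
    # Bounds as of this commit: wider mtime range, smaller maximum size
    doc["mtime"] = random.randint(0, 1000000000000)
    doc["size"] = random.randint(-1, 1000000000)
    return doc

if __name__ == "__main__":
    print(get_random_file())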