From 152a6f20fb6bd9482b527ac3b24fc7a671b79b31 Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 15 Jul 2018 08:54:36 -0400 Subject: [PATCH] Re-enabled multi threaded file requests for large directories --- crawl_server/remote_http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crawl_server/remote_http.py b/crawl_server/remote_http.py index c3a3083..c6a4718 100644 --- a/crawl_server/remote_http.py +++ b/crawl_server/remote_http.py @@ -89,7 +89,7 @@ class HttpDirectory(RemoteDirectory): "../" ) - MAX_RETRIES = 3 + MAX_RETRIES = 2 def __init__(self, url): super().__init__(url) @@ -138,7 +138,7 @@ class HttpDirectory(RemoteDirectory): def request_files(self, urls_to_request: list) -> list: - if len(urls_to_request) > 3000000: + if len(urls_to_request) > 150: # Many urls, use multi-threaded solution pool = ThreadPool(processes=10) files = pool.starmap(HttpDirectory._request_file, zip(repeat(self), urls_to_request))