Disabled thread pool for headers requests in listing

This commit is contained in:
Simon 2018-06-18 10:33:33 -04:00
parent b97b8f6784
commit 99d64b658b
2 changed files with 2 additions and 4 deletions

View File

@@ -120,7 +120,6 @@ class RemoteDirectoryCrawler:
files_q.put(None) files_q.put(None)
file_writer_thread.join() file_writer_thread.join()
return CrawlResult(files_written[0], "success") return CrawlResult(files_written[0], "success")
def _process_listings(self, url: str, in_q: Queue, files_q: Queue): def _process_listings(self, url: str, in_q: Queue, files_q: Queue):

View File

@@ -1,5 +1,4 @@
from urllib.parse import urljoin, unquote, quote from urllib.parse import unquote
import os import os
from html.parser import HTMLParser from html.parser import HTMLParser
from itertools import repeat from itertools import repeat
@@ -104,7 +103,7 @@ class HttpDirectory(RemoteDirectory):
def request_files(self, urls_to_request: list) -> list: def request_files(self, urls_to_request: list) -> list:
if len(urls_to_request) > 30: if len(urls_to_request) > 3000000:
# Many urls, use multi-threaded solution # Many urls, use multi-threaded solution
pool = ThreadPool(processes=10) pool = ThreadPool(processes=10)
files = pool.starmap(HttpDirectory._request_file, zip(repeat(self), urls_to_request)) files = pool.starmap(HttpDirectory._request_file, zip(repeat(self), urls_to_request))