mirror of
https://github.com/simon987/od-database.git
synced 2025-04-20 10:56:47 +00:00
Disabled thread pool for header requests in listing
This commit is contained in:
parent
b97b8f6784
commit
99d64b658b
@ -120,7 +120,6 @@ class RemoteDirectoryCrawler:
|
||||
files_q.put(None)
|
||||
file_writer_thread.join()
|
||||
|
||||
|
||||
return CrawlResult(files_written[0], "success")
|
||||
|
||||
def _process_listings(self, url: str, in_q: Queue, files_q: Queue):
|
||||
|
@ -1,5 +1,4 @@
|
||||
from urllib.parse import urljoin, unquote, quote
|
||||
|
||||
from urllib.parse import unquote
|
||||
import os
|
||||
from html.parser import HTMLParser
|
||||
from itertools import repeat
|
||||
@ -104,7 +103,7 @@ class HttpDirectory(RemoteDirectory):
|
||||
|
||||
def request_files(self, urls_to_request: list) -> list:
|
||||
|
||||
if len(urls_to_request) > 30:
|
||||
if len(urls_to_request) > 3000000:
|
||||
# Many urls, use multi-threaded solution
|
||||
pool = ThreadPool(processes=10)
|
||||
files = pool.starmap(HttpDirectory._request_file, zip(repeat(self), urls_to_request))
|
||||
|
Loading…
x
Reference in New Issue
Block a user