mirror of
https://github.com/simon987/od-database.git
synced 2025-04-20 02:46:45 +00:00
Disabled thread pool for headers requests in listing
This commit is contained in:
parent
b97b8f6784
commit
99d64b658b
@@ -120,7 +120,6 @@ class RemoteDirectoryCrawler:
|
|||||||
files_q.put(None)
|
files_q.put(None)
|
||||||
file_writer_thread.join()
|
file_writer_thread.join()
|
||||||
|
|
||||||
|
|
||||||
return CrawlResult(files_written[0], "success")
|
return CrawlResult(files_written[0], "success")
|
||||||
|
|
||||||
def _process_listings(self, url: str, in_q: Queue, files_q: Queue):
|
def _process_listings(self, url: str, in_q: Queue, files_q: Queue):
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
from urllib.parse import urljoin, unquote, quote
|
from urllib.parse import unquote
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from itertools import repeat
|
from itertools import repeat
|
||||||
@@ -104,7 +103,7 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
|
|
||||||
def request_files(self, urls_to_request: list) -> list:
|
def request_files(self, urls_to_request: list) -> list:
|
||||||
|
|
||||||
if len(urls_to_request) > 30:
|
if len(urls_to_request) > 3000000:
|
||||||
# Many urls, use multi-threaded solution
|
# Many urls, use multi-threaded solution
|
||||||
pool = ThreadPool(processes=10)
|
pool = ThreadPool(processes=10)
|
||||||
files = pool.starmap(HttpDirectory._request_file, zip(repeat(self), urls_to_request))
|
files = pool.starmap(HttpDirectory._request_file, zip(repeat(self), urls_to_request))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user