mirror of
				https://github.com/simon987/od-database.git
				synced 2025-10-25 19:56:51 +00:00 
			
		
		
		
	Disabled thread pool for header requests in listing
This commit is contained in:
		
							parent
							
								
									b97b8f6784
								
							
						
					
					
						commit
						99d64b658b
					
				| @ -120,7 +120,6 @@ class RemoteDirectoryCrawler: | ||||
|         files_q.put(None) | ||||
|         file_writer_thread.join() | ||||
| 
 | ||||
| 
 | ||||
|         return CrawlResult(files_written[0], "success") | ||||
| 
 | ||||
|     def _process_listings(self, url: str, in_q: Queue, files_q: Queue): | ||||
|  | ||||
| @ -1,5 +1,4 @@ | ||||
| from urllib.parse import urljoin, unquote, quote | ||||
| 
 | ||||
| from urllib.parse import unquote | ||||
| import os | ||||
| from html.parser import HTMLParser | ||||
| from itertools import repeat | ||||
| @ -104,7 +103,7 @@ class HttpDirectory(RemoteDirectory): | ||||
| 
 | ||||
|     def request_files(self, urls_to_request: list) -> list: | ||||
| 
 | ||||
|         if len(urls_to_request) > 30: | ||||
|         if len(urls_to_request) > 3000000: | ||||
|             # Many urls, use multi-threaded solution | ||||
|             pool = ThreadPool(processes=10) | ||||
|             files = pool.starmap(HttpDirectory._request_file, zip(repeat(self), urls_to_request)) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user