mirror of
				https://github.com/simon987/od-database.git
				synced 2025-10-25 19:56:51 +00:00 
			
		
		
		
	Fix for odd HTML listings
This commit is contained in:
		
							parent
							
								
									e54609972c
								
							
						
					
					
						commit
						d8486104b4
					
				| @ -62,7 +62,11 @@ class HttpDirectory(RemoteDirectory): | |||||||
|         "?C=N;O=D", |         "?C=N;O=D", | ||||||
|         "?C=M;O=A", |         "?C=M;O=A", | ||||||
|         "?C=S;O=A", |         "?C=S;O=A", | ||||||
|         "?C=D;O=A" |         "?C=D;O=A", | ||||||
|  |         "?MA", | ||||||
|  |         "?SA", | ||||||
|  |         "?DA", | ||||||
|  |         "?ND" | ||||||
|     ) |     ) | ||||||
|     MAX_RETRIES = 3 |     MAX_RETRIES = 3 | ||||||
| 
 | 
 | ||||||
| @ -75,7 +79,7 @@ class HttpDirectory(RemoteDirectory): | |||||||
| 
 | 
 | ||||||
|     def list_dir(self, path): |     def list_dir(self, path): | ||||||
| 
 | 
 | ||||||
|         path_url = self.base_url + path.strip("/") + "/" |         path_url = urljoin(self.base_url, path, "") | ||||||
|         body = self._stream_body(path_url) |         body = self._stream_body(path_url) | ||||||
|         if not body: |         if not body: | ||||||
|             return None |             return None | ||||||
| @ -96,8 +100,7 @@ class HttpDirectory(RemoteDirectory): | |||||||
|                     is_dir=True |                     is_dir=True | ||||||
|                 ) |                 ) | ||||||
|             else: |             else: | ||||||
|                 pass |                 urls_to_request.append(urljoin(path_url, anchor.href)) | ||||||
|                 urls_to_request.append(path_url + anchor.href) |  | ||||||
| 
 | 
 | ||||||
|         for file in self.request_files(urls_to_request): |         for file in self.request_files(urls_to_request): | ||||||
|             yield file |             yield file | ||||||
| @ -181,7 +184,7 @@ class HttpDirectory(RemoteDirectory): | |||||||
|             return True |             return True | ||||||
| 
 | 
 | ||||||
|         # Ignore external links |         # Ignore external links | ||||||
|         full_url = os.path.join(base_url, link.href) |         full_url = urljoin(base_url, link.href) | ||||||
|         if not full_url.startswith(base_url): |         if not full_url.startswith(base_url): | ||||||
|             return True |             return True | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user