mirror of
https://github.com/simon987/od-database.git
synced 2025-12-14 15:19:03 +00:00
Crawler performance improvements
This commit is contained in:
@@ -102,7 +102,7 @@ class HttpDirectory(RemoteDirectory):
|
||||
def list_dir(self, path):
|
||||
|
||||
current_dir_name = path[path.rstrip("/").rfind("/") + 1: -1]
|
||||
- path_identifier = hashlib.sha1(current_dir_name.encode())
|
||||
+ path_identifier = hashlib.md5(current_dir_name.encode())
|
||||
path_url = urljoin(self.base_url, path, "")
|
||||
body = self._stream_body(path_url)
|
||||
anchors = self._parse_links(body)
|
||||
|
||||
Reference in New Issue
Block a user