Crawler performance improvements

This commit is contained in:
Simon
2018-07-25 11:27:50 -04:00
parent fbbe952e4d
commit 34d1f375a8
2 changed files with 8 additions and 7 deletions

View File

@@ -102,7 +102,7 @@ class HttpDirectory(RemoteDirectory):
def list_dir(self, path):
current_dir_name = path[path.rstrip("/").rfind("/") + 1: -1]
- path_identifier = hashlib.sha1(current_dir_name.encode())
+ path_identifier = hashlib.md5(current_dir_name.encode())
path_url = urljoin(self.base_url, path, "")
body = self._stream_body(path_url)
anchors = self._parse_links(body)