Fix for odd HTML listings

This commit is contained in:
Simon 2018-06-19 12:14:50 -04:00
parent e54609972c
commit d8486104b4

View File

@ -62,7 +62,11 @@ class HttpDirectory(RemoteDirectory):
"?C=N;O=D", "?C=N;O=D",
"?C=M;O=A", "?C=M;O=A",
"?C=S;O=A", "?C=S;O=A",
"?C=D;O=A" "?C=D;O=A",
"?MA",
"?SA",
"?DA",
"?ND"
) )
MAX_RETRIES = 3 MAX_RETRIES = 3
@ -75,7 +79,7 @@ class HttpDirectory(RemoteDirectory):
def list_dir(self, path): def list_dir(self, path):
path_url = self.base_url + path.strip("/") + "/" path_url = urljoin(self.base_url, path, "")
body = self._stream_body(path_url) body = self._stream_body(path_url)
if not body: if not body:
return None return None
@ -96,8 +100,7 @@ class HttpDirectory(RemoteDirectory):
is_dir=True is_dir=True
) )
else: else:
pass urls_to_request.append(urljoin(path_url, anchor.href))
urls_to_request.append(path_url + anchor.href)
for file in self.request_files(urls_to_request): for file in self.request_files(urls_to_request):
yield file yield file
@ -181,7 +184,7 @@ class HttpDirectory(RemoteDirectory):
return True return True
# Ignore external links # Ignore external links
full_url = os.path.join(base_url, link.href) full_url = urljoin(base_url, link.href)
if not full_url.startswith(base_url): if not full_url.startswith(base_url):
return True return True