mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 18:36:44 +00:00
Fix for odd html listings
This commit is contained in:
parent
e54609972c
commit
d8486104b4
@ -62,7 +62,11 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
"?C=N;O=D",
|
"?C=N;O=D",
|
||||||
"?C=M;O=A",
|
"?C=M;O=A",
|
||||||
"?C=S;O=A",
|
"?C=S;O=A",
|
||||||
"?C=D;O=A"
|
"?C=D;O=A",
|
||||||
|
"?MA",
|
||||||
|
"?SA",
|
||||||
|
"?DA",
|
||||||
|
"?ND"
|
||||||
)
|
)
|
||||||
MAX_RETRIES = 3
|
MAX_RETRIES = 3
|
||||||
|
|
||||||
@ -75,7 +79,7 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
|
|
||||||
def list_dir(self, path):
|
def list_dir(self, path):
|
||||||
|
|
||||||
path_url = self.base_url + path.strip("/") + "/"
|
path_url = urljoin(self.base_url, path, "")
|
||||||
body = self._stream_body(path_url)
|
body = self._stream_body(path_url)
|
||||||
if not body:
|
if not body:
|
||||||
return None
|
return None
|
||||||
@ -96,8 +100,7 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
is_dir=True
|
is_dir=True
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
pass
|
urls_to_request.append(urljoin(path_url, anchor.href))
|
||||||
urls_to_request.append(path_url + anchor.href)
|
|
||||||
|
|
||||||
for file in self.request_files(urls_to_request):
|
for file in self.request_files(urls_to_request):
|
||||||
yield file
|
yield file
|
||||||
@ -181,7 +184,7 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
# Ignore external links
|
# Ignore external links
|
||||||
full_url = os.path.join(base_url, link.href)
|
full_url = urljoin(base_url, link.href)
|
||||||
if not full_url.startswith(base_url):
|
if not full_url.startswith(base_url):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user