Skip 'Parent directory' links more efficiently

This commit is contained in:
Simon 2018-07-17 11:20:58 -04:00
parent 756e331c83
commit 55a0fde19d

View File

@@ -86,7 +86,7 @@ class HttpDirectory(RemoteDirectory):
FILE_NAME_BLACKLIST = ( FILE_NAME_BLACKLIST = (
"Parent Directory", "Parent Directory",
" Parent Directory" " Parent Directory"
"../" "../",
) )
MAX_RETRIES = 2 MAX_RETRIES = 2
@@ -113,7 +113,7 @@ class HttpDirectory(RemoteDirectory):
files = [] files = []
for anchor in anchors: for anchor in anchors:
if self._should_ignore(self.base_url, anchor): if self._should_ignore(self.base_url, path, anchor):
continue continue
if self._isdir(anchor): if self._isdir(anchor):
@@ -214,9 +214,12 @@ class HttpDirectory(RemoteDirectory):
return link.href.endswith("/") return link.href.endswith("/")
@staticmethod @staticmethod
def _should_ignore(base_url, link: Anchor): def _should_ignore(base_url, current_path, link: Anchor):
if link.text in HttpDirectory.FILE_NAME_BLACKLIST or link.href in ("../", "./", "", "..", "../../") \
or link.href.endswith(HttpDirectory.BLACK_LIST): if urljoin(base_url, link.href) == urljoin(urljoin(base_url, current_path), "../"):
return True
if link.href.endswith(HttpDirectory.BLACK_LIST):
return True return True
# Ignore external links # Ignore external links