mirror of
https://github.com/simon987/od-database.git
synced 2025-04-20 10:56:47 +00:00
Skip 'Parent directory' links more efficiently
This commit is contained in:
parent
756e331c83
commit
55a0fde19d
@ -86,7 +86,7 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
FILE_NAME_BLACKLIST = (
|
FILE_NAME_BLACKLIST = (
|
||||||
"Parent Directory",
|
"Parent Directory",
|
||||||
" Parent Directory"
|
" Parent Directory"
|
||||||
"../"
|
"../",
|
||||||
|
|
||||||
)
|
)
|
||||||
MAX_RETRIES = 2
|
MAX_RETRIES = 2
|
||||||
@ -113,7 +113,7 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
files = []
|
files = []
|
||||||
|
|
||||||
for anchor in anchors:
|
for anchor in anchors:
|
||||||
if self._should_ignore(self.base_url, anchor):
|
if self._should_ignore(self.base_url, path, anchor):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if self._isdir(anchor):
|
if self._isdir(anchor):
|
||||||
@ -214,9 +214,12 @@ class HttpDirectory(RemoteDirectory):
|
|||||||
return link.href.endswith("/")
|
return link.href.endswith("/")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _should_ignore(base_url, link: Anchor):
|
def _should_ignore(base_url, current_path, link: Anchor):
|
||||||
if link.text in HttpDirectory.FILE_NAME_BLACKLIST or link.href in ("../", "./", "", "..", "../../") \
|
|
||||||
or link.href.endswith(HttpDirectory.BLACK_LIST):
|
if urljoin(base_url, link.href) == urljoin(urljoin(base_url, current_path), "../"):
|
||||||
|
return True
|
||||||
|
|
||||||
|
if link.href.endswith(HttpDirectory.BLACK_LIST):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Ignore external links
|
# Ignore external links
|
||||||
|
Loading…
x
Reference in New Issue
Block a user