From 84865554263cd7401455aceec670d291e7b279fa Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 19 Jun 2018 10:36:09 -0400 Subject: [PATCH] Ignore 'parent directory' links --- crawl_server/remote_http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawl_server/remote_http.py b/crawl_server/remote_http.py index e8420a4..5a96202 100644 --- a/crawl_server/remote_http.py +++ b/crawl_server/remote_http.py @@ -177,7 +177,7 @@ class HttpDirectory(RemoteDirectory): @staticmethod def _should_ignore(base_url, link: Anchor): - if link.text == "../" or link.href.endswith(HttpDirectory.BLACK_LIST): + if link.text == "../" or link.href == "../" or link.href.endswith(HttpDirectory.BLACK_LIST): return True # Ignore external links