From ac0b8d2d0b3a54101e490f5b2cde4443df24ca73 Mon Sep 17 00:00:00 2001
From: Richard Patel
Date: Thu, 15 Nov 2018 23:36:41 +0100
Subject: [PATCH] Blacklist all paths with a query parameter

---
 crawl.go | 32 ++++----------------------------
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/crawl.go b/crawl.go
index 6085bfa..628f213 100644
--- a/crawl.go
+++ b/crawl.go
@@ -74,16 +74,15 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
 				linkTexts = nil
 
 				// TODO Optimized decision tree
+				if strings.LastIndexByte(href, '?') != -1 {
+					goto nextToken
+				}
+
 				for _, entry := range urlBlackList {
 					if href == entry {
 						goto nextToken
 					}
 				}
-				for _, entry := range urlPartBlackList {
-					if strings.Contains(href, entry) {
-						goto nextToken
-					}
-				}
 				for _, entry := range fileNameBlackList {
 					if strings.Contains(linkText, entry) {
 						goto nextToken
@@ -196,29 +195,6 @@ var urlBlackList = [...]string {
 	"/",
 }
 
-var urlPartBlackList = [...]string {
-	"?C=N&O=D",
-	"?C=M&O=A",
-	"?C=S&O=A",
-	"?C=D&O=A",
-	"?C=N;O=D",
-	"?C=M;O=A",
-	"?C=M&O=D",
-	"?C=S;O=A",
-	"?C=S&O=D",
-	"?C=D;O=A",
-	"?MA",
-	"?SA",
-	"?DA",
-	"?ND",
-	"?C=N&O=A",
-	"?C=N&O=A",
-	"?M=A",
-	"?N=D",
-	"?S=A",
-	"?D=A",
-}
-
 var fileNameBlackList = [...]string {
 	"Parent Directory",
 	" Parent Directory",