From 084b3a59030b303be98a9824fea6ce9cc3ea3114 Mon Sep 17 00:00:00 2001 From: Richard Patel Date: Thu, 15 Nov 2018 23:51:31 +0100 Subject: [PATCH] Optimizing with hexa :P --- crawl.go | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/crawl.go b/crawl.go index 628f213..214ace6 100644 --- a/crawl.go +++ b/crawl.go @@ -39,7 +39,6 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) { doc := html.NewTokenizer(bytes.NewReader(body)) var linkHref string - var linkTexts []string for { tokenType := doc.Next() token := doc.Token() @@ -58,20 +57,13 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) { } } - case html.TextToken: - if linkHref != "" { - linkTexts = append(linkTexts, token.Data) - } - case html.EndTagToken: if linkHref != "" && token.DataAtom == atom.A { // Copy params href := linkHref - linkText := strings.Join(linkTexts, " ") // Reset params linkHref = "" - linkTexts = nil // TODO Optimized decision tree if strings.LastIndexByte(href, '?') != -1 { @@ -83,10 +75,8 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) { goto nextToken } } - for _, entry := range fileNameBlackList { - if strings.Contains(linkText, entry) { - goto nextToken - } + if strings.Contains(href, "../") { + goto nextToken } var link fasturl.URL @@ -194,10 +184,3 @@ var urlBlackList = [...]string { "..", "/", } - -var fileNameBlackList = [...]string { - "Parent Directory", - " Parent Directory", - "../", -} -