mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-04-16 08:56:44 +00:00
Fix crawl loop
This commit is contained in:
parent
ac8221b109
commit
03a487f393
10
crawl.go
10
crawl.go
@ -50,6 +50,8 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
|
||||
|
||||
var linkHref string
|
||||
for {
|
||||
+ err = nil
|
||||
|
||||
tokenType := doc.Next()
|
||||
if tokenType == html.ErrorToken {
|
||||
break
|
||||
@ -80,16 +82,16 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
|
||||
linkHref = ""
|
||||
|
||||
if strings.LastIndexByte(href, '?') != -1 {
|
||||
- goto nextToken
|
||||
+ continue
|
||||
}
|
||||
|
||||
switch href {
|
||||
case "", " ", ".", "..", "/":
|
||||
- goto nextToken
|
||||
+ continue
|
||||
}
|
||||
|
||||
if strings.Contains(href, "../") {
|
||||
- goto nextToken
|
||||
+ continue
|
||||
}
|
||||
|
||||
var link fasturl.URL
|
||||
@ -108,8 +110,6 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
|
||||
links = append(links, link)
|
||||
}
|
||||
}
|
||||
|
||||
- nextToken:
|
||||
}
|
||||
|
||||
return
|
||||
|
Loading…
x
Reference in New Issue
Block a user