mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-12-13 15:19:03 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1625d6c888 | ||
|
|
03a487f393 | ||
|
|
ac8221b109 |
10
crawl.go
10
crawl.go
@@ -50,6 +50,8 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
|
||||
|
||||
var linkHref string
|
||||
for {
|
||||
err = nil
|
||||
|
||||
tokenType := doc.Next()
|
||||
if tokenType == html.ErrorToken {
|
||||
break
|
||||
@@ -80,16 +82,16 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
|
||||
linkHref = ""
|
||||
|
||||
if strings.LastIndexByte(href, '?') != -1 {
|
||||
goto nextToken
|
||||
continue
|
||||
}
|
||||
|
||||
switch href {
|
||||
case "", " ", ".", "..", "/":
|
||||
goto nextToken
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.Contains(href, "../") {
|
||||
goto nextToken
|
||||
continue
|
||||
}
|
||||
|
||||
var link fasturl.URL
|
||||
@@ -108,8 +110,6 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
|
||||
links = append(links, link)
|
||||
}
|
||||
}
|
||||
|
||||
nextToken:
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
2
main.go
2
main.go
@@ -14,7 +14,7 @@ import (
|
||||
var app = cli.App {
|
||||
Name: "od-database-crawler",
|
||||
Usage: "OD-Database Go crawler",
|
||||
Version: "1.0.1",
|
||||
Version: "1.0.2",
|
||||
BashComplete: cli.DefaultAppComplete,
|
||||
Writer: os.Stdout,
|
||||
Action: cmdBase,
|
||||
|
||||
40
server.go
40
server.go
@@ -11,6 +11,7 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
var serverClient = http.Client {
|
||||
@@ -101,25 +102,38 @@ func uploadChunks(websiteId uint64, f *os.File) error {
|
||||
|
||||
multi.Close()
|
||||
|
||||
req, err := http.NewRequest(
|
||||
http.MethodPost,
|
||||
config.ServerUrl + "/task/upload",
|
||||
&b)
|
||||
req.Header.Set("content-type", multi.FormDataContentType())
|
||||
if err != nil { return err }
|
||||
for retried := false; true; retried = true {
|
||||
err = nil
|
||||
if retried {
|
||||
// Error occurred, retry upload
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
|
||||
res, err := serverClient.Do(req)
|
||||
if err != nil { return err }
|
||||
res.Body.Close()
|
||||
req, err := http.NewRequest(
|
||||
http.MethodPost,
|
||||
config.ServerUrl + "/task/upload",
|
||||
&b)
|
||||
req.Header.Set("content-type", multi.FormDataContentType())
|
||||
if err != nil { continue }
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("failed to upload list part %d: %s",
|
||||
iter, res.Status)
|
||||
res, err := serverClient.Do(req)
|
||||
if err != nil { continue }
|
||||
res.Body.Close()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
logrus.WithField("status", res.Status).
|
||||
WithField("part", iter).
|
||||
Errorf("Upload failed")
|
||||
continue
|
||||
}
|
||||
|
||||
// Upload successful
|
||||
break
|
||||
}
|
||||
|
||||
logrus.WithField("id", websiteId).
|
||||
WithField("part", iter).
|
||||
Infof("Uploading files chunk")
|
||||
Infof("Uploaded files chunk")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user