3 Commits

Author SHA1 Message Date
Richard Patel
1625d6c888 Bump to v1.0.2 2018-11-18 18:53:57 +01:00
Richard Patel
03a487f393 Fix crawl loop 2018-11-18 18:45:06 +01:00
Richard Patel
ac8221b109 Retry /task/upload 2018-11-18 18:33:26 +01:00
3 changed files with 33 additions and 19 deletions

View File

@@ -50,6 +50,8 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
var linkHref string var linkHref string
for { for {
err = nil
tokenType := doc.Next() tokenType := doc.Next()
if tokenType == html.ErrorToken { if tokenType == html.ErrorToken {
break break
@@ -80,16 +82,16 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
linkHref = "" linkHref = ""
if strings.LastIndexByte(href, '?') != -1 { if strings.LastIndexByte(href, '?') != -1 {
goto nextToken continue
} }
switch href { switch href {
case "", " ", ".", "..", "/": case "", " ", ".", "..", "/":
goto nextToken continue
} }
if strings.Contains(href, "../") { if strings.Contains(href, "../") {
goto nextToken continue
} }
var link fasturl.URL var link fasturl.URL
@@ -108,8 +110,6 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
links = append(links, link) links = append(links, link)
} }
} }
nextToken:
} }
return return

View File

@@ -14,7 +14,7 @@ import (
var app = cli.App { var app = cli.App {
Name: "od-database-crawler", Name: "od-database-crawler",
Usage: "OD-Database Go crawler", Usage: "OD-Database Go crawler",
Version: "1.0.1", Version: "1.0.2",
BashComplete: cli.DefaultAppComplete, BashComplete: cli.DefaultAppComplete,
Writer: os.Stdout, Writer: os.Stdout,
Action: cmdBase, Action: cmdBase,

View File

@@ -11,6 +11,7 @@ import (
"net/url" "net/url"
"os" "os"
"strconv" "strconv"
"time"
) )
var serverClient = http.Client { var serverClient = http.Client {
@@ -101,25 +102,38 @@ func uploadChunks(websiteId uint64, f *os.File) error {
multi.Close() multi.Close()
for retried := false; true; retried = true {
err = nil
if retried {
// Error occurred, retry upload
time.Sleep(5 * time.Second)
}
req, err := http.NewRequest( req, err := http.NewRequest(
http.MethodPost, http.MethodPost,
config.ServerUrl + "/task/upload", config.ServerUrl + "/task/upload",
&b) &b)
req.Header.Set("content-type", multi.FormDataContentType()) req.Header.Set("content-type", multi.FormDataContentType())
if err != nil { return err } if err != nil { continue }
res, err := serverClient.Do(req) res, err := serverClient.Do(req)
if err != nil { return err } if err != nil { continue }
res.Body.Close() res.Body.Close()
if res.StatusCode != http.StatusOK { if res.StatusCode != http.StatusOK {
return fmt.Errorf("failed to upload list part %d: %s", logrus.WithField("status", res.Status).
iter, res.Status) WithField("part", iter).
Errorf("Upload failed")
continue
}
// Upload successful
break
} }
logrus.WithField("id", websiteId). logrus.WithField("id", websiteId).
WithField("part", iter). WithField("part", iter).
Infof("Uploading files chunk") Infof("Uploaded files chunk")
} }
return nil return nil
} }