3 Commits

Author SHA1 Message Date
Richard Patel
1625d6c888 Bump to v1.0.2 2018-11-18 18:53:57 +01:00
Richard Patel
03a487f393 Fix crawl loop 2018-11-18 18:45:06 +01:00
Richard Patel
ac8221b109 Retry /task/upload 2018-11-18 18:33:26 +01:00
3 changed files with 33 additions and 19 deletions

View File

@@ -50,6 +50,8 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
var linkHref string
for {
err = nil
tokenType := doc.Next()
if tokenType == html.ErrorToken {
break
@@ -80,16 +82,16 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
linkHref = ""
if strings.LastIndexByte(href, '?') != -1 {
goto nextToken
continue
}
switch href {
case "", " ", ".", "..", "/":
goto nextToken
continue
}
if strings.Contains(href, "../") {
goto nextToken
continue
}
var link fasturl.URL
@@ -108,8 +110,6 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
links = append(links, link)
}
}
nextToken:
}
return

View File

@@ -14,7 +14,7 @@ import (
var app = cli.App {
Name: "od-database-crawler",
Usage: "OD-Database Go crawler",
Version: "1.0.1",
Version: "1.0.2",
BashComplete: cli.DefaultAppComplete,
Writer: os.Stdout,
Action: cmdBase,

View File

@@ -11,6 +11,7 @@ import (
"net/url"
"os"
"strconv"
"time"
)
var serverClient = http.Client {
@@ -101,25 +102,38 @@ func uploadChunks(websiteId uint64, f *os.File) error {
multi.Close()
req, err := http.NewRequest(
http.MethodPost,
config.ServerUrl + "/task/upload",
&b)
req.Header.Set("content-type", multi.FormDataContentType())
if err != nil { return err }
for retried := false; true; retried = true {
err = nil
if retried {
// Error occurred, retry upload
time.Sleep(5 * time.Second)
}
res, err := serverClient.Do(req)
if err != nil { return err }
res.Body.Close()
req, err := http.NewRequest(
http.MethodPost,
config.ServerUrl + "/task/upload",
&b)
req.Header.Set("content-type", multi.FormDataContentType())
if err != nil { continue }
if res.StatusCode != http.StatusOK {
return fmt.Errorf("failed to upload list part %d: %s",
iter, res.Status)
res, err := serverClient.Do(req)
if err != nil { continue }
res.Body.Close()
if res.StatusCode != http.StatusOK {
logrus.WithField("status", res.Status).
WithField("part", iter).
Errorf("Upload failed")
continue
}
// Upload successful
break
}
logrus.WithField("id", websiteId).
WithField("part", iter).
Infof("Uploading files chunk")
Infof("Uploaded files chunk")
}
return nil
}