3 Commits

| Author | SHA1 | Message | Date |
|---|---|---|---|
| Richard Patel | 1625d6c888 | Bump to v1.0.2 | 2018-11-18 18:53:57 +01:00 |
| Richard Patel | 03a487f393 | Fix crawl loop | 2018-11-18 18:45:06 +01:00 |
| Richard Patel | ac8221b109 | Retry /task/upload | 2018-11-18 18:33:26 +01:00 |
3 changed files with 33 additions and 19 deletions

View File

@@ -50,6 +50,8 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
 	var linkHref string
 	for {
+		err = nil
 		tokenType := doc.Next()
 		if tokenType == html.ErrorToken {
 			break
@@ -80,16 +82,16 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
 linkHref = ""
 if strings.LastIndexByte(href, '?') != -1 {
-	goto nextToken
+	continue
 }
 switch href {
 case "", " ", ".", "..", "/":
-	goto nextToken
+	continue
 }
 if strings.Contains(href, "../") {
-	goto nextToken
+	continue
 }
 var link fasturl.URL
@@ -108,8 +110,6 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
 				links = append(links, link)
 			}
 		}
-	nextToken:
 	}
 	return

View File

@@ -14,7 +14,7 @@ import (
 var app = cli.App {
 	Name: "od-database-crawler",
 	Usage: "OD-Database Go crawler",
-	Version: "1.0.1",
+	Version: "1.0.2",
 	BashComplete: cli.DefaultAppComplete,
 	Writer: os.Stdout,
 	Action: cmdBase,

View File

@@ -11,6 +11,7 @@ import (
 	"net/url"
 	"os"
 	"strconv"
+	"time"
 )
 var serverClient = http.Client {
@@ -101,25 +102,38 @@ func uploadChunks(websiteId uint64, f *os.File) error {
 		multi.Close()
-		req, err := http.NewRequest(
-			http.MethodPost,
-			config.ServerUrl + "/task/upload",
-			&b)
-		req.Header.Set("content-type", multi.FormDataContentType())
-		if err != nil { return err }
-		res, err := serverClient.Do(req)
-		if err != nil { return err }
-		res.Body.Close()
-		if res.StatusCode != http.StatusOK {
-			return fmt.Errorf("failed to upload list part %d: %s",
-				iter, res.Status)
+		for retried := false; true; retried = true {
+			err = nil
+			if retried {
+				// Error occurred, retry upload
+				time.Sleep(5 * time.Second)
+			}
+			req, err := http.NewRequest(
+				http.MethodPost,
+				config.ServerUrl + "/task/upload",
+				&b)
+			req.Header.Set("content-type", multi.FormDataContentType())
+			if err != nil { continue }
+			res, err := serverClient.Do(req)
+			if err != nil { continue }
+			res.Body.Close()
+			if res.StatusCode != http.StatusOK {
+				logrus.WithField("status", res.Status).
+					WithField("part", iter).
+					Errorf("Upload failed")
+				continue
+			}
+			// Upload successful
+			break
 		}
 		logrus.WithField("id", websiteId).
 			WithField("part", iter).
-			Infof("Uploading files chunk")
+			Infof("Uploaded files chunk")
 	}
 	return nil
 }