mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-12-13 23:29:02 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1625d6c888 | ||
|
|
03a487f393 | ||
|
|
ac8221b109 |
10
crawl.go
10
crawl.go
@@ -50,6 +50,8 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
|
|||||||
|
|
||||||
var linkHref string
|
var linkHref string
|
||||||
for {
|
for {
|
||||||
|
err = nil
|
||||||
|
|
||||||
tokenType := doc.Next()
|
tokenType := doc.Next()
|
||||||
if tokenType == html.ErrorToken {
|
if tokenType == html.ErrorToken {
|
||||||
break
|
break
|
||||||
@@ -80,16 +82,16 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
|
|||||||
linkHref = ""
|
linkHref = ""
|
||||||
|
|
||||||
if strings.LastIndexByte(href, '?') != -1 {
|
if strings.LastIndexByte(href, '?') != -1 {
|
||||||
goto nextToken
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
switch href {
|
switch href {
|
||||||
case "", " ", ".", "..", "/":
|
case "", " ", ".", "..", "/":
|
||||||
goto nextToken
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.Contains(href, "../") {
|
if strings.Contains(href, "../") {
|
||||||
goto nextToken
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
var link fasturl.URL
|
var link fasturl.URL
|
||||||
@@ -108,8 +110,6 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
|
|||||||
links = append(links, link)
|
links = append(links, link)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
nextToken:
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|||||||
2
main.go
2
main.go
@@ -14,7 +14,7 @@ import (
|
|||||||
var app = cli.App {
|
var app = cli.App {
|
||||||
Name: "od-database-crawler",
|
Name: "od-database-crawler",
|
||||||
Usage: "OD-Database Go crawler",
|
Usage: "OD-Database Go crawler",
|
||||||
Version: "1.0.1",
|
Version: "1.0.2",
|
||||||
BashComplete: cli.DefaultAppComplete,
|
BashComplete: cli.DefaultAppComplete,
|
||||||
Writer: os.Stdout,
|
Writer: os.Stdout,
|
||||||
Action: cmdBase,
|
Action: cmdBase,
|
||||||
|
|||||||
40
server.go
40
server.go
@@ -11,6 +11,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
var serverClient = http.Client {
|
var serverClient = http.Client {
|
||||||
@@ -101,25 +102,38 @@ func uploadChunks(websiteId uint64, f *os.File) error {
|
|||||||
|
|
||||||
multi.Close()
|
multi.Close()
|
||||||
|
|
||||||
req, err := http.NewRequest(
|
for retried := false; true; retried = true {
|
||||||
http.MethodPost,
|
err = nil
|
||||||
config.ServerUrl + "/task/upload",
|
if retried {
|
||||||
&b)
|
// Error occurred, retry upload
|
||||||
req.Header.Set("content-type", multi.FormDataContentType())
|
time.Sleep(5 * time.Second)
|
||||||
if err != nil { return err }
|
}
|
||||||
|
|
||||||
res, err := serverClient.Do(req)
|
req, err := http.NewRequest(
|
||||||
if err != nil { return err }
|
http.MethodPost,
|
||||||
res.Body.Close()
|
config.ServerUrl + "/task/upload",
|
||||||
|
&b)
|
||||||
|
req.Header.Set("content-type", multi.FormDataContentType())
|
||||||
|
if err != nil { continue }
|
||||||
|
|
||||||
if res.StatusCode != http.StatusOK {
|
res, err := serverClient.Do(req)
|
||||||
return fmt.Errorf("failed to upload list part %d: %s",
|
if err != nil { continue }
|
||||||
iter, res.Status)
|
res.Body.Close()
|
||||||
|
|
||||||
|
if res.StatusCode != http.StatusOK {
|
||||||
|
logrus.WithField("status", res.Status).
|
||||||
|
WithField("part", iter).
|
||||||
|
Errorf("Upload failed")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upload successful
|
||||||
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
logrus.WithField("id", websiteId).
|
logrus.WithField("id", websiteId).
|
||||||
WithField("part", iter).
|
WithField("part", iter).
|
||||||
Infof("Uploading files chunk")
|
Infof("Uploaded files chunk")
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user