mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-12-14 15:49:02 +00:00
More work on task_tracker integration
This commit is contained in:
312
server.go
312
server.go
@@ -2,12 +2,15 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto"
|
||||
"crypto/hmac"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/spf13/viper"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"golang.org/x/time/rate"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
@@ -15,34 +18,174 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
var serverClient = http.Client {
|
||||
Timeout: config.ServerTimeout,
|
||||
var serverWorker *TrackerWorker
|
||||
|
||||
var serverClient = http.Client{
|
||||
Timeout: config.ServerTimeout,
|
||||
Transport: new(ServerTripper),
|
||||
}
|
||||
|
||||
var serverUserAgent = "od-database-crawler/" + rootCmd.Version
|
||||
|
||||
//TODO: Move those elsewhere?
|
||||
type WorkerAccessRequest struct {
|
||||
Assign bool `json:"assign"`
|
||||
Submit bool `json:"submit"`
|
||||
Project int `json:"project"`
|
||||
}
|
||||
|
||||
//todo: only keep necessary info
|
||||
type FetchTaskResponse struct {
|
||||
Ok bool `json:"ok"`
|
||||
Message string `json:"message"`
|
||||
Content struct {
|
||||
Task struct {
|
||||
Id int64 `json:"id"`
|
||||
Priority int64 `json:"priority"`
|
||||
Project struct {
|
||||
Id int64 `json:"id"`
|
||||
Priority int64 `json:"priority"`
|
||||
Name string `json:"name"`
|
||||
CloneUrl string `json:"clone_url"`
|
||||
GitRepo string `json:"git_repo"`
|
||||
Version string `json:"version"`
|
||||
Motd string `json:"motd"`
|
||||
Public bool `json:"public"`
|
||||
Hidden bool `json:"hidden"`
|
||||
Chain int64 `json:"chain"`
|
||||
Paused bool `json:"paused"`
|
||||
AssignRate rate.Limit `json:"assign_rate"`
|
||||
SubmitRate rate.Limit `json:"submit_rate"`
|
||||
} `json:"project"`
|
||||
Assignee int64 `json:"assignee"`
|
||||
Retries int64 `json:"retries"`
|
||||
MaxRetries int64 `json:"max_retries"`
|
||||
Status int64 `json:"status"`
|
||||
Recipe string `json:"recipe"`
|
||||
MaxAssignTime int64 `json:"max_assign_time"`
|
||||
AssignTime int64 `json:"assign_time"`
|
||||
VerificationCount int64 `json:"verification_count"`
|
||||
} `json:"task"`
|
||||
} `json:"content"`
|
||||
}
|
||||
|
||||
type TrackerWorker struct {
|
||||
Alias string `json:"alias"`
|
||||
Id int `json:"id"`
|
||||
Secret []byte `json:"secret"`
|
||||
}
|
||||
|
||||
type CreateTrackerWorkerResponse struct {
|
||||
Ok bool `json:"ok"`
|
||||
Message string `json:"message"`
|
||||
Content struct {
|
||||
Worker TrackerWorker `json:"worker"`
|
||||
} `json:"content"`
|
||||
}
|
||||
|
||||
type CreateTrackerWorkerRequest struct {
|
||||
Alias string `json:"alias"`
|
||||
}
|
||||
|
||||
func getOrCreateWorker() *TrackerWorker {
|
||||
|
||||
var worker TrackerWorker
|
||||
|
||||
if _, err := os.Stat("worker.json"); os.IsNotExist(err) {
|
||||
req := CreateTrackerWorkerRequest{
|
||||
Alias: "crawler", //todo: load from config
|
||||
}
|
||||
body, _ := json.Marshal(&req)
|
||||
buf := bytes.NewBuffer(body)
|
||||
resp, _ := serverClient.Post(config.TrackerUrl+"/worker/create", "application/json", buf)
|
||||
//todo: handle err
|
||||
|
||||
fmt.Println(resp.StatusCode)
|
||||
|
||||
workerResponse := CreateTrackerWorkerResponse{}
|
||||
respBody, _ := ioutil.ReadAll(resp.Body)
|
||||
_ = json.Unmarshal(respBody, &workerResponse)
|
||||
//todo handle err
|
||||
fmt.Println(workerResponse)
|
||||
|
||||
workerJsonData, _ := json.Marshal(&workerResponse.Content.Worker)
|
||||
fp, _ := os.OpenFile("worker.json", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
|
||||
_, _ = fp.Write(workerJsonData)
|
||||
//todo: handle err
|
||||
} else {
|
||||
fp, _ := os.OpenFile("worker.json", os.O_RDONLY, 0600)
|
||||
workerJsonData, _ := ioutil.ReadAll(fp)
|
||||
_ = json.Unmarshal(workerJsonData, &worker)
|
||||
//todo: handle err
|
||||
}
|
||||
|
||||
return &worker
|
||||
}
|
||||
|
||||
func FetchTask() (t *Task, err error) {
|
||||
res, err := serverClient.PostForm(
|
||||
config.ServerUrl + "/task/get",
|
||||
url.Values{ "token": {config.Token} })
|
||||
if err != nil { return }
|
||||
|
||||
//todo: this whole block should definitely be extracted elsewhere
|
||||
if serverWorker == nil {
|
||||
serverWorker = getOrCreateWorker()
|
||||
|
||||
//Request ASSIGN permission
|
||||
//todo: project ID should be stored as a int in the first place
|
||||
pid, _ := strconv.Atoi(config.TrackerProject)
|
||||
accessReq, _ := json.Marshal(WorkerAccessRequest{
|
||||
Project: int(pid),
|
||||
Assign: true,
|
||||
Submit: false,
|
||||
})
|
||||
buf := bytes.NewBuffer(accessReq)
|
||||
res, err := serverClient.Post(config.TrackerUrl+"/project/request_access", "application/json", buf)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Println(res.StatusCode)
|
||||
}
|
||||
|
||||
res, err := serverClient.Get(
|
||||
config.TrackerUrl + "/task/get/" + config.TrackerProject)
|
||||
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
switch res.StatusCode {
|
||||
case 200:
|
||||
break
|
||||
//TODO: 404 should not happen.
|
||||
case 404, 500:
|
||||
return nil, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("http %s", res.Status)
|
||||
}
|
||||
|
||||
t = new(Task)
|
||||
err = json.NewDecoder(res.Body).Decode(t)
|
||||
jsonResponse := FetchTaskResponse{}
|
||||
err = json.NewDecoder(res.Body).Decode(&jsonResponse)
|
||||
if _, ok := err.(*json.SyntaxError); ok {
|
||||
return nil, fmt.Errorf("/task/get returned invalid JSON")
|
||||
} else if err != nil { return }
|
||||
} else if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if !jsonResponse.Ok {
|
||||
//The tracker will return Ok=false when no tasks are available
|
||||
err = errors.New(jsonResponse.Message)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Println(jsonResponse.Content.Task.Recipe)
|
||||
task := Task{}
|
||||
err = json.Unmarshal([]byte(jsonResponse.Content.Task.Recipe), &task)
|
||||
if _, ok := err.(*json.SyntaxError); ok {
|
||||
return nil, fmt.Errorf("/task/get returned invalid JSON")
|
||||
} else if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
t = &task
|
||||
|
||||
return
|
||||
}
|
||||
@@ -59,7 +202,7 @@ func PushResult(result *TaskResult, f *os.File) (err error) {
|
||||
return
|
||||
}
|
||||
|
||||
err = uploadChunks(result.WebsiteId, f)
|
||||
err = uploadWebsocket(result.WebsiteId, f)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to upload file list: %s", err)
|
||||
err2 := CancelTask(result.WebsiteId)
|
||||
@@ -75,93 +218,49 @@ func PushResult(result *TaskResult, f *os.File) (err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func uploadChunks(websiteId uint64, f *os.File) error {
|
||||
eof := false
|
||||
for iter := 1; !eof; iter++ {
|
||||
// TODO Stream with io.Pipe?
|
||||
var b bytes.Buffer
|
||||
func uploadWebsocket(websiteId uint64, f *os.File) error {
|
||||
|
||||
multi := multipart.NewWriter(&b)
|
||||
|
||||
// Set upload fields
|
||||
var err error
|
||||
err = multi.WriteField("token", config.Token)
|
||||
if err != nil { return err }
|
||||
err = multi.WriteField("website_id", fmt.Sprintf("%d", websiteId))
|
||||
if err != nil { return err }
|
||||
|
||||
// Copy chunk to file_list
|
||||
formFile, err := multi.CreateFormFile("file_list", "file_list")
|
||||
var n int64
|
||||
n, err = io.CopyN(formFile, f, config.ChunkSize)
|
||||
if err != io.EOF && err != nil {
|
||||
return err
|
||||
}
|
||||
if n == 0 {
|
||||
// Don't upload, no content
|
||||
return nil
|
||||
} else if n < config.ChunkSize {
|
||||
err = nil
|
||||
// Break at end of iteration
|
||||
eof = true
|
||||
}
|
||||
|
||||
multi.Close()
|
||||
|
||||
for retries := 0; retries < viper.GetInt(ConfUploadRetries); retries++ {
|
||||
if retries > 0 {
|
||||
// Error occurred, retry upload
|
||||
time.Sleep(viper.GetDuration(ConfUploadRetryInterval))
|
||||
}
|
||||
|
||||
req, err := http.NewRequest(
|
||||
http.MethodPost,
|
||||
config.ServerUrl + "/task/upload",
|
||||
&b)
|
||||
req.Header.Set("content-type", multi.FormDataContentType())
|
||||
if err != nil { continue }
|
||||
|
||||
res, err := serverClient.Do(req)
|
||||
if err != nil { continue }
|
||||
res.Body.Close()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
logrus.WithField("status", res.Status).
|
||||
WithField("part", iter).
|
||||
Errorf("Upload failed")
|
||||
continue
|
||||
}
|
||||
|
||||
// Upload successful
|
||||
break
|
||||
}
|
||||
|
||||
logrus.WithField("id", websiteId).
|
||||
WithField("part", iter).
|
||||
Infof("Uploaded files chunk")
|
||||
}
|
||||
//TODO:
|
||||
/*
|
||||
* OD-DB will give you an Upload token when you fetch the task
|
||||
* Open a WS connection at {ws_bucket_addr}/upload with the 'X-Upload-Token' as header
|
||||
* Stream whole file as a single WS message
|
||||
* Close connection
|
||||
*/
|
||||
return nil
|
||||
}
|
||||
|
||||
func uploadResult(result *TaskResult) (err error) {
|
||||
req := releaseTaskRequest {
|
||||
TaskId: int64(result.WebsiteId),
|
||||
|
||||
//TODO:
|
||||
/*
|
||||
* When the file has been uploaded, just release the task with the TR_OK code
|
||||
* Don't bother sending the ODDB-related stuff, You just need the task id
|
||||
* Probably a good idea to wrap this around a new releaseTask() function
|
||||
*/
|
||||
|
||||
req := releaseTaskRequest{
|
||||
TaskId: int64(result.WebsiteId),
|
||||
ResultCode: result.ResultCode,
|
||||
// TODO What is verification
|
||||
Verification: 0,
|
||||
}
|
||||
|
||||
resultEnc, err := json.Marshal(&req)
|
||||
if err != nil { panic(err) }
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
res, err := serverClient.PostForm(
|
||||
config.ServerUrl + "/task/release",
|
||||
url.Values {
|
||||
"token": {config.Token},
|
||||
config.TrackerUrl+"/task/release",
|
||||
url.Values{
|
||||
"token": {config.Token},
|
||||
"result": {string(resultEnc)},
|
||||
},
|
||||
)
|
||||
if err != nil { return }
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
res.Body.Close()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
@@ -172,22 +271,8 @@ func uploadResult(result *TaskResult) (err error) {
|
||||
}
|
||||
|
||||
func CancelTask(websiteId uint64) (err error) {
|
||||
// TODO Remove, no endpoint in task_tracker
|
||||
|
||||
res, err := serverClient.PostForm(
|
||||
config.ServerUrl + "/task/cancel",
|
||||
url.Values{
|
||||
"token": {config.Token},
|
||||
"website_id": {strconv.FormatUint(websiteId, 10)},
|
||||
},
|
||||
)
|
||||
if err != nil { return }
|
||||
res.Body.Close()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("failed to cancel task: %s", res.Status)
|
||||
}
|
||||
|
||||
//TODO: Maybe wrap this function around releaseTask(cancel: bool) ?
|
||||
return
|
||||
}
|
||||
|
||||
@@ -195,6 +280,35 @@ type ServerTripper struct{}
|
||||
|
||||
func (t *ServerTripper) RoundTrip(req *http.Request) (res *http.Response, err error) {
|
||||
req.Header.Set("User-Agent", serverUserAgent)
|
||||
|
||||
//TODO: Move this whole block elsewhere
|
||||
if serverWorker != nil {
|
||||
var content []byte
|
||||
if req.Method == "GET" {
|
||||
content = []byte("/task/get/" + config.TrackerProject) //todo: find a less retarded way of doing that
|
||||
} else {
|
||||
//todo: this is retarded and should be moved elsewhere
|
||||
buf, _ := ioutil.ReadAll(req.Body)
|
||||
rdr1 := ioutil.NopCloser(bytes.NewBuffer(buf))
|
||||
rdr2 := ioutil.NopCloser(bytes.NewBuffer(buf))
|
||||
|
||||
content, _ = ioutil.ReadAll(rdr1)
|
||||
|
||||
req.Body = rdr2
|
||||
}
|
||||
|
||||
ts := time.Now().Format(time.RFC1123)
|
||||
|
||||
mac := hmac.New(crypto.SHA256.New, serverWorker.Secret)
|
||||
mac.Write(content)
|
||||
mac.Write([]byte(ts))
|
||||
sig := hex.EncodeToString(mac.Sum(nil))
|
||||
|
||||
req.Header.Add("X-Worker-Id", strconv.Itoa(serverWorker.Id))
|
||||
req.Header.Add("Timestamp", time.Now().Format(time.RFC1123))
|
||||
req.Header.Add("X-Signature", sig)
|
||||
|
||||
}
|
||||
return http.DefaultTransport.RoundTrip(req)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user