mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-04-19 18:36:43 +00:00
335 lines
8.2 KiB
Go
335 lines
8.2 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto"
|
|
"crypto/hmac"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"github.com/pkg/errors"
|
|
"github.com/sirupsen/logrus"
|
|
"golang.org/x/time/rate"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"strconv"
|
|
"time"
|
|
)
|
|
|
|
var serverWorker *TrackerWorker
|
|
|
|
var serverClient = http.Client{
|
|
Timeout: config.ServerTimeout,
|
|
Transport: new(ServerTripper),
|
|
}
|
|
|
|
var serverUserAgent = "od-database-crawler/" + rootCmd.Version
|
|
|
|
//TODO: Move those elsewhere?
|
|
type WorkerAccessRequest struct {
|
|
Assign bool `json:"assign"`
|
|
Submit bool `json:"submit"`
|
|
Project int `json:"project"`
|
|
}
|
|
|
|
//todo: only keep necessary info
|
|
type FetchTaskResponse struct {
|
|
Ok bool `json:"ok"`
|
|
Message string `json:"message"`
|
|
Content struct {
|
|
Task struct {
|
|
Id int64 `json:"id"`
|
|
Priority int64 `json:"priority"`
|
|
Project struct {
|
|
Id int64 `json:"id"`
|
|
Priority int64 `json:"priority"`
|
|
Name string `json:"name"`
|
|
CloneUrl string `json:"clone_url"`
|
|
GitRepo string `json:"git_repo"`
|
|
Version string `json:"version"`
|
|
Motd string `json:"motd"`
|
|
Public bool `json:"public"`
|
|
Hidden bool `json:"hidden"`
|
|
Chain int64 `json:"chain"`
|
|
Paused bool `json:"paused"`
|
|
AssignRate rate.Limit `json:"assign_rate"`
|
|
SubmitRate rate.Limit `json:"submit_rate"`
|
|
} `json:"project"`
|
|
Assignee int64 `json:"assignee"`
|
|
Retries int64 `json:"retries"`
|
|
MaxRetries int64 `json:"max_retries"`
|
|
Status int64 `json:"status"`
|
|
Recipe string `json:"recipe"`
|
|
MaxAssignTime int64 `json:"max_assign_time"`
|
|
AssignTime int64 `json:"assign_time"`
|
|
VerificationCount int64 `json:"verification_count"`
|
|
} `json:"task"`
|
|
} `json:"content"`
|
|
}
|
|
|
|
type TrackerWorker struct {
|
|
Alias string `json:"alias"`
|
|
Id int `json:"id"`
|
|
Secret []byte `json:"secret"`
|
|
}
|
|
|
|
type CreateTrackerWorkerResponse struct {
|
|
Ok bool `json:"ok"`
|
|
Message string `json:"message"`
|
|
Content struct {
|
|
Worker TrackerWorker `json:"worker"`
|
|
} `json:"content"`
|
|
}
|
|
|
|
type CreateTrackerWorkerRequest struct {
|
|
Alias string `json:"alias"`
|
|
}
|
|
|
|
func getOrCreateWorker() *TrackerWorker {
|
|
|
|
var worker TrackerWorker
|
|
|
|
if _, err := os.Stat("worker.json"); os.IsNotExist(err) {
|
|
req := CreateTrackerWorkerRequest{
|
|
Alias: "crawler", //todo: load from config
|
|
}
|
|
body, _ := json.Marshal(&req)
|
|
buf := bytes.NewBuffer(body)
|
|
resp, _ := serverClient.Post(config.TrackerUrl+"/worker/create", "application/json", buf)
|
|
//todo: handle err
|
|
|
|
fmt.Println(resp.StatusCode)
|
|
|
|
workerResponse := CreateTrackerWorkerResponse{}
|
|
respBody, _ := ioutil.ReadAll(resp.Body)
|
|
_ = json.Unmarshal(respBody, &workerResponse)
|
|
//todo handle err
|
|
fmt.Println(workerResponse)
|
|
|
|
workerJsonData, _ := json.Marshal(&workerResponse.Content.Worker)
|
|
fp, _ := os.OpenFile("worker.json", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
|
|
_, _ = fp.Write(workerJsonData)
|
|
//todo: handle err
|
|
} else {
|
|
fp, _ := os.OpenFile("worker.json", os.O_RDONLY, 0600)
|
|
workerJsonData, _ := ioutil.ReadAll(fp)
|
|
_ = json.Unmarshal(workerJsonData, &worker)
|
|
//todo: handle err
|
|
}
|
|
|
|
return &worker
|
|
}
|
|
|
|
func FetchTask() (t *Task, err error) {
|
|
|
|
//todo: this whole block should definitely be extracted elsewhere
|
|
if serverWorker == nil {
|
|
serverWorker = getOrCreateWorker()
|
|
|
|
//Request ASSIGN permission
|
|
//todo: project ID should be stored as a int in the first place
|
|
pid, _ := strconv.Atoi(config.TrackerProject)
|
|
accessReq, _ := json.Marshal(WorkerAccessRequest{
|
|
Project: int(pid),
|
|
Assign: true,
|
|
Submit: false,
|
|
})
|
|
buf := bytes.NewBuffer(accessReq)
|
|
res, err := serverClient.Post(config.TrackerUrl+"/project/request_access", "application/json", buf)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
fmt.Println(res.StatusCode)
|
|
}
|
|
|
|
res, err := serverClient.Get(
|
|
config.TrackerUrl + "/task/get/" + config.TrackerProject)
|
|
|
|
if err != nil {
|
|
return
|
|
}
|
|
defer res.Body.Close()
|
|
|
|
switch res.StatusCode {
|
|
case 200:
|
|
break
|
|
//TODO: 404 should not happen.
|
|
case 404, 500:
|
|
return nil, nil
|
|
default:
|
|
return nil, fmt.Errorf("http %s", res.Status)
|
|
}
|
|
|
|
jsonResponse := FetchTaskResponse{}
|
|
err = json.NewDecoder(res.Body).Decode(&jsonResponse)
|
|
if _, ok := err.(*json.SyntaxError); ok {
|
|
return nil, fmt.Errorf("/task/get returned invalid JSON")
|
|
} else if err != nil {
|
|
return
|
|
}
|
|
|
|
if !jsonResponse.Ok {
|
|
//The tracker will return Ok=false when no tasks are available
|
|
err = errors.New(jsonResponse.Message)
|
|
return
|
|
}
|
|
|
|
fmt.Println(jsonResponse.Content.Task.Recipe)
|
|
task := Task{}
|
|
err = json.Unmarshal([]byte(jsonResponse.Content.Task.Recipe), &task)
|
|
if _, ok := err.(*json.SyntaxError); ok {
|
|
return nil, fmt.Errorf("/task/get returned invalid JSON")
|
|
} else if err != nil {
|
|
return
|
|
}
|
|
|
|
t = &task
|
|
|
|
return
|
|
}
|
|
|
|
func PushResult(result *TaskResult, f *os.File) (err error) {
|
|
if result.WebsiteId == 0 {
|
|
// Not a real result, don't push
|
|
return nil
|
|
}
|
|
|
|
// Rewind to the beginning of the file
|
|
_, err = f.Seek(0, 0)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
err = uploadWebsocket(result.WebsiteId, f)
|
|
if err != nil {
|
|
logrus.Errorf("Failed to upload file list: %s", err)
|
|
err2 := CancelTask(result.WebsiteId)
|
|
if err2 != nil {
|
|
logrus.Error(err2)
|
|
}
|
|
return
|
|
}
|
|
|
|
// Upload result ignoring errors
|
|
uploadResult(result)
|
|
|
|
return
|
|
}
|
|
|
|
func uploadWebsocket(websiteId uint64, f *os.File) error {
|
|
|
|
//TODO:
|
|
/*
|
|
* OD-DB will give you an Upload token when you fetch the task
|
|
* Open a WS connection at {ws_bucket_addr}/upload with the 'X-Upload-Token' as header
|
|
* Stream whole file as a single WS message
|
|
* Close connection
|
|
*/
|
|
return nil
|
|
}
|
|
|
|
func uploadResult(result *TaskResult) (err error) {
|
|
|
|
//TODO:
|
|
/*
|
|
* When the file has been uploaded, just release the task with the TR_OK code
|
|
* Don't bother sending the ODDB-related stuff, You just need the task id
|
|
* Probably a good idea to wrap this around a new releaseTask() function
|
|
*/
|
|
|
|
req := releaseTaskRequest{
|
|
TaskId: int64(result.WebsiteId),
|
|
ResultCode: result.ResultCode,
|
|
// TODO What is verification
|
|
Verification: 0,
|
|
}
|
|
|
|
resultEnc, err := json.Marshal(&req)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
res, err := serverClient.PostForm(
|
|
config.TrackerUrl+"/task/release",
|
|
url.Values{
|
|
"token": {config.Token},
|
|
"result": {string(resultEnc)},
|
|
},
|
|
)
|
|
if err != nil {
|
|
return
|
|
}
|
|
res.Body.Close()
|
|
|
|
if res.StatusCode != http.StatusOK {
|
|
return HttpError{res.StatusCode}
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func CancelTask(websiteId uint64) (err error) {
|
|
|
|
//TODO: Maybe wrap this function around releaseTask(cancel: bool) ?
|
|
return
|
|
}
|
|
|
|
type ServerTripper struct{}
|
|
|
|
func (t *ServerTripper) RoundTrip(req *http.Request) (res *http.Response, err error) {
|
|
req.Header.Set("User-Agent", serverUserAgent)
|
|
|
|
//TODO: Move this whole block elsewhere
|
|
if serverWorker != nil {
|
|
var content []byte
|
|
if req.Method == "GET" {
|
|
content = []byte("/task/get/" + config.TrackerProject) //todo: find a less retarded way of doing that
|
|
} else {
|
|
//todo: this is retarded and should be moved elsewhere
|
|
buf, _ := ioutil.ReadAll(req.Body)
|
|
rdr1 := ioutil.NopCloser(bytes.NewBuffer(buf))
|
|
rdr2 := ioutil.NopCloser(bytes.NewBuffer(buf))
|
|
|
|
content, _ = ioutil.ReadAll(rdr1)
|
|
|
|
req.Body = rdr2
|
|
}
|
|
|
|
ts := time.Now().Format(time.RFC1123)
|
|
|
|
mac := hmac.New(crypto.SHA256.New, serverWorker.Secret)
|
|
mac.Write(content)
|
|
mac.Write([]byte(ts))
|
|
sig := hex.EncodeToString(mac.Sum(nil))
|
|
|
|
req.Header.Add("X-Worker-Id", strconv.Itoa(serverWorker.Id))
|
|
req.Header.Add("Timestamp", time.Now().Format(time.RFC1123))
|
|
req.Header.Add("X-Signature", sig)
|
|
|
|
}
|
|
return http.DefaultTransport.RoundTrip(req)
|
|
}
|
|
|
|
const mimeJSON = "application/json"
|
|
|
|
// https://github.com/simon987/task_tracker/blob/master/api/models.go
|
|
|
|
type submitTaskRequest struct {
|
|
Project int64 `json:"project"`
|
|
MaxRetries int64 `json:"max_retries"`
|
|
Recipe string `json:"recipe"`
|
|
Priority int64 `json:"priority"`
|
|
MaxAssignTime int64 `json:"max_assign_time"`
|
|
Hash64 int64 `json:"hash_u64"`
|
|
UniqueString string `json:"unique_string"`
|
|
VerificationCount int64 `json:"verification_count"`
|
|
}
|
|
|
|
type releaseTaskRequest struct {
|
|
TaskId int64 `json:"task_id"`
|
|
ResultCode ResultCode `json:"result"`
|
|
Verification int64 `json:"verification"`
|
|
}
|