mirror of
				https://github.com/terorie/od-database-crawler.git
				synced 2025-11-04 06:36:52 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			335 lines
		
	
	
		
			8.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			335 lines
		
	
	
		
			8.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
package main
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
	"crypto"
 | 
						|
	"crypto/hmac"
 | 
						|
	"encoding/hex"
 | 
						|
	"encoding/json"
 | 
						|
	"fmt"
 | 
						|
	"github.com/pkg/errors"
 | 
						|
	"github.com/sirupsen/logrus"
 | 
						|
	"golang.org/x/time/rate"
 | 
						|
	"io/ioutil"
 | 
						|
	"net/http"
 | 
						|
	"net/url"
 | 
						|
	"os"
 | 
						|
	"strconv"
 | 
						|
	"time"
 | 
						|
)
 | 
						|
 | 
						|
var serverWorker *TrackerWorker
 | 
						|
 | 
						|
var serverClient = http.Client{
 | 
						|
	Timeout:   config.ServerTimeout,
 | 
						|
	Transport: new(ServerTripper),
 | 
						|
}
 | 
						|
 | 
						|
var serverUserAgent = "od-database-crawler/" + rootCmd.Version
 | 
						|
 | 
						|
//TODO: Move those elsewhere?
 | 
						|
type WorkerAccessRequest struct {
 | 
						|
	Assign  bool `json:"assign"`
 | 
						|
	Submit  bool `json:"submit"`
 | 
						|
	Project int  `json:"project"`
 | 
						|
}
 | 
						|
 | 
						|
//todo: only keep necessary info
 | 
						|
type FetchTaskResponse struct {
 | 
						|
	Ok      bool   `json:"ok"`
 | 
						|
	Message string `json:"message"`
 | 
						|
	Content struct {
 | 
						|
		Task struct {
 | 
						|
			Id       int64 `json:"id"`
 | 
						|
			Priority int64 `json:"priority"`
 | 
						|
			Project  struct {
 | 
						|
				Id         int64      `json:"id"`
 | 
						|
				Priority   int64      `json:"priority"`
 | 
						|
				Name       string     `json:"name"`
 | 
						|
				CloneUrl   string     `json:"clone_url"`
 | 
						|
				GitRepo    string     `json:"git_repo"`
 | 
						|
				Version    string     `json:"version"`
 | 
						|
				Motd       string     `json:"motd"`
 | 
						|
				Public     bool       `json:"public"`
 | 
						|
				Hidden     bool       `json:"hidden"`
 | 
						|
				Chain      int64      `json:"chain"`
 | 
						|
				Paused     bool       `json:"paused"`
 | 
						|
				AssignRate rate.Limit `json:"assign_rate"`
 | 
						|
				SubmitRate rate.Limit `json:"submit_rate"`
 | 
						|
			} `json:"project"`
 | 
						|
			Assignee          int64  `json:"assignee"`
 | 
						|
			Retries           int64  `json:"retries"`
 | 
						|
			MaxRetries        int64  `json:"max_retries"`
 | 
						|
			Status            int64  `json:"status"`
 | 
						|
			Recipe            string `json:"recipe"`
 | 
						|
			MaxAssignTime     int64  `json:"max_assign_time"`
 | 
						|
			AssignTime        int64  `json:"assign_time"`
 | 
						|
			VerificationCount int64  `json:"verification_count"`
 | 
						|
		} `json:"task"`
 | 
						|
	} `json:"content"`
 | 
						|
}
 | 
						|
 | 
						|
type TrackerWorker struct {
 | 
						|
	Alias  string `json:"alias"`
 | 
						|
	Id     int    `json:"id"`
 | 
						|
	Secret []byte `json:"secret"`
 | 
						|
}
 | 
						|
 | 
						|
type CreateTrackerWorkerResponse struct {
 | 
						|
	Ok      bool   `json:"ok"`
 | 
						|
	Message string `json:"message"`
 | 
						|
	Content struct {
 | 
						|
		Worker TrackerWorker `json:"worker"`
 | 
						|
	} `json:"content"`
 | 
						|
}
 | 
						|
 | 
						|
type CreateTrackerWorkerRequest struct {
 | 
						|
	Alias string `json:"alias"`
 | 
						|
}
 | 
						|
 | 
						|
func getOrCreateWorker() *TrackerWorker {
 | 
						|
 | 
						|
	var worker TrackerWorker
 | 
						|
 | 
						|
	if _, err := os.Stat("worker.json"); os.IsNotExist(err) {
 | 
						|
		req := CreateTrackerWorkerRequest{
 | 
						|
			Alias: "crawler", //todo: load from config
 | 
						|
		}
 | 
						|
		body, _ := json.Marshal(&req)
 | 
						|
		buf := bytes.NewBuffer(body)
 | 
						|
		resp, _ := serverClient.Post(config.TrackerUrl+"/worker/create", "application/json", buf)
 | 
						|
		//todo: handle err
 | 
						|
 | 
						|
		fmt.Println(resp.StatusCode)
 | 
						|
 | 
						|
		workerResponse := CreateTrackerWorkerResponse{}
 | 
						|
		respBody, _ := ioutil.ReadAll(resp.Body)
 | 
						|
		_ = json.Unmarshal(respBody, &workerResponse)
 | 
						|
		//todo handle err
 | 
						|
		fmt.Println(workerResponse)
 | 
						|
 | 
						|
		workerJsonData, _ := json.Marshal(&workerResponse.Content.Worker)
 | 
						|
		fp, _ := os.OpenFile("worker.json", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
 | 
						|
		_, _ = fp.Write(workerJsonData)
 | 
						|
		//todo: handle err
 | 
						|
	} else {
 | 
						|
		fp, _ := os.OpenFile("worker.json", os.O_RDONLY, 0600)
 | 
						|
		workerJsonData, _ := ioutil.ReadAll(fp)
 | 
						|
		_ = json.Unmarshal(workerJsonData, &worker)
 | 
						|
		//todo: handle err
 | 
						|
	}
 | 
						|
 | 
						|
	return &worker
 | 
						|
}
 | 
						|
 | 
						|
func FetchTask() (t *Task, err error) {
 | 
						|
 | 
						|
	//todo: this whole block should definitely be extracted elsewhere
 | 
						|
	if serverWorker == nil {
 | 
						|
		serverWorker = getOrCreateWorker()
 | 
						|
 | 
						|
		//Request ASSIGN permission
 | 
						|
		//todo: project ID should be stored as a int in the first place
 | 
						|
		pid, _ := strconv.Atoi(config.TrackerProject)
 | 
						|
		accessReq, _ := json.Marshal(WorkerAccessRequest{
 | 
						|
			Project: int(pid),
 | 
						|
			Assign:  true,
 | 
						|
			Submit:  false,
 | 
						|
		})
 | 
						|
		buf := bytes.NewBuffer(accessReq)
 | 
						|
		res, err := serverClient.Post(config.TrackerUrl+"/project/request_access", "application/json", buf)
 | 
						|
		if err != nil {
 | 
						|
			panic(err)
 | 
						|
		}
 | 
						|
		fmt.Println(res.StatusCode)
 | 
						|
	}
 | 
						|
 | 
						|
	res, err := serverClient.Get(
 | 
						|
		config.TrackerUrl + "/task/get/" + config.TrackerProject)
 | 
						|
 | 
						|
	if err != nil {
 | 
						|
		return
 | 
						|
	}
 | 
						|
	defer res.Body.Close()
 | 
						|
 | 
						|
	switch res.StatusCode {
 | 
						|
	case 200:
 | 
						|
		break
 | 
						|
		//TODO: 404 should not happen.
 | 
						|
	case 404, 500:
 | 
						|
		return nil, nil
 | 
						|
	default:
 | 
						|
		return nil, fmt.Errorf("http %s", res.Status)
 | 
						|
	}
 | 
						|
 | 
						|
	jsonResponse := FetchTaskResponse{}
 | 
						|
	err = json.NewDecoder(res.Body).Decode(&jsonResponse)
 | 
						|
	if _, ok := err.(*json.SyntaxError); ok {
 | 
						|
		return nil, fmt.Errorf("/task/get returned invalid JSON")
 | 
						|
	} else if err != nil {
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	if !jsonResponse.Ok {
 | 
						|
		//The tracker will return Ok=false when no tasks are available
 | 
						|
		err = errors.New(jsonResponse.Message)
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	fmt.Println(jsonResponse.Content.Task.Recipe)
 | 
						|
	task := Task{}
 | 
						|
	err = json.Unmarshal([]byte(jsonResponse.Content.Task.Recipe), &task)
 | 
						|
	if _, ok := err.(*json.SyntaxError); ok {
 | 
						|
		return nil, fmt.Errorf("/task/get returned invalid JSON")
 | 
						|
	} else if err != nil {
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	t = &task
 | 
						|
 | 
						|
	return
 | 
						|
}
 | 
						|
 | 
						|
func PushResult(result *TaskResult, f *os.File) (err error) {
 | 
						|
	if result.WebsiteId == 0 {
 | 
						|
		// Not a real result, don't push
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
 | 
						|
	// Rewind to the beginning of the file
 | 
						|
	_, err = f.Seek(0, 0)
 | 
						|
	if err != nil {
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	err = uploadWebsocket(result.WebsiteId, f)
 | 
						|
	if err != nil {
 | 
						|
		logrus.Errorf("Failed to upload file list: %s", err)
 | 
						|
		err2 := CancelTask(result.WebsiteId)
 | 
						|
		if err2 != nil {
 | 
						|
			logrus.Error(err2)
 | 
						|
		}
 | 
						|
		return
 | 
						|
	}
 | 
						|
 | 
						|
	// Upload result ignoring errors
 | 
						|
	uploadResult(result)
 | 
						|
 | 
						|
	return
 | 
						|
}
 | 
						|
 | 
						|
func uploadWebsocket(websiteId uint64, f *os.File) error {
 | 
						|
 | 
						|
	//TODO:
 | 
						|
	/*
 | 
						|
	 * OD-DB will give you an Upload token when you fetch the task
 | 
						|
	 * Open a WS connection at {ws_bucket_addr}/upload with the 'X-Upload-Token' as header
 | 
						|
	 * Stream whole file as a single WS message
 | 
						|
	 * Close connection
 | 
						|
	 */
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
func uploadResult(result *TaskResult) (err error) {
 | 
						|
 | 
						|
	//TODO:
 | 
						|
	/*
 | 
						|
	 * When the file has been uploaded, just release the task with the TR_OK code
 | 
						|
	 * Don't bother sending the ODDB-related stuff, You just need the task id
 | 
						|
	 * Probably a good idea to wrap this around a new releaseTask() function
 | 
						|
	 */
 | 
						|
 | 
						|
	req := releaseTaskRequest{
 | 
						|
		TaskId:     int64(result.WebsiteId),
 | 
						|
		ResultCode: result.ResultCode,
 | 
						|
		// TODO What is verification
 | 
						|
		Verification: 0,
 | 
						|
	}
 | 
						|
 | 
						|
	resultEnc, err := json.Marshal(&req)
 | 
						|
	if err != nil {
 | 
						|
		panic(err)
 | 
						|
	}
 | 
						|
 | 
						|
	res, err := serverClient.PostForm(
 | 
						|
		config.TrackerUrl+"/task/release",
 | 
						|
		url.Values{
 | 
						|
			"token":  {config.Token},
 | 
						|
			"result": {string(resultEnc)},
 | 
						|
		},
 | 
						|
	)
 | 
						|
	if err != nil {
 | 
						|
		return
 | 
						|
	}
 | 
						|
	res.Body.Close()
 | 
						|
 | 
						|
	if res.StatusCode != http.StatusOK {
 | 
						|
		return HttpError{res.StatusCode}
 | 
						|
	}
 | 
						|
 | 
						|
	return
 | 
						|
}
 | 
						|
 | 
						|
func CancelTask(websiteId uint64) (err error) {
 | 
						|
 | 
						|
	//TODO: Maybe wrap this function around releaseTask(cancel: bool) ?
 | 
						|
	return
 | 
						|
}
 | 
						|
 | 
						|
type ServerTripper struct{}
 | 
						|
 | 
						|
func (t *ServerTripper) RoundTrip(req *http.Request) (res *http.Response, err error) {
 | 
						|
	req.Header.Set("User-Agent", serverUserAgent)
 | 
						|
 | 
						|
	//TODO: Move this whole block elsewhere
 | 
						|
	if serverWorker != nil {
 | 
						|
		var content []byte
 | 
						|
		if req.Method == "GET" {
 | 
						|
			content = []byte("/task/get/" + config.TrackerProject) //todo: find a less retarded way of doing that
 | 
						|
		} else {
 | 
						|
			//todo: this is retarded and should be moved elsewhere
 | 
						|
			buf, _ := ioutil.ReadAll(req.Body)
 | 
						|
			rdr1 := ioutil.NopCloser(bytes.NewBuffer(buf))
 | 
						|
			rdr2 := ioutil.NopCloser(bytes.NewBuffer(buf))
 | 
						|
 | 
						|
			content, _ = ioutil.ReadAll(rdr1)
 | 
						|
 | 
						|
			req.Body = rdr2
 | 
						|
		}
 | 
						|
 | 
						|
		ts := time.Now().Format(time.RFC1123)
 | 
						|
 | 
						|
		mac := hmac.New(crypto.SHA256.New, serverWorker.Secret)
 | 
						|
		mac.Write(content)
 | 
						|
		mac.Write([]byte(ts))
 | 
						|
		sig := hex.EncodeToString(mac.Sum(nil))
 | 
						|
 | 
						|
		req.Header.Add("X-Worker-Id", strconv.Itoa(serverWorker.Id))
 | 
						|
		req.Header.Add("Timestamp", time.Now().Format(time.RFC1123))
 | 
						|
		req.Header.Add("X-Signature", sig)
 | 
						|
 | 
						|
	}
 | 
						|
	return http.DefaultTransport.RoundTrip(req)
 | 
						|
}
 | 
						|
 | 
						|
const mimeJSON = "application/json"
 | 
						|
 | 
						|
// https://github.com/simon987/task_tracker/blob/master/api/models.go
 | 
						|
 | 
						|
type submitTaskRequest struct {
 | 
						|
	Project           int64  `json:"project"`
 | 
						|
	MaxRetries        int64  `json:"max_retries"`
 | 
						|
	Recipe            string `json:"recipe"`
 | 
						|
	Priority          int64  `json:"priority"`
 | 
						|
	MaxAssignTime     int64  `json:"max_assign_time"`
 | 
						|
	Hash64            int64  `json:"hash_u64"`
 | 
						|
	UniqueString      string `json:"unique_string"`
 | 
						|
	VerificationCount int64  `json:"verification_count"`
 | 
						|
}
 | 
						|
 | 
						|
type releaseTaskRequest struct {
 | 
						|
	TaskId       int64      `json:"task_id"`
 | 
						|
	ResultCode   ResultCode `json:"result"`
 | 
						|
	Verification int64      `json:"verification"`
 | 
						|
}
 |