Fix file uploads

This commit is contained in:
Richard Patel 2018-11-17 12:47:16 +01:00
parent 24ee6fcba2
commit 7b29da9340
No known key found for this signature in database
GPG Key ID: C268B2BBDA2ABECB
2 changed files with 20 additions and 27 deletions

View File

@ -72,11 +72,22 @@ func ScheduleTask(remotes chan<- *OD, t *Task, u *fasturl.URL) {
} }
func (o *OD) Watch(results chan File) { func (o *OD) Watch(results chan File) {
filePath := path.Join("crawled", fmt.Sprintf("%d.json", o.Task.WebsiteId))
// Open crawl results file
f, err := os.OpenFile(
filePath,
os.O_CREATE | os.O_RDWR | os.O_TRUNC,
0644,
)
if err != nil { return }
defer f.Close()
defer os.Remove(filePath)
// Wait for the file to be fully written // Wait for the file to be fully written
var fileLock sync.Mutex var fileLock sync.Mutex
fileLock.Lock() fileLock.Lock()
go o.Task.Collect(results, &fileLock) go o.Task.Collect(results, f, &fileLock)
// Wait for all jobs on remote to finish // Wait for all jobs on remote to finish
o.Wait.Wait() o.Wait.Wait()
@ -113,7 +124,7 @@ func (o *OD) Watch(results chan File) {
fileLock.Unlock() fileLock.Unlock()
// Upload results // Upload results
err := PushResult(&o.Result) err = PushResult(&o.Result, f)
if err != nil { if err != nil {
logrus.WithError(err). logrus.WithError(err).
Error("Failed uploading result") Error("Failed uploading result")
@ -123,8 +134,8 @@ func (o *OD) Watch(results chan File) {
globalWait.Done() globalWait.Done()
} }
func (t *Task) Collect(results chan File, done *sync.Mutex) { func (t *Task) Collect(results chan File, f *os.File, done *sync.Mutex) {
err := t.collect(results) err := t.collect(results, f)
if err != nil { if err != nil {
logrus.WithError(err). logrus.WithError(err).
Error("Failed saving crawl results") Error("Failed saving crawl results")
@ -132,18 +143,10 @@ func (t *Task) Collect(results chan File, done *sync.Mutex) {
done.Unlock() done.Unlock()
} }
func (t *Task) collect(results chan File) error { func (t *Task) collect(results chan File, f *os.File) error {
err := os.MkdirAll("crawled", 0755) err := os.MkdirAll("crawled", 0755)
if err != nil { return err } if err != nil { return err }
f, err := os.OpenFile(
path.Join("crawled", fmt.Sprintf("%d.json", t.WebsiteId)),
os.O_CREATE | os.O_WRONLY | os.O_TRUNC,
0755,
)
if err != nil { return err }
defer f.Close()
for result := range results { for result := range results {
result.Path = fasturl.PathUnescape(result.Path) result.Path = fasturl.PathUnescape(result.Path)
result.Name = fasturl.PathUnescape(result.Name) result.Name = fasturl.PathUnescape(result.Name)

View File

@ -10,7 +10,6 @@ import (
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
"path/filepath"
"strconv" "strconv"
) )
@ -43,26 +42,17 @@ func FetchTask() (t *Task, err error) {
return return
} }
func PushResult(result *TaskResult) (err error) { func PushResult(result *TaskResult, f *os.File) (err error) {
if result.WebsiteId == 0 { if result.WebsiteId == 0 {
// Not a real result, don't push // Not a real result, don't push
return nil return nil
} }
filePath := filepath.Join( // Rewind to the beginning of the file
".", "crawled", _, err = f.Seek(0, 0)
fmt.Sprintf("%d.json", result.WebsiteId)) if err != nil {
defer os.Remove(filePath)
f, err := os.Open(filePath)
if os.IsNotExist(err) {
err = fmt.Errorf("cannot upload result: %s does not exist", filePath)
return
} else if err != nil {
return return
} }
defer f.Close()
err = uploadChunks(result.WebsiteId, f) err = uploadChunks(result.WebsiteId, f)
if err != nil { if err != nil {