Delete URL queues after crawling

This commit is contained in:
Richard Patel 2018-11-20 03:05:43 +01:00
parent 4f3140a39f
commit b846498030
No known key found for this signature in database
GPG Key ID: C268B2BBDA2ABECB
2 changed files with 22 additions and 9 deletions

View File

@ -2,6 +2,7 @@ package main
import ( import (
"github.com/beeker1121/goque" "github.com/beeker1121/goque"
"os"
"sync" "sync"
"sync/atomic" "sync/atomic"
) )
@ -11,13 +12,15 @@ const (
) )
type BufferedQueue struct { type BufferedQueue struct {
q *goque.Queue dataDir string
outBuf []Job q *goque.Queue
m sync.Mutex buf []Job
m sync.Mutex
} }
func OpenQueue(dataDir string) (bq *BufferedQueue, err error) { func OpenQueue(dataDir string) (bq *BufferedQueue, err error) {
bq = new(BufferedQueue) bq = new(BufferedQueue)
bq.dataDir = dataDir
bq.q, err = goque.OpenQueue(dataDir) bq.q, err = goque.OpenQueue(dataDir)
if err != nil { return nil, err } if err != nil { return nil, err }
return return
@ -59,8 +62,8 @@ func (q *BufferedQueue) directEnqueue(job *Job) bool {
q.m.Lock() q.m.Lock()
defer q.m.Unlock() defer q.m.Unlock()
if len(q.outBuf) < threshold { if len(q.buf) < threshold {
q.outBuf = append(q.outBuf, *job) q.buf = append(q.buf, *job)
return true return true
} else { } else {
return false return false
@ -71,17 +74,25 @@ func (q *BufferedQueue) directDequeue(job *Job) bool {
q.m.Lock() q.m.Lock()
defer q.m.Unlock() defer q.m.Unlock()
if len(q.outBuf) > 0 { if len(q.buf) > 0 {
*job = q.outBuf[0] *job = q.buf[0]
q.outBuf = q.outBuf[1:] q.buf = q.buf[1:]
return true return true
} else { } else {
return false return false
} }
} }
// Always returns nil (But implements io.Closer)
func (q *BufferedQueue) Close() error { func (q *BufferedQueue) Close() error {
return q.q.Close() // Close ignoring errors
q.q.Close()
// Delete files
if err := os.RemoveAll(q.dataDir);
err != nil { panic(err) }
return nil
} }
type JobGob struct { type JobGob struct {

View File

@ -160,6 +160,8 @@ func (o *OD) handleCollect(results chan File, f *os.File, collectErrC chan error
// Wait for all jobs on remote to finish // Wait for all jobs on remote to finish
o.Wait.Wait() o.Wait.Wait()
// Close queue
if err := o.WCtx.Queue.Close(); err != nil { if err := o.WCtx.Queue.Close(); err != nil {
panic(err) panic(err)
} }