mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-04-18 01:46:43 +00:00
Delete URL queues after crawling
This commit is contained in:
parent
4f3140a39f
commit
b846498030
29
queue.go
29
queue.go
@ -2,6 +2,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/beeker1121/goque"
|
"github.com/beeker1121/goque"
|
||||||
|
"os"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
)
|
)
|
||||||
@ -11,13 +12,15 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type BufferedQueue struct {
|
type BufferedQueue struct {
|
||||||
q *goque.Queue
|
dataDir string
|
||||||
outBuf []Job
|
q *goque.Queue
|
||||||
m sync.Mutex
|
buf []Job
|
||||||
|
m sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func OpenQueue(dataDir string) (bq *BufferedQueue, err error) {
|
func OpenQueue(dataDir string) (bq *BufferedQueue, err error) {
|
||||||
bq = new(BufferedQueue)
|
bq = new(BufferedQueue)
|
||||||
|
bq.dataDir = dataDir
|
||||||
bq.q, err = goque.OpenQueue(dataDir)
|
bq.q, err = goque.OpenQueue(dataDir)
|
||||||
if err != nil { return nil, err }
|
if err != nil { return nil, err }
|
||||||
return
|
return
|
||||||
@ -59,8 +62,8 @@ func (q *BufferedQueue) directEnqueue(job *Job) bool {
|
|||||||
q.m.Lock()
|
q.m.Lock()
|
||||||
defer q.m.Unlock()
|
defer q.m.Unlock()
|
||||||
|
|
||||||
if len(q.outBuf) < threshold {
|
if len(q.buf) < threshold {
|
||||||
q.outBuf = append(q.outBuf, *job)
|
q.buf = append(q.buf, *job)
|
||||||
return true
|
return true
|
||||||
} else {
|
} else {
|
||||||
return false
|
return false
|
||||||
@ -71,17 +74,25 @@ func (q *BufferedQueue) directDequeue(job *Job) bool {
|
|||||||
q.m.Lock()
|
q.m.Lock()
|
||||||
defer q.m.Unlock()
|
defer q.m.Unlock()
|
||||||
|
|
||||||
if len(q.outBuf) > 0 {
|
if len(q.buf) > 0 {
|
||||||
*job = q.outBuf[0]
|
*job = q.buf[0]
|
||||||
q.outBuf = q.outBuf[1:]
|
q.buf = q.buf[1:]
|
||||||
return true
|
return true
|
||||||
} else {
|
} else {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Always returns nil (But implements io.Closer)
|
||||||
func (q *BufferedQueue) Close() error {
|
func (q *BufferedQueue) Close() error {
|
||||||
return q.q.Close()
|
// Close ignoring errors
|
||||||
|
q.q.Close()
|
||||||
|
|
||||||
|
// Delete files
|
||||||
|
if err := os.RemoveAll(q.dataDir);
|
||||||
|
err != nil { panic(err) }
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobGob struct {
|
type JobGob struct {
|
||||||
|
@ -160,6 +160,8 @@ func (o *OD) handleCollect(results chan File, f *os.File, collectErrC chan error
|
|||||||
|
|
||||||
// Wait for all jobs on remote to finish
|
// Wait for all jobs on remote to finish
|
||||||
o.Wait.Wait()
|
o.Wait.Wait()
|
||||||
|
|
||||||
|
// Close queue
|
||||||
if err := o.WCtx.Queue.Close(); err != nil {
|
if err := o.WCtx.Queue.Close(); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user