mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-04-19 10:26:43 +00:00
Remove the wait loop that blocked while too many crawlers were active
Add more logging
This commit is contained in:
parent
bfd7302be8
commit
fa37d45378
15
scheduler.go
15
scheduler.go
@ -2,8 +2,8 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var activeTasks int32
|
var activeTasks int32
|
||||||
@ -17,14 +17,8 @@ func Schedule(c context.Context, remotes <-chan *OD) {
|
|||||||
return
|
return
|
||||||
|
|
||||||
case remote := <-remotes:
|
case remote := <-remotes:
|
||||||
for atomic.LoadInt32(&activeTasks) > config.Tasks {
|
logrus.WithField("url", remote.BaseUri.String()).
|
||||||
select {
|
Info("Starting crawler")
|
||||||
case <-time.After(time.Second):
|
|
||||||
break
|
|
||||||
case <-c.Done():
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Spawn workers
|
// Spawn workers
|
||||||
remote.WCtx.in, remote.WCtx.out = makeJobBuffer(c)
|
remote.WCtx.in, remote.WCtx.out = makeJobBuffer(c)
|
||||||
@ -53,6 +47,9 @@ func (r *OD) Watch() {
|
|||||||
r.Wait.Wait()
|
r.Wait.Wait()
|
||||||
close(r.WCtx.in)
|
close(r.WCtx.in)
|
||||||
atomic.AddInt32(&activeTasks, -1)
|
atomic.AddInt32(&activeTasks, -1)
|
||||||
|
|
||||||
|
logrus.WithField("url", r.BaseUri.String()).
|
||||||
|
Info("Crawler finished")
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeJobBuffer(c context.Context) (chan<- Job, <-chan Job) {
|
func makeJobBuffer(c context.Context) (chan<- Job, <-chan Job) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user