mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-12-14 07:39:03 +00:00
Compare commits
2 Commits
task_track
...
hexa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2d72ff3402 | ||
|
|
1b5e6bb7f4 |
13
worker.go
13
worker.go
@@ -3,7 +3,6 @@ package main
|
|||||||
import (
|
import (
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"math"
|
"math"
|
||||||
"strings"
|
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
@@ -12,8 +11,8 @@ import (
|
|||||||
var globalWait sync.WaitGroup
|
var globalWait sync.WaitGroup
|
||||||
|
|
||||||
type WorkerContext struct {
|
type WorkerContext struct {
|
||||||
in chan<- Job
|
in chan<- Job
|
||||||
out <-chan Job
|
out <-chan Job
|
||||||
lastRateLimit time.Time
|
lastRateLimit time.Time
|
||||||
numRateLimits int
|
numRateLimits int
|
||||||
}
|
}
|
||||||
@@ -67,7 +66,7 @@ func (w WorkerContext) step(job Job) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func DoJob(job *Job, f *File) (newJobs []Job, err error) {
|
func DoJob(job *Job, f *File) (newJobs []Job, err error) {
|
||||||
if strings.HasSuffix(job.Uri.Path, "/") {
|
if len(job.Uri.Path) != 0 && job.Uri.Path[len(job.Uri.Path)-1] == '/' {
|
||||||
// Load directory
|
// Load directory
|
||||||
links, err := GetDir(job, f)
|
links, err := GetDir(job, f)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -99,7 +98,7 @@ func DoJob(job *Job, f *File) (newJobs []Job, err error) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
logrus.WithFields(logrus.Fields{
|
logrus.WithFields(logrus.Fields{
|
||||||
"url": job.UriStr,
|
"url": job.UriStr,
|
||||||
"files": len(links),
|
"files": len(links),
|
||||||
}).Debug("Listed")
|
}).Debug("Listed")
|
||||||
} else {
|
} else {
|
||||||
@@ -120,10 +119,10 @@ func (w WorkerContext) queueJob(job Job) {
|
|||||||
globalWait.Add(1)
|
globalWait.Add(1)
|
||||||
|
|
||||||
if w.numRateLimits > 0 {
|
if w.numRateLimits > 0 {
|
||||||
if time.Since(w.lastRateLimit) > 5 * time.Second {
|
if time.Since(w.lastRateLimit) > 5*time.Second {
|
||||||
w.numRateLimits = 0
|
w.numRateLimits = 0
|
||||||
} else {
|
} else {
|
||||||
time.Sleep(time.Duration(math.Sqrt(float64(50 * w.numRateLimits))) *
|
time.Sleep(time.Duration(math.Sqrt(float64(50*w.numRateLimits))) *
|
||||||
100 * time.Millisecond)
|
100 * time.Millisecond)
|
||||||
w.in <- job
|
w.in <- job
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user