mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-04-10 05:56:42 +00:00
Smarter HTTP error handling
This commit is contained in:
parent
0ca6deede8
commit
46c0e0bd32
28
errors.go
28
errors.go
@ -3,6 +3,8 @@ package main
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github.com/valyala/fasthttp"
|
||||||
|
"net"
|
||||||
)
|
)
|
||||||
|
|
||||||
var ErrRateLimit = errors.New("too many requests")
|
var ErrRateLimit = errors.New("too many requests")
|
||||||
@ -15,3 +17,29 @@ type HttpError struct {
|
|||||||
func (e HttpError) Error() string {
|
func (e HttpError) Error() string {
|
||||||
return fmt.Sprintf("http status %d", e.code)
|
return fmt.Sprintf("http status %d", e.code)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func shouldRetry(err error) bool {
|
||||||
|
// HTTP errors
|
||||||
|
if httpErr, ok := err.(*HttpError); ok {
|
||||||
|
switch httpErr.code {
|
||||||
|
case fasthttp.StatusTooManyRequests:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
// Don't retry HTTP error codes
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if dnsError, ok := err.(*net.DNSError); ok {
|
||||||
|
// Don't retry permanent DNS errors
|
||||||
|
return dnsError.IsTemporary
|
||||||
|
}
|
||||||
|
|
||||||
|
if netErr, ok := err.(*net.OpError); ok {
|
||||||
|
// Don't retry permanent network errors
|
||||||
|
return netErr.Temporary()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Retry by default
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
15
worker.go
15
worker.go
@ -3,7 +3,6 @@ package main
|
|||||||
import (
|
import (
|
||||||
"github.com/beeker1121/goque"
|
"github.com/beeker1121/goque"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/valyala/fasthttp"
|
|
||||||
"math"
|
"math"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
@ -55,14 +54,12 @@ func (w *WorkerContext) step(results chan<- File, job Job) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
job.Fails++
|
job.Fails++
|
||||||
|
|
||||||
if httpErr, ok := err.(*HttpError); ok {
|
if !shouldRetry(err) {
|
||||||
switch httpErr.code {
|
atomic.AddUint64(&totalAborted, 1)
|
||||||
case fasthttp.StatusTooManyRequests:
|
logrus.WithField("url", job.UriStr).
|
||||||
err = ErrRateLimit
|
WithError(err).
|
||||||
default:
|
Error("Giving up after failure")
|
||||||
// Don't retry HTTP error codes
|
return
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if job.Fails > config.Retries {
|
if job.Fails > config.Retries {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user