3 Commits

Author SHA1 Message Date
Richard Patel
5239af08f7 Bump version to v1.2.1 2019-02-03 03:36:39 +01:00
Richard Patel
46c0e0bd32 Smarter HTTP error handling 2019-02-03 03:35:09 +01:00
Richard Patel
0ca6deede8 Fix --config flag 2019-02-03 03:26:48 +01:00
4 changed files with 48 additions and 18 deletions

View File

@@ -7,7 +7,6 @@ import (
"github.com/spf13/viper"
"io"
"os"
"path/filepath"
"strings"
"time"
)
@@ -135,17 +134,23 @@ func prepareConfig() {
func readConfig() {
// If config.yml in working dir, use it
if _, err := os.Stat("config.yml"); err == nil {
configFile = "config.yml"
if configFile == "" {
_, err := os.Stat("config.yml")
if err == nil {
configFile = "config.yml"
}
}
if configFile != "" {
var err error
confPath, err := filepath.Abs(configFile)
if err != nil { panic(err) }
confF, err := os.Open(configFile)
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
defer confF.Close()
viper.SetConfigFile(confPath)
err = viper.ReadInConfig()
viper.SetConfigType("yml")
err = viper.ReadConfig(confF)
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)

View File

@@ -3,6 +3,8 @@ package main
import (
"errors"
"fmt"
"github.com/valyala/fasthttp"
"net"
)
var ErrRateLimit = errors.New("too many requests")
@@ -15,3 +17,29 @@ type HttpError struct {
// Error implements the error interface. The message has the form
// "http status <code>", where <code> is the status code stored in the error.
func (e HttpError) Error() string {
	msg := fmt.Sprintf("http status %d", e.code)
	return msg
}
// shouldRetry reports whether a request that failed with err should be
// attempted again.
//
// The decision depends on the dynamic type of err:
//   - *HttpError: retried only for status 429 (Too Many Requests).
//   - *net.DNSError: retried only if the error is flagged as temporary.
//   - *net.OpError: retried only if the error reports itself as temporary.
//   - anything else: retried.
func shouldRetry(err error) bool {
	switch e := err.(type) {
	case *HttpError:
		// Rate limiting is the only HTTP status worth another attempt;
		// every other status code is treated as final.
		return e.code == fasthttp.StatusTooManyRequests
	case *net.DNSError:
		// Permanent DNS failures are not retried.
		return e.IsTemporary
	case *net.OpError:
		// Permanent network failures are not retried.
		return e.Temporary()
	}

	// Unrecognized error types are retried by default.
	return true
}

View File

@@ -17,7 +17,7 @@ var configFile string
var rootCmd = cobra.Command {
Use: "od-database-crawler",
Version: "1.2.0",
Version: "1.2.1",
Short: "OD-Database Go crawler",
Long: helpText,
PersistentPreRunE: preRun,

View File

@@ -3,7 +3,6 @@ package main
import (
"github.com/beeker1121/goque"
"github.com/sirupsen/logrus"
"github.com/valyala/fasthttp"
"math"
"sort"
"strings"
@@ -55,14 +54,12 @@ func (w *WorkerContext) step(results chan<- File, job Job) {
if err != nil {
job.Fails++
if httpErr, ok := err.(*HttpError); ok {
switch httpErr.code {
case fasthttp.StatusTooManyRequests:
err = ErrRateLimit
default:
// Don't retry HTTP error codes
return
}
if !shouldRetry(err) {
atomic.AddUint64(&totalAborted, 1)
logrus.WithField("url", job.UriStr).
WithError(err).
Error("Giving up after failure")
return
}
if job.Fails > config.Retries {