Add TCP timeout option

This commit is contained in:
Richard Patel 2018-11-20 03:29:10 +01:00
parent b846498030
commit 86ec78cae1
No known key found for this signature in database
GPG Key ID: C268B2BBDA2ABECB
3 changed files with 29 additions and 6 deletions

View File

@ -20,7 +20,6 @@ var config struct {
Retries int
Workers int
UserAgent string
Timeout time.Duration
Tasks int32
CrawlStats time.Duration
AllocStats time.Duration
@ -38,6 +37,7 @@ const (
ConfRetries = "crawl.retries"
ConfWorkers = "crawl.connections"
ConfUserAgent = "crawl.user-agent"
ConfDialTimeout = "crawl.dial_timeout"
ConfTimeout = "crawl.timeout"
ConfCrawlStats = "output.crawl_stats"
ConfAllocStats = "output.resource_stats"
@ -51,7 +51,8 @@ func prepareConfig() {
viper.SetDefault(ConfWorkers, 2)
viper.SetDefault(ConfTasks, 3)
viper.SetDefault(ConfUserAgent, "")
viper.SetDefault(ConfTimeout, 10 * time.Second)
viper.SetDefault(ConfDialTimeout, 10 * time.Second)
viper.SetDefault(ConfTimeout, 30 * time.Second)
viper.SetDefault(ConfCrawlStats, 3 * time.Second)
viper.SetDefault(ConfAllocStats, 0)
viper.SetDefault(ConfVerbose, false)
@ -107,7 +108,9 @@ func readConfig() {
config.UserAgent = viper.GetString(ConfUserAgent)
config.Timeout = viper.GetDuration(ConfTimeout)
setDialTimeout(viper.GetDuration(ConfDialTimeout))
setTimeout(viper.GetDuration(ConfTimeout))
config.CrawlStats = viper.GetDuration(ConfCrawlStats)

View File

@ -52,9 +52,17 @@ crawl:
# from the site before giving up
retries: 5
# Time before discarding a failed connection attempt
dial_timeout: 10s
# Time before discarding a network request
timeout: 10s
timeout: 30s
# Crawler User-Agent
# If empty, no User-Agent header is sent.
user-agent: "Mozilla/5.0 (X11; od-database-crawler) Gecko/20100101 Firefox/52.0"
# Job buffer size (per task)
# Higher values cause fewer disk writes
# but require more memory.
job_buffer: 5000

View File

@ -8,6 +8,7 @@ import (
"github.com/valyala/fasthttp"
"golang.org/x/crypto/blake2b"
"golang.org/x/net/html"
"net"
"path"
"strconv"
"strings"
@ -20,6 +21,17 @@ var client = fasthttp.Client {
},
}
func setDialTimeout(d time.Duration) {
client.Dial = func(addr string) (net.Conn, error) {
return fasthttp.DialTimeout(addr, d)
}
}
// setTimeout configures the shared client's I/O timeouts from d:
// ReadTimeout is set to the full duration and WriteTimeout to half of it.
func setTimeout(d time.Duration) {
	writeLimit := d / 2
	client.WriteTimeout = writeLimit
	client.ReadTimeout = d
}
func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
f.IsDir = true
f.Name = path.Base(j.Uri.Path)
@ -33,7 +45,7 @@ func GetDir(j *Job, f *File) (links []fasturl.URL, err error) {
res := fasthttp.AcquireResponse()
defer fasthttp.ReleaseResponse(res)
err = client.DoTimeout(req, res, config.Timeout)
err = client.Do(req, res)
fasthttp.ReleaseRequest(req)
if err != nil {
@ -132,7 +144,7 @@ func GetFile(u fasturl.URL, f *File) (err error) {
res.SkipBody = true
defer fasthttp.ReleaseResponse(res)
err = client.DoTimeout(req, res, config.Timeout)
err = client.Do(req, res)
fasthttp.ReleaseRequest(req)
if err != nil {