mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-04-19 18:36:43 +00:00
Silent HTTP errors
This commit is contained in:
parent
9210996b4c
commit
115983f70e
@ -19,6 +19,7 @@ var config struct {
|
|||||||
CrawlStats time.Duration
|
CrawlStats time.Duration
|
||||||
AllocStats time.Duration
|
AllocStats time.Duration
|
||||||
Verbose bool
|
Verbose bool
|
||||||
|
PrintHTTP bool
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -31,6 +32,7 @@ const (
|
|||||||
ConfCrawlStats = "output.crawl_stats"
|
ConfCrawlStats = "output.crawl_stats"
|
||||||
ConfAllocStats = "output.resource_stats"
|
ConfAllocStats = "output.resource_stats"
|
||||||
ConfVerbose = "output.verbose"
|
ConfVerbose = "output.verbose"
|
||||||
|
ConfPrintHTTP = "output.http"
|
||||||
)
|
)
|
||||||
|
|
||||||
func prepareConfig() {
|
func prepareConfig() {
|
||||||
@ -41,6 +43,7 @@ func prepareConfig() {
|
|||||||
viper.SetDefault(ConfCrawlStats, 3 * time.Second)
|
viper.SetDefault(ConfCrawlStats, 3 * time.Second)
|
||||||
viper.SetDefault(ConfAllocStats, 0)
|
viper.SetDefault(ConfAllocStats, 0)
|
||||||
viper.SetDefault(ConfVerbose, false)
|
viper.SetDefault(ConfVerbose, false)
|
||||||
|
viper.SetDefault(ConfPrintHTTP, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
func readConfig() {
|
func readConfig() {
|
||||||
@ -88,6 +91,8 @@ func readConfig() {
|
|||||||
if config.Verbose {
|
if config.Verbose {
|
||||||
logrus.SetLevel(logrus.DebugLevel)
|
logrus.SetLevel(logrus.DebugLevel)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
config.PrintHTTP = viper.GetBool(ConfPrintHTTP)
|
||||||
}
|
}
|
||||||
|
|
||||||
func configMissing(key string) {
|
func configMissing(key string) {
|
||||||
|
@ -13,6 +13,8 @@ output:
|
|||||||
resource_stats: 10s
|
resource_stats: 10s
|
||||||
# More output? (Every listed dir)
|
# More output? (Every listed dir)
|
||||||
verbose: false
|
verbose: false
|
||||||
|
# Print HTTP errors (Super spammy)
|
||||||
|
http: false
|
||||||
|
|
||||||
# Crawler settings
|
# Crawler settings
|
||||||
crawl:
|
crawl:
|
||||||
|
14
worker.go
14
worker.go
@ -43,6 +43,7 @@ func (w WorkerContext) step(results chan<- File, job Job) {
|
|||||||
if httpErr, ok := err.(*HttpError); ok {
|
if httpErr, ok := err.(*HttpError); ok {
|
||||||
switch httpErr.code {
|
switch httpErr.code {
|
||||||
case
|
case
|
||||||
|
fasthttp.StatusMovedPermanently,
|
||||||
fasthttp.StatusFound,
|
fasthttp.StatusFound,
|
||||||
fasthttp.StatusUnauthorized,
|
fasthttp.StatusUnauthorized,
|
||||||
fasthttp.StatusForbidden,
|
fasthttp.StatusForbidden,
|
||||||
@ -84,9 +85,11 @@ func DoJob(job *Job, f *File) (newJobs []Job, err error) {
|
|||||||
// Load directory
|
// Load directory
|
||||||
links, err := GetDir(job, f)
|
links, err := GetDir(job, f)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if !isErrSilent(err) {
|
||||||
logrus.WithError(err).
|
logrus.WithError(err).
|
||||||
WithField("url", job.UriStr).
|
WithField("url", job.UriStr).
|
||||||
Error("Failed getting dir")
|
Error("Failed getting dir")
|
||||||
|
}
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -133,9 +136,11 @@ func DoJob(job *Job, f *File) (newJobs []Job, err error) {
|
|||||||
// Load file
|
// Load file
|
||||||
err := GetFile(job.Uri, f)
|
err := GetFile(job.Uri, f)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if !isErrSilent(err) {
|
||||||
logrus.WithError(err).
|
logrus.WithError(err).
|
||||||
WithField("url", job.UriStr).
|
WithField("url", job.UriStr).
|
||||||
Error("Failed getting file")
|
Error("Failed getting file")
|
||||||
|
}
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
atomic.AddUint64(&job.OD.Result.FileCount, 1)
|
atomic.AddUint64(&job.OD.Result.FileCount, 1)
|
||||||
@ -161,3 +166,12 @@ func (w WorkerContext) queueJob(job Job) {
|
|||||||
func (w WorkerContext) finishJob(job *Job) {
|
func (w WorkerContext) finishJob(job *Job) {
|
||||||
job.OD.Wait.Done()
|
job.OD.Wait.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isErrSilent(err error) bool {
|
||||||
|
if !config.PrintHTTP {
|
||||||
|
if _, ok := err.(*HttpError); ok {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user