mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-04-16 08:56:44 +00:00
Silent HTTP errors
This commit is contained in:
parent
9210996b4c
commit
115983f70e
@ -19,6 +19,7 @@ var config struct {
|
||||
CrawlStats time.Duration
|
||||
AllocStats time.Duration
|
||||
Verbose bool
|
||||
PrintHTTP bool
|
||||
}
|
||||
|
||||
const (
|
||||
@ -31,6 +32,7 @@ const (
|
||||
ConfCrawlStats = "output.crawl_stats"
|
||||
ConfAllocStats = "output.resource_stats"
|
||||
ConfVerbose = "output.verbose"
|
||||
ConfPrintHTTP = "output.http"
|
||||
)
|
||||
|
||||
func prepareConfig() {
|
||||
@ -41,6 +43,7 @@ func prepareConfig() {
|
||||
viper.SetDefault(ConfCrawlStats, 3 * time.Second)
|
||||
viper.SetDefault(ConfAllocStats, 0)
|
||||
viper.SetDefault(ConfVerbose, false)
|
||||
viper.SetDefault(ConfPrintHTTP, false)
|
||||
}
|
||||
|
||||
func readConfig() {
|
||||
@ -88,6 +91,8 @@ func readConfig() {
|
||||
if config.Verbose {
|
||||
logrus.SetLevel(logrus.DebugLevel)
|
||||
}
|
||||
|
||||
config.PrintHTTP = viper.GetBool(ConfPrintHTTP)
|
||||
}
|
||||
|
||||
func configMissing(key string) {
|
||||
|
@ -13,6 +13,8 @@ output:
|
||||
resource_stats: 10s
|
||||
# More output? (Every listed dir)
|
||||
verbose: false
|
||||
# Print HTTP errors (Super spammy)
|
||||
http: false
|
||||
|
||||
# Crawler settings
|
||||
crawl:
|
||||
|
26
worker.go
26
worker.go
@ -43,6 +43,7 @@ func (w WorkerContext) step(results chan<- File, job Job) {
|
||||
if httpErr, ok := err.(*HttpError); ok {
|
||||
switch httpErr.code {
|
||||
case
|
||||
fasthttp.StatusMovedPermanently,
|
||||
fasthttp.StatusFound,
|
||||
fasthttp.StatusUnauthorized,
|
||||
fasthttp.StatusForbidden,
|
||||
@ -84,9 +85,11 @@ func DoJob(job *Job, f *File) (newJobs []Job, err error) {
|
||||
// Load directory
|
||||
links, err := GetDir(job, f)
|
||||
if err != nil {
|
||||
logrus.WithError(err).
|
||||
WithField("url", job.UriStr).
|
||||
Error("Failed getting dir")
|
||||
if !isErrSilent(err) {
|
||||
logrus.WithError(err).
|
||||
WithField("url", job.UriStr).
|
||||
Error("Failed getting dir")
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -133,9 +136,11 @@ func DoJob(job *Job, f *File) (newJobs []Job, err error) {
|
||||
// Load file
|
||||
err := GetFile(job.Uri, f)
|
||||
if err != nil {
|
||||
logrus.WithError(err).
|
||||
WithField("url", job.UriStr).
|
||||
Error("Failed getting file")
|
||||
if !isErrSilent(err) {
|
||||
logrus.WithError(err).
|
||||
WithField("url", job.UriStr).
|
||||
Error("Failed getting file")
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
atomic.AddUint64(&job.OD.Result.FileCount, 1)
|
||||
@ -161,3 +166,12 @@ func (w WorkerContext) queueJob(job Job) {
|
||||
// finishJob signals that job has been fully processed by calling Done
// on the Wait field of the job's OD.
// NOTE(review): Wait is presumably a sync.WaitGroup tracking outstanding
// jobs per OD — confirm against the OD type definition.
func (w WorkerContext) finishJob(job *Job) {
|
||||
job.OD.Wait.Done()
|
||||
}
|
||||
|
||||
func isErrSilent(err error) bool {
|
||||
if !config.PrintHTTP {
|
||||
if _, ok := err.(*HttpError); ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user