diff --git a/config.go b/config.go
index 3e2f293..dc0610e 100644
--- a/config.go
+++ b/config.go
@@ -9,30 +9,33 @@ import (
 )
 
 var config struct {
-	ServerUrl     string
-	Token         string
-	Retries       int
-	Workers       int
-	Tasks         int32
-	StatsInterval time.Duration
-	Verbose       bool
+	ServerUrl  string
+	Token      string
+	Retries    int
+	Workers    int
+	Tasks      int32
+	CrawlStats time.Duration
+	AllocStats time.Duration
+	Verbose    bool
 }
 
 const (
-	ConfServerUrl     = "server.url"
-	ConfToken         = "server.token"
-	ConfTasks         = "crawl.tasks"
-	ConfRetries       = "crawl.retries"
-	ConfWorkers       = "crawl.connections"
-	ConfStatsInterval = "output.stats_interval"
-	ConfVerbose       = "output.verbose"
+	ConfServerUrl  = "server.url"
+	ConfToken      = "server.token"
+	ConfTasks      = "crawl.tasks"
+	ConfRetries    = "crawl.retries"
+	ConfWorkers    = "crawl.connections"
+	ConfCrawlStats = "output.crawl_stats"
+	ConfAllocStats = "output.resource_stats"
+	ConfVerbose    = "output.verbose"
 )
 
 func prepareConfig() {
 	viper.SetDefault(ConfRetries, 5)
 	viper.SetDefault(ConfWorkers, 2)
 	viper.SetDefault(ConfTasks, 3)
-	viper.SetDefault(ConfStatsInterval, 3 * time.Second)
+	viper.SetDefault(ConfCrawlStats, 3 * time.Second)
+	viper.SetDefault(ConfAllocStats, 0)
 	viper.SetDefault(ConfVerbose, false)
 }
 
@@ -70,7 +73,9 @@ func readConfig() {
 		configOOB(ConfTasks, int(config.Tasks))
 	}
 
-	config.StatsInterval = viper.GetDuration(ConfStatsInterval)
+	config.CrawlStats = viper.GetDuration(ConfCrawlStats)
+
+	config.AllocStats = viper.GetDuration(ConfAllocStats)
 
 	config.Verbose = viper.GetBool(ConfVerbose)
 	if config.Verbose {
diff --git a/config.yml b/config.yml
--- a/config.yml
+++ b/config.yml
@@ -7,8 +7,10 @@ server:
 
 # Log output settings
 output:
-  # Statistics printing interval
-  stats_interval: 1s
+  # Crawl statistics printing interval (0 disables)
+  crawl_stats: 1s
+  # Memory/GC/job queue statistics printing interval (0 disables)
+  resource_stats: 1s
   # More output? (Every listed dir)
   verbose: false
 
diff --git a/scheduler.go b/scheduler.go
index 89e99fd..c6b183d 100644
--- a/scheduler.go
+++ b/scheduler.go
@@ -7,6 +7,7 @@ import (
 )
 
 var activeTasks int32
+var totalBuffered int64
 
 func Schedule(c context.Context, remotes <-chan *OD) {
 	go Stats(c)
@@ -75,6 +76,7 @@ func bufferJobs(c context.Context, in chan Job, out chan Job) {
 				if !ok {
 					in = nil
 				} else {
+					atomic.AddInt64(&totalBuffered, 1)
 					inQueue = append(inQueue, v)
 				}
 			case <-c.Done():
@@ -86,9 +88,11 @@ func bufferJobs(c context.Context, in chan Job, out chan Job) {
 				if !ok {
 					in = nil
 				} else {
+					atomic.AddInt64(&totalBuffered, 1)
 					inQueue = append(inQueue, v)
 				}
 			case outCh() <- inQueue[0]:
+				atomic.AddInt64(&totalBuffered, -1)
 				inQueue = inQueue[1:]
 			case <-c.Done():
 				return
diff --git a/stats.go b/stats.go
--- a/stats.go
+++ b/stats.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"github.com/sirupsen/logrus"
 	"math"
+	"runtime"
 	"sync/atomic"
 	"time"
 )
@@ -15,14 +16,29 @@ var totalAborted uint64
 
 func Stats(c context.Context) {
 	var startedLast uint64 = 0
-	ticker := time.NewTicker(config.StatsInterval).C
+	var crawlTicker <-chan time.Time
+	var allocTicker <-chan time.Time
+
+	// A nil channel never fires in select, so a non-positive interval simply
+	// disables that report (time.NewTicker panics when d <= 0).
+	if config.CrawlStats > 0 {
+		t := time.NewTicker(config.CrawlStats)
+		defer t.Stop()
+		crawlTicker = t.C
+	}
+	if config.AllocStats > 0 {
+		t := time.NewTicker(config.AllocStats)
+		defer t.Stop()
+		allocTicker = t.C
+	}
+
 	for {
 		select {
-		case <-ticker:
+		case <-crawlTicker:
 			startedNow := atomic.LoadUint64(&totalStarted)
 
 			perSecond := float64(startedNow - startedLast) /
-				config.StatsInterval.Seconds()
+				config.CrawlStats.Seconds()
 
 			// Round to .5
 			perSecond *= 2
@@ -34,10 +50,22 @@ func Stats(c context.Context) {
 				"done":       atomic.LoadUint64(&totalDone),
 				"retries":    atomic.LoadUint64(&totalRetries),
 				"aborted":    atomic.LoadUint64(&totalAborted),
-			}).Info("Stats")
+			}).Info("Crawl Stats")
 
 			startedLast = startedNow
 
+		case <-allocTicker:
+			var mem runtime.MemStats
+			runtime.ReadMemStats(&mem)
+
+			logrus.WithFields(logrus.Fields{
+				// bufferJobs updates the counter via sync/atomic, so load it the same way.
+				"queue_count": atomic.LoadInt64(&totalBuffered),
+				"heap":        FormatByteCount(mem.Alloc),
+				"objects":     mem.HeapObjects,
+				"num_gc":      mem.NumGC,
+			}).Info("Resource Stats")
+
 		case <-c.Done():
 			return
 		}
diff --git a/util.go b/util.go
new file mode 100644
--- /dev/null
+++ b/util.go
@@ -0,0 +1,20 @@
+package main
+
+import "fmt"
+
+// FormatByteCount renders b as a human-readable size using binary
+// (1024-based) units, e.g. "512 B", "1.5 KiB" or "2.0 GiB".
+//
+// https://programming.guide/go/formatting-byte-size-to-human-readable-format.html
+func FormatByteCount(b uint64) string {
+	const unit = 1024
+	if b < unit {
+		return fmt.Sprintf("%d B", b)
+	}
+	div, exp := uint64(unit), 0
+	for n := b / unit; n >= unit; n /= unit {
+		div *= unit
+		exp++
+	}
+	return fmt.Sprintf("%.1f %ciB", float64(b)/float64(div), "KMGTPE"[exp])
+}