diff --git a/config.go b/config.go index e949a83..f58d9a3 100644 --- a/config.go +++ b/config.go @@ -1,9 +1,11 @@ package main import ( + "bufio" "fmt" "github.com/sirupsen/logrus" "github.com/spf13/viper" + "io" "os" "strings" "time" @@ -41,6 +43,7 @@ const ( ConfAllocStats = "output.resource_stats" ConfVerbose = "output.verbose" ConfPrintHTTP = "output.http" + ConfLogFile = "output.log" ) func prepareConfig() { @@ -53,6 +56,7 @@ func prepareConfig() { viper.SetDefault(ConfAllocStats, 0) viper.SetDefault(ConfVerbose, false) viper.SetDefault(ConfPrintHTTP, false) + viper.SetDefault(ConfLogFile, "") viper.SetDefault(ConfRecheck, 3 * time.Second) viper.SetDefault(ConfChunkSize, "1 MB") } @@ -114,6 +118,17 @@ func readConfig() { logrus.SetLevel(logrus.DebugLevel) } + if filePath := viper.GetString(ConfLogFile); filePath != "" { + f, err := os.OpenFile(filePath, os.O_CREATE | os.O_WRONLY | os.O_APPEND, 0644) + bufWriter := bufio.NewWriter(f) + if err != nil { panic(err) } + exitHooks.Add(func() { + bufWriter.Flush() + f.Close() + }) + logrus.SetOutput(io.MultiWriter(os.Stdout, bufWriter)) + } + config.PrintHTTP = viper.GetBool(ConfPrintHTTP) } diff --git a/config.yml b/config.yml index f59555e..bb9aafe 100644 --- a/config.yml +++ b/config.yml @@ -23,13 +23,20 @@ server: output: # Crawl statistics crawl_stats: 1s + # CPU/RAM/Job queue stats resource_stats: 10s + # More output? (Every listed dir) verbose: false + # Print HTTP errors (Super spammy) http: false + # Log file + # If empty, no log file is created. + log: crawler.log + # Crawler settings crawl: # Number of sites that can be processed at once @@ -49,4 +56,5 @@ crawl: timeout: 10s # Crawler User-Agent + # If empty, no User-Agent header is sent. 
// Hooks is an ordered list of callbacks that can be registered from
// multiple goroutines and later run in registration order.
//
// The zero value is ready to use. A Hooks value contains a mutex and
// must not be copied after first use; always pass it by pointer.
type Hooks struct {
	m sync.Mutex // guards l
	l []func()   // registered callbacks, in registration order
}

// Add appends hook to the end of the list.
func (h *Hooks) Add(hook func()) {
	h.m.Lock()
	defer h.m.Unlock()
	h.l = append(h.l, hook)
}

// Execute runs every registered hook once, in registration order.
//
// The list is snapshotted under the lock and the callbacks are invoked
// after the lock is released, so Execute does not race with a concurrent
// Add and a hook may itself call Add without deadlocking. Hooks added
// while Execute is running are not part of the current run. Hooks stay
// registered afterwards, so calling Execute again runs them again.
func (h *Hooks) Execute() {
	h.m.Lock()
	snapshot := make([]func(), len(h.l))
	copy(snapshot, h.l)
	h.m.Unlock()

	for _, hook := range snapshot {
		hook()
	}
}