diff --git a/README.md b/README.md index 101557d..1b90ae2 100644 --- a/README.md +++ b/README.md @@ -8,3 +8,17 @@ * Lightweight and fast: __over 9000 requests per second__ on a standard laptop https://od-db.the-eye.eu/ + +#### Usage + + 1. With Config File (if `config.yml` found in working dir) + - Download [default config](https://github.com/terorie/od-database-crawler/blob/master/config.yml) + - Set `server.url` and `server.token` + - Start with `./od-database-crawler server --config <file>` + + 2. With Flags or env + - Override config file if it exists + - `--help` for list of flags + - Every flag is available as an environment variable: + `--server.crawl_stats` ➡️ `OD_SERVER_CRAWL_STATS` + - Start with `./od-database-crawler server <flags>` diff --git a/config.go b/config.go index 6576126..0010e1c 100644 --- a/config.go +++ b/config.go @@ -7,6 +7,7 @@ import ( "github.com/spf13/viper" "io" "os" + "path/filepath" "strings" "time" ) @@ -52,32 +53,103 @@ const ( ) func prepareConfig() { - viper.SetDefault(ConfRetries, 5) - viper.SetDefault(ConfWorkers, 2) - viper.SetDefault(ConfTasks, 3) - viper.SetDefault(ConfUserAgent, "") - viper.SetDefault(ConfDialTimeout, 10 * time.Second) - viper.SetDefault(ConfTimeout, 60 * time.Second) - viper.SetDefault(ConfJobBufferSize, 5000) - viper.SetDefault(ConfCrawlStats, 3 * time.Second) - viper.SetDefault(ConfAllocStats, 0) - viper.SetDefault(ConfVerbose, false) - viper.SetDefault(ConfPrintHTTP, false) - viper.SetDefault(ConfLogFile, "") - viper.SetDefault(ConfRecheck, 3 * time.Second) - viper.SetDefault(ConfCooldown, 30 * time.Second) - viper.SetDefault(ConfChunkSize, "1 MB") - viper.SetDefault(ConfUploadRetries, 10) - viper.SetDefault(ConfUploadRetryInterval, 30 * time.Second) + pf := rootCmd.PersistentFlags() + + bind := func(s string) { + if err := viper.BindPFlag(s, pf.Lookup(s)); err != nil { + panic(err) + } + var envKey string + envKey = strings.Replace(s, ".", "_", -1) + envKey = strings.ToUpper(envKey) + envKey = "OD_" + envKey + if 
err := viper.BindEnv(s, envKey); err != nil { + panic(err) + } + } + + pf.SortFlags = false + pf.StringVar(&configFile, "config", "", "Config file") + configFile = os.Getenv("OD_CONFIG") + + pf.String(ConfServerUrl, "http://od-db.the-eye.eu/api", "OD-DB server URL") + bind(ConfServerUrl) + + pf.String(ConfToken, "", "OD-DB access token (env OD_SERVER_TOKEN)") + bind(ConfToken) + + pf.Duration(ConfServerTimeout, 60 * time.Second, "OD-DB request timeout") + bind(ConfServerTimeout) + + pf.Duration(ConfRecheck, 1 * time.Second, "OD-DB: Poll interval for new jobs") + bind(ConfRecheck) + + pf.Duration(ConfCooldown, 30 * time.Second, "OD-DB: Time to wait after a server-side error") + bind(ConfCooldown) + + pf.String(ConfChunkSize, "1 MB", "OD-DB: Result upload chunk size") + bind(ConfChunkSize) + + pf.Uint(ConfUploadRetries, 10, "OD-DB: Max upload retries") + bind(ConfUploadRetries) + + pf.Duration(ConfUploadRetryInterval, 30 * time.Second, "OD-DB: Time to wait between upload retries") + bind(ConfUploadRetryInterval) + + pf.Uint(ConfTasks, 100, "Crawler: Max concurrent tasks") + bind(ConfTasks) + + pf.Uint(ConfWorkers, 4, "Crawler: Connections per server") + bind(ConfWorkers) + + pf.Uint(ConfRetries, 5, "Crawler: Request retries") + bind(ConfRetries) + + pf.Duration(ConfDialTimeout, 10 * time.Second, "Crawler: Handshake timeout") + bind(ConfDialTimeout) + + pf.Duration(ConfTimeout, 30 * time.Second, "Crawler: Request timeout") + bind(ConfTimeout) + + pf.String(ConfUserAgent, "Mozilla/5.0 (X11; od-database-crawler) Gecko/20100101 Firefox/52.0", "Crawler: User-Agent") + bind(ConfUserAgent) + + pf.Uint(ConfJobBufferSize, 5000, "Crawler: Task queue cache size") + bind(ConfJobBufferSize) + + pf.Duration(ConfCrawlStats, time.Second, "Log: Crawl stats interval") + bind(ConfCrawlStats) + + pf.Duration(ConfAllocStats, 10 * time.Second, "Log: Resource stats interval") + bind(ConfAllocStats) + + pf.Bool(ConfVerbose, false, "Log: Print every listed dir") + bind(ConfVerbose) + + 
pf.Bool(ConfPrintHTTP, false, "Log: Print HTTP client errors") + bind(ConfPrintHTTP) + + pf.String(ConfLogFile, "crawler.log", "Log file") + bind(ConfLogFile) } func readConfig() { - viper.AddConfigPath(".") - viper.SetConfigName("config") - err := viper.ReadInConfig() - if err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) + // If config.yml in working dir, use it + if _, err := os.Stat("config.yml"); err == nil { + configFile = "config.yml" + } + + if configFile != "" { + var err error + confPath, err := filepath.Abs(configFile) + if err != nil { panic(err) } + + viper.SetConfigFile(confPath) + err = viper.ReadInConfig() + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } } config.ServerUrl = viper.GetString(ConfServerUrl) diff --git a/help.go b/help.go index 2965c84..408bf71 100644 --- a/help.go +++ b/help.go @@ -4,7 +4,6 @@ const helpText = `HTTP crawler for the OD-Database DB >> https://od-db.the-eye.eu << Crawler >> https://github.com/terorie/od-database-crawler << - Config >> https://bit.ly/2MOAsUp Server >> https://github.com/simon987/od-database << Quick start: diff --git a/main.go b/main.go index 9131c19..5810407 100644 --- a/main.go +++ b/main.go @@ -51,8 +51,6 @@ func init() { rootCmd.AddCommand(&crawlCmd) rootCmd.AddCommand(&serverCmd) - pf := rootCmd.PersistentFlags() - pf.StringVar(&configFile, "config", "", "Config file") prepareConfig() } @@ -65,9 +63,6 @@ func preRun(cmd *cobra.Command, args []string) error { readConfig() - if configFile != "" { - viper.SetConfigFile(configFile) - } return nil }