From 527e8895ec2224dc86736c7259816b505ae48945 Mon Sep 17 00:00:00 2001
From: Richard Patel <terorie@alphakevin.club>
Date: Sun, 3 Feb 2019 02:54:52 +0100
Subject: [PATCH] Support configuration without config file

---
 README.md |  14 +++++++
 config.go | 118 +++++++++++++++++++++++++++++++++++++++++++-----------
 help.go   |   1 -
 main.go   |   5 ---
 4 files changed, 109 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index 101557d..1b90ae2 100644
--- a/README.md
+++ b/README.md
@@ -8,3 +8,17 @@
  * Lightweight and fast: __over 9000 requests per second__ on a standard laptop
 
 https://od-db.the-eye.eu/
+
+#### Usage
+
+ 1. With config file (`config.yml` in the working dir is loaded automatically)
+    - Download [default config](https://github.com/terorie/od-database-crawler/blob/master/config.yml)
+    - Set `server.url` and `server.token`
+    - Start with `./od-database-crawler server --config <file>`
+ 
+ 2. With flags or environment variables
+    - These override values from the config file, if one exists
+    - `--help` for list of flags
+    - Every flag is available as an environment variable:
+      `--server.crawl_stats` ➡️ `OD_SERVER_CRAWL_STATS`
+    - Start with `./od-database-crawler server <flags>`
diff --git a/config.go b/config.go
index 6576126..0010e1c 100644
--- a/config.go
+++ b/config.go
@@ -7,6 +7,7 @@ import (
 	"github.com/spf13/viper"
 	"io"
 	"os"
+	"path/filepath"
 	"strings"
 	"time"
 )
@@ -52,32 +53,103 @@ const (
 )
 
 func prepareConfig() {
-	viper.SetDefault(ConfRetries, 5)
-	viper.SetDefault(ConfWorkers, 2)
-	viper.SetDefault(ConfTasks, 3)
-	viper.SetDefault(ConfUserAgent, "")
-	viper.SetDefault(ConfDialTimeout, 10 * time.Second)
-	viper.SetDefault(ConfTimeout, 60 * time.Second)
-	viper.SetDefault(ConfJobBufferSize, 5000)
-	viper.SetDefault(ConfCrawlStats, 3 * time.Second)
-	viper.SetDefault(ConfAllocStats, 0)
-	viper.SetDefault(ConfVerbose, false)
-	viper.SetDefault(ConfPrintHTTP, false)
-	viper.SetDefault(ConfLogFile, "")
-	viper.SetDefault(ConfRecheck, 3 * time.Second)
-	viper.SetDefault(ConfCooldown, 30 * time.Second)
-	viper.SetDefault(ConfChunkSize, "1 MB")
-	viper.SetDefault(ConfUploadRetries, 10)
-	viper.SetDefault(ConfUploadRetryInterval, 30 * time.Second)
+	pf := rootCmd.PersistentFlags() // every option below is registered as a persistent flag on rootCmd
+
+	bind := func(s string) { // bind ties viper key s to the flag named s and to an OD_-prefixed env var
+		if err := viper.BindPFlag(s, pf.Lookup(s)); err != nil {
+			panic(err)
+		}
+		var envKey string
+		envKey = strings.Replace(s, ".", "_", -1) // dots in the key become underscores
+		envKey = strings.ToUpper(envKey)
+		envKey = "OD_" + envKey // e.g. "server.crawl_stats" -> "OD_SERVER_CRAWL_STATS"
+		if err := viper.BindEnv(s, envKey); err != nil {
+			panic(err)
+		}
+	}
+
+	pf.SortFlags = false // list flags in declaration order in help/usage output instead of sorted
+	pf.StringVar(&configFile, "config", "", "Config file") // --config is stored directly in configFile, not bound through viper
+	configFile = os.Getenv("OD_CONFIG") // seed from env; NOTE(review): presumably replaced when --config is given on the command line - confirm
+
+	pf.String(ConfServerUrl, "http://od-db.the-eye.eu/api", "OD-DB server URL") // OD-DB server options start here
+	bind(ConfServerUrl)
+
+	pf.String(ConfToken, "", "OD-DB access token (env OD_SERVER_TOKEN)")
+	bind(ConfToken)
+
+	pf.Duration(ConfServerTimeout, 60 * time.Second, "OD-DB request timeout")
+	bind(ConfServerTimeout)
+
+	pf.Duration(ConfRecheck, 1 * time.Second, "OD-DB: Poll interval for new jobs")
+	bind(ConfRecheck)
+
+	pf.Duration(ConfCooldown, 30 * time.Second, "OD-DB: Time to wait after a server-side error")
+	bind(ConfCooldown)
+
+	pf.String(ConfChunkSize, "1 MB", "OD-DB: Result upload chunk size") // kept as a string here; parsing into a size is not done in this function
+	bind(ConfChunkSize)
+
+	pf.Uint(ConfUploadRetries, 10, "OD-DB: Max upload retries")
+	bind(ConfUploadRetries)
+
+	pf.Duration(ConfUploadRetryInterval, 30 * time.Second, "OD-DB: Time to wait between upload retries")
+	bind(ConfUploadRetryInterval)
+
+	pf.Uint(ConfTasks, 100, "Crawler: Max concurrent tasks") // crawler options start here
+	bind(ConfTasks)
+
+	pf.Uint(ConfWorkers, 4, "Crawler: Connections per server")
+	bind(ConfWorkers)
+
+	pf.Uint(ConfRetries, 5, "Crawler: Request retries")
+	bind(ConfRetries)
+
+	pf.Duration(ConfDialTimeout, 10 * time.Second, "Crawler: Handshake timeout")
+	bind(ConfDialTimeout)
+
+	pf.Duration(ConfTimeout, 30 * time.Second, "Crawler: Request timeout")
+	bind(ConfTimeout)
+
+	pf.String(ConfUserAgent, "Mozilla/5.0 (X11; od-database-crawler) Gecko/20100101 Firefox/52.0", "Crawler: User-Agent")
+	bind(ConfUserAgent)
+
+	pf.Uint(ConfJobBufferSize, 5000, "Crawler: Task queue cache size")
+	bind(ConfJobBufferSize)
+
+	pf.Duration(ConfCrawlStats, time.Second, "Log: Crawl stats interval") // logging options start here
+	bind(ConfCrawlStats)
+
+	pf.Duration(ConfAllocStats, 10 * time.Second, "Log: Resource stats interval")
+	bind(ConfAllocStats)
+
+	pf.Bool(ConfVerbose, false, "Log: Print every listed dir")
+	bind(ConfVerbose)
+
+	pf.Bool(ConfPrintHTTP, false, "Log: Print HTTP client errors")
+	bind(ConfPrintHTTP)
+
+	pf.String(ConfLogFile, "crawler.log", "Log file")
+	bind(ConfLogFile)
 }
 
 func readConfig() {
-	viper.AddConfigPath(".")
-	viper.SetConfigName("config")
-	err := viper.ReadInConfig()
-	if err != nil {
-		fmt.Fprintln(os.Stderr, err)
-		os.Exit(1)
+	// If config.yml in working dir, use it
+	if _, err := os.Stat("config.yml"); err == nil {
+		configFile = "config.yml"
+	}
+
+	if configFile != "" {
+		var err error
+		confPath, err := filepath.Abs(configFile)
+		if err != nil { panic(err) }
+
+		viper.SetConfigFile(confPath)
+		err = viper.ReadInConfig()
+		if err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			os.Exit(1)
+		}
 	}
 
 	config.ServerUrl = viper.GetString(ConfServerUrl)
diff --git a/help.go b/help.go
index 2965c84..408bf71 100644
--- a/help.go
+++ b/help.go
@@ -4,7 +4,6 @@ const helpText =
 `HTTP crawler for the OD-Database
   DB      >> https://od-db.the-eye.eu <<
   Crawler >> https://github.com/terorie/od-database-crawler <<
-  Config  >> https://bit.ly/2MOAsUp
   Server  >> https://github.com/simon987/od-database <<
 
 Quick start:
diff --git a/main.go b/main.go
index 9131c19..5810407 100644
--- a/main.go
+++ b/main.go
@@ -51,8 +51,6 @@ func init() {
 	rootCmd.AddCommand(&crawlCmd)
 	rootCmd.AddCommand(&serverCmd)
 
-	pf := rootCmd.PersistentFlags()
-	pf.StringVar(&configFile, "config", "", "Config file")
 	prepareConfig()
 }
 
@@ -65,9 +63,6 @@ func preRun(cmd *cobra.Command, args []string) error {
 
 	readConfig()
 
-	if configFile != "" {
-		viper.SetConfigFile(configFile)
-	}
 	return nil
 }