mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-04-10 14:06:42 +00:00
Support configuration without config file
This commit is contained in:
parent
108fff0503
commit
527e8895ec
14
README.md
14
README.md
@ -8,3 +8,17 @@
|
|||||||
* Lightweight and fast: __over 9000 requests per second__ on a standard laptop
|
* Lightweight and fast: __over 9000 requests per second__ on a standard laptop
|
||||||
|
|
||||||
https://od-db.the-eye.eu/
|
https://od-db.the-eye.eu/
|
||||||
|
|
||||||
|
#### Usage
|
||||||
|
|
||||||
|
1. With a config file (used automatically if `config.yml` is found in the working directory)
|
||||||
|
- Download [default config](https://github.com/terorie/od-database-crawler/blob/master/config.yml)
|
||||||
|
- Set `server.url` and `server.token`
|
||||||
|
- Start with `./od-database-crawler server --config <file>`
|
||||||
|
|
||||||
|
2. With flags or environment variables
|
||||||
|
- Flags and environment variables override values from the config file, if one exists
|
||||||
|
- `--help` for list of flags
|
||||||
|
- Every flag is available as an environment variable:
|
||||||
|
`--server.crawl_stats` ➡️ `OD_SERVER_CRAWL_STATS`
|
||||||
|
- Start with `./od-database-crawler server <flags>`
|
||||||
|
118
config.go
118
config.go
@ -7,6 +7,7 @@ import (
|
|||||||
"github.com/spf13/viper"
|
"github.com/spf13/viper"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@ -52,32 +53,103 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func prepareConfig() {
|
func prepareConfig() {
|
||||||
viper.SetDefault(ConfRetries, 5)
|
pf := rootCmd.PersistentFlags()
|
||||||
viper.SetDefault(ConfWorkers, 2)
|
|
||||||
viper.SetDefault(ConfTasks, 3)
|
bind := func(s string) {
|
||||||
viper.SetDefault(ConfUserAgent, "")
|
if err := viper.BindPFlag(s, pf.Lookup(s)); err != nil {
|
||||||
viper.SetDefault(ConfDialTimeout, 10 * time.Second)
|
panic(err)
|
||||||
viper.SetDefault(ConfTimeout, 60 * time.Second)
|
}
|
||||||
viper.SetDefault(ConfJobBufferSize, 5000)
|
var envKey string
|
||||||
viper.SetDefault(ConfCrawlStats, 3 * time.Second)
|
envKey = strings.Replace(s, ".", "_", -1)
|
||||||
viper.SetDefault(ConfAllocStats, 0)
|
envKey = strings.ToUpper(envKey)
|
||||||
viper.SetDefault(ConfVerbose, false)
|
envKey = "OD_" + envKey
|
||||||
viper.SetDefault(ConfPrintHTTP, false)
|
if err := viper.BindEnv(s, envKey); err != nil {
|
||||||
viper.SetDefault(ConfLogFile, "")
|
panic(err)
|
||||||
viper.SetDefault(ConfRecheck, 3 * time.Second)
|
}
|
||||||
viper.SetDefault(ConfCooldown, 30 * time.Second)
|
}
|
||||||
viper.SetDefault(ConfChunkSize, "1 MB")
|
|
||||||
viper.SetDefault(ConfUploadRetries, 10)
|
pf.SortFlags = false
|
||||||
viper.SetDefault(ConfUploadRetryInterval, 30 * time.Second)
|
pf.StringVar(&configFile, "config", "", "Config file")
|
||||||
|
configFile = os.Getenv("OD_CONFIG")
|
||||||
|
|
||||||
|
pf.String(ConfServerUrl, "http://od-db.the-eye.eu/api", "OD-DB server URL")
|
||||||
|
bind(ConfServerUrl)
|
||||||
|
|
||||||
|
pf.String(ConfToken, "", "OD-DB access token (env OD_SERVER_TOKEN)")
|
||||||
|
bind(ConfToken)
|
||||||
|
|
||||||
|
pf.Duration(ConfServerTimeout, 60 * time.Second, "OD-DB request timeout")
|
||||||
|
bind(ConfServerTimeout)
|
||||||
|
|
||||||
|
pf.Duration(ConfRecheck, 1 * time.Second, "OD-DB: Poll interval for new jobs")
|
||||||
|
bind(ConfRecheck)
|
||||||
|
|
||||||
|
pf.Duration(ConfCooldown, 30 * time.Second, "OD-DB: Time to wait after a server-side error")
|
||||||
|
bind(ConfCooldown)
|
||||||
|
|
||||||
|
pf.String(ConfChunkSize, "1 MB", "OD-DB: Result upload chunk size")
|
||||||
|
bind(ConfChunkSize)
|
||||||
|
|
||||||
|
pf.Uint(ConfUploadRetries, 10, "OD-DB: Max upload retries")
|
||||||
|
bind(ConfUploadRetries)
|
||||||
|
|
||||||
|
pf.Duration(ConfUploadRetryInterval, 30 * time.Second, "OD-DB: Time to wait between upload retries")
|
||||||
|
bind(ConfUploadRetryInterval)
|
||||||
|
|
||||||
|
pf.Uint(ConfTasks, 100, "Crawler: Max concurrent tasks")
|
||||||
|
bind(ConfTasks)
|
||||||
|
|
||||||
|
pf.Uint(ConfWorkers, 4, "Crawler: Connections per server")
|
||||||
|
bind(ConfWorkers)
|
||||||
|
|
||||||
|
pf.Uint(ConfRetries, 5, "Crawler: Request retries")
|
||||||
|
bind(ConfRetries)
|
||||||
|
|
||||||
|
pf.Duration(ConfDialTimeout, 10 * time.Second, "Crawler: Handshake timeout")
|
||||||
|
bind(ConfDialTimeout)
|
||||||
|
|
||||||
|
pf.Duration(ConfTimeout, 30 * time.Second, "Crawler: Request timeout")
|
||||||
|
bind(ConfTimeout)
|
||||||
|
|
||||||
|
pf.String(ConfUserAgent, "Mozilla/5.0 (X11; od-database-crawler) Gecko/20100101 Firefox/52.0", "Crawler: User-Agent")
|
||||||
|
bind(ConfUserAgent)
|
||||||
|
|
||||||
|
pf.Uint(ConfJobBufferSize, 5000, "Crawler: Task queue cache size")
|
||||||
|
bind(ConfJobBufferSize)
|
||||||
|
|
||||||
|
pf.Duration(ConfCrawlStats, time.Second, "Log: Crawl stats interval")
|
||||||
|
bind(ConfCrawlStats)
|
||||||
|
|
||||||
|
pf.Duration(ConfAllocStats, 10 * time.Second, "Log: Resource stats interval")
|
||||||
|
bind(ConfAllocStats)
|
||||||
|
|
||||||
|
pf.Bool(ConfVerbose, false, "Log: Print every listed dir")
|
||||||
|
bind(ConfVerbose)
|
||||||
|
|
||||||
|
pf.Bool(ConfPrintHTTP, false, "Log: Print HTTP client errors")
|
||||||
|
bind(ConfPrintHTTP)
|
||||||
|
|
||||||
|
pf.String(ConfLogFile, "crawler.log", "Log file")
|
||||||
|
bind(ConfLogFile)
|
||||||
}
|
}
|
||||||
|
|
||||||
func readConfig() {
|
func readConfig() {
|
||||||
viper.AddConfigPath(".")
|
// If config.yml in working dir, use it
|
||||||
viper.SetConfigName("config")
|
if _, err := os.Stat("config.yml"); err == nil {
|
||||||
err := viper.ReadInConfig()
|
configFile = "config.yml"
|
||||||
if err != nil {
|
}
|
||||||
fmt.Fprintln(os.Stderr, err)
|
|
||||||
os.Exit(1)
|
if configFile != "" {
|
||||||
|
var err error
|
||||||
|
confPath, err := filepath.Abs(configFile)
|
||||||
|
if err != nil { panic(err) }
|
||||||
|
|
||||||
|
viper.SetConfigFile(confPath)
|
||||||
|
err = viper.ReadInConfig()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintln(os.Stderr, err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
config.ServerUrl = viper.GetString(ConfServerUrl)
|
config.ServerUrl = viper.GetString(ConfServerUrl)
|
||||||
|
1
help.go
1
help.go
@ -4,7 +4,6 @@ const helpText =
|
|||||||
`HTTP crawler for the OD-Database
|
`HTTP crawler for the OD-Database
|
||||||
DB >> https://od-db.the-eye.eu <<
|
DB >> https://od-db.the-eye.eu <<
|
||||||
Crawler >> https://github.com/terorie/od-database-crawler <<
|
Crawler >> https://github.com/terorie/od-database-crawler <<
|
||||||
Config >> https://bit.ly/2MOAsUp
|
|
||||||
Server >> https://github.com/simon987/od-database <<
|
Server >> https://github.com/simon987/od-database <<
|
||||||
|
|
||||||
Quick start:
|
Quick start:
|
||||||
|
5
main.go
5
main.go
@ -51,8 +51,6 @@ func init() {
|
|||||||
rootCmd.AddCommand(&crawlCmd)
|
rootCmd.AddCommand(&crawlCmd)
|
||||||
rootCmd.AddCommand(&serverCmd)
|
rootCmd.AddCommand(&serverCmd)
|
||||||
|
|
||||||
pf := rootCmd.PersistentFlags()
|
|
||||||
pf.StringVar(&configFile, "config", "", "Config file")
|
|
||||||
prepareConfig()
|
prepareConfig()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -65,9 +63,6 @@ func preRun(cmd *cobra.Command, args []string) error {
|
|||||||
|
|
||||||
readConfig()
|
readConfig()
|
||||||
|
|
||||||
if configFile != "" {
|
|
||||||
viper.SetConfigFile(configFile)
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user