5 Commits

Author SHA1 Message Date
Richard Patel
f90bf94a44 Bump version to v1.1.1 2018-11-27 22:11:57 +01:00
Richard Patel
e82768ff80 Wait time control in config 2018-11-27 22:11:57 +01:00
Richard Patel
b1bf59adef Add The Eye DB to README.md 2018-11-27 17:40:12 +01:00
Richard Patel
a2df2972f4 Bump the upload retry interval up to 30s 2018-11-20 04:13:20 +01:00
Richard Patel
3fc8837dd7 Add output files to .gitignore 2018-11-20 03:51:42 +01:00
6 changed files with 28 additions and 7 deletions

3
.gitignore vendored
View File

@@ -1,3 +1,6 @@
/.idea/ /.idea/
.DS_Store .DS_Store
/od-database-crawler /od-database-crawler
*.log
/queue/
/crawled/

View File

@@ -5,3 +5,5 @@
* Crawls HTTP open directories (standard Web Server Listings) * Crawls HTTP open directories (standard Web Server Listings)
* Gets name, path, size and modification time of all files * Gets name, path, size and modification time of all files
* Lightweight and fast: __over 9000 requests per second__ on a standard laptop * Lightweight and fast: __over 9000 requests per second__ on a standard laptop
https://od-db.the-eye.eu/

View File

@@ -33,7 +33,11 @@ const (
ConfToken = "server.token" ConfToken = "server.token"
ConfServerTimeout = "server.timeout" ConfServerTimeout = "server.timeout"
ConfRecheck = "server.recheck" ConfRecheck = "server.recheck"
ConfCooldown = "server.cooldown"
ConfChunkSize = "server.upload_chunk" ConfChunkSize = "server.upload_chunk"
ConfUploadRetries = "server.upload_retries"
ConfUploadRetryInterval = "server.upload_retry_interval"
ConfTasks = "crawl.tasks" ConfTasks = "crawl.tasks"
ConfRetries = "crawl.retries" ConfRetries = "crawl.retries"
ConfWorkers = "crawl.connections" ConfWorkers = "crawl.connections"
@@ -41,6 +45,7 @@ const (
ConfDialTimeout = "crawl.dial_timeout" ConfDialTimeout = "crawl.dial_timeout"
ConfTimeout = "crawl.timeout" ConfTimeout = "crawl.timeout"
ConfJobBufferSize = "crawl.job_buffer" ConfJobBufferSize = "crawl.job_buffer"
ConfCrawlStats = "output.crawl_stats" ConfCrawlStats = "output.crawl_stats"
ConfAllocStats = "output.resource_stats" ConfAllocStats = "output.resource_stats"
ConfVerbose = "output.verbose" ConfVerbose = "output.verbose"
@@ -54,7 +59,7 @@ func prepareConfig() {
viper.SetDefault(ConfTasks, 3) viper.SetDefault(ConfTasks, 3)
viper.SetDefault(ConfUserAgent, "") viper.SetDefault(ConfUserAgent, "")
viper.SetDefault(ConfDialTimeout, 10 * time.Second) viper.SetDefault(ConfDialTimeout, 10 * time.Second)
viper.SetDefault(ConfTimeout, 30 * time.Second) viper.SetDefault(ConfTimeout, 60 * time.Second)
viper.SetDefault(ConfJobBufferSize, 5000) viper.SetDefault(ConfJobBufferSize, 5000)
viper.SetDefault(ConfCrawlStats, 3 * time.Second) viper.SetDefault(ConfCrawlStats, 3 * time.Second)
viper.SetDefault(ConfAllocStats, 0) viper.SetDefault(ConfAllocStats, 0)
@@ -62,7 +67,10 @@ func prepareConfig() {
viper.SetDefault(ConfPrintHTTP, false) viper.SetDefault(ConfPrintHTTP, false)
viper.SetDefault(ConfLogFile, "") viper.SetDefault(ConfLogFile, "")
viper.SetDefault(ConfRecheck, 3 * time.Second) viper.SetDefault(ConfRecheck, 3 * time.Second)
viper.SetDefault(ConfCooldown, 30 * time.Second)
viper.SetDefault(ConfChunkSize, "1 MB") viper.SetDefault(ConfChunkSize, "1 MB")
viper.SetDefault(ConfUploadRetries, 10)
viper.SetDefault(ConfUploadRetryInterval, 30 * time.Second)
} }
func readConfig() { func readConfig() {

View File

@@ -15,10 +15,17 @@ server:
# between /task/get requests to the server. # between /task/get requests to the server.
recheck: 1s recheck: 1s
# Time to wait after receiving an error
# from the server. Doesn't apply to uploads.
cooldown: 30s
# Upload chunk size # Upload chunk size
# If the value is too high, the upload fails. # If the value is too high, the upload fails.
upload_chunk: 1 MB upload_chunk: 1 MB
upload_retries: 10
upload_retry_interval: 30s
# Log output settings # Log output settings
output: output:
# Crawl statistics # Crawl statistics

View File

@@ -17,7 +17,7 @@ var configFile string
var app = cli.App { var app = cli.App {
Name: "od-database-crawler", Name: "od-database-crawler",
Usage: "OD-Database Go crawler", Usage: "OD-Database Go crawler",
Version: "1.1.0", Version: "1.1.1",
BashComplete: cli.DefaultAppComplete, BashComplete: cli.DefaultAppComplete,
Writer: os.Stdout, Writer: os.Stdout,
Action: cmdBase, Action: cmdBase,
@@ -84,7 +84,7 @@ func cmdBase(_ *cli.Context) error {
if err != nil { if err != nil {
logrus.WithError(err). logrus.WithError(err).
Error("Failed to get new task") Error("Failed to get new task")
time.Sleep(30 * time.Second) time.Sleep(viper.GetDuration(ConfCooldown))
continue continue
} }
if t == nil { if t == nil {
@@ -111,7 +111,7 @@ func cmdBase(_ *cli.Context) error {
} else if err != nil { } else if err != nil {
logrus.WithError(err). logrus.WithError(err).
Error("Failed to get new task") Error("Failed to get new task")
time.Sleep(30 * time.Second) time.Sleep(viper.GetDuration(ConfCooldown))
continue continue
} }
ScheduleTask(inRemotes, t, &baseUri) ScheduleTask(inRemotes, t, &baseUri)

View File

@@ -5,6 +5,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/spf13/viper"
"io" "io"
"mime/multipart" "mime/multipart"
"net/http" "net/http"
@@ -102,10 +103,10 @@ func uploadChunks(websiteId uint64, f *os.File) error {
multi.Close() multi.Close()
for retries := 0; retries < 10; retries++ { for retries := 0; retries < viper.GetInt(ConfUploadRetries); retries++ {
if retries > 0 { if retries > 0 {
// Error occurred, retry upload // Error occurred, retry upload
time.Sleep(5 * time.Second) time.Sleep(viper.GetDuration(ConfUploadRetryInterval))
} }
req, err := http.NewRequest( req, err := http.NewRequest(