5 Commits

Author SHA1 Message Date
Richard Patel
f90bf94a44 Bump version to v1.1.1 2018-11-27 22:11:57 +01:00
Richard Patel
e82768ff80 Wait time control in config 2018-11-27 22:11:57 +01:00
Richard Patel
b1bf59adef Add The Eye DB to README.md 2018-11-27 17:40:12 +01:00
Richard Patel
a2df2972f4 Bump the upload retry interval up to 30s 2018-11-20 04:13:20 +01:00
Richard Patel
3fc8837dd7 Add output files to .gitignore 2018-11-20 03:51:42 +01:00
6 changed files with 28 additions and 7 deletions

5
.gitignore vendored
View File

@@ -1,3 +1,6 @@
/.idea/
.DS_Store
/od-database-crawler
/od-database-crawler
*.log
/queue/
/crawled/

View File

@@ -5,3 +5,5 @@
* Crawls HTTP open directories (standard Web Server Listings)
* Gets name, path, size and modification time of all files
* Lightweight and fast: __over 9000 requests per second__ on a standard laptop
https://od-db.the-eye.eu/

View File

@@ -33,7 +33,11 @@ const (
ConfToken = "server.token"
ConfServerTimeout = "server.timeout"
ConfRecheck = "server.recheck"
ConfCooldown = "server.cooldown"
ConfChunkSize = "server.upload_chunk"
ConfUploadRetries = "server.upload_retries"
ConfUploadRetryInterval = "server.upload_retry_interval"
ConfTasks = "crawl.tasks"
ConfRetries = "crawl.retries"
ConfWorkers = "crawl.connections"
@@ -41,6 +45,7 @@ const (
ConfDialTimeout = "crawl.dial_timeout"
ConfTimeout = "crawl.timeout"
ConfJobBufferSize = "crawl.job_buffer"
ConfCrawlStats = "output.crawl_stats"
ConfAllocStats = "output.resource_stats"
ConfVerbose = "output.verbose"
@@ -54,7 +59,7 @@ func prepareConfig() {
viper.SetDefault(ConfTasks, 3)
viper.SetDefault(ConfUserAgent, "")
viper.SetDefault(ConfDialTimeout, 10 * time.Second)
viper.SetDefault(ConfTimeout, 30 * time.Second)
viper.SetDefault(ConfTimeout, 60 * time.Second)
viper.SetDefault(ConfJobBufferSize, 5000)
viper.SetDefault(ConfCrawlStats, 3 * time.Second)
viper.SetDefault(ConfAllocStats, 0)
@@ -62,7 +67,10 @@ func prepareConfig() {
viper.SetDefault(ConfPrintHTTP, false)
viper.SetDefault(ConfLogFile, "")
viper.SetDefault(ConfRecheck, 3 * time.Second)
viper.SetDefault(ConfCooldown, 30 * time.Second)
viper.SetDefault(ConfChunkSize, "1 MB")
viper.SetDefault(ConfUploadRetries, 10)
viper.SetDefault(ConfUploadRetryInterval, 30 * time.Second)
}
func readConfig() {

View File

@@ -15,10 +15,17 @@ server:
# between /task/get requests to the server.
recheck: 1s
# Time to wait after receiving an error
# from the server. Doesn't apply to uploads.
cooldown: 30s
# Upload chunk size
# If the value is too high, the upload fails.
upload_chunk: 1 MB
upload_retries: 10
upload_retry_interval: 30s
# Log output settings
output:
# Crawl statistics

View File

@@ -17,7 +17,7 @@ var configFile string
var app = cli.App {
Name: "od-database-crawler",
Usage: "OD-Database Go crawler",
Version: "1.1.0",
Version: "1.1.1",
BashComplete: cli.DefaultAppComplete,
Writer: os.Stdout,
Action: cmdBase,
@@ -84,7 +84,7 @@ func cmdBase(_ *cli.Context) error {
if err != nil {
logrus.WithError(err).
Error("Failed to get new task")
time.Sleep(30 * time.Second)
time.Sleep(viper.GetDuration(ConfCooldown))
continue
}
if t == nil {
@@ -111,7 +111,7 @@ func cmdBase(_ *cli.Context) error {
} else if err != nil {
logrus.WithError(err).
Error("Failed to get new task")
time.Sleep(30 * time.Second)
time.Sleep(viper.GetDuration(ConfCooldown))
continue
}
ScheduleTask(inRemotes, t, &baseUri)

View File

@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"github.com/sirupsen/logrus"
"github.com/spf13/viper"
"io"
"mime/multipart"
"net/http"
@@ -102,10 +103,10 @@ func uploadChunks(websiteId uint64, f *os.File) error {
multi.Close()
for retries := 0; retries < 10; retries++ {
for retries := 0; retries < viper.GetInt(ConfUploadRetries); retries++ {
if retries > 0 {
// Error occurred, retry upload
time.Sleep(5 * time.Second)
time.Sleep(viper.GetDuration(ConfUploadRetryInterval))
}
req, err := http.NewRequest(