# OD-Database server settings
server:
  # Connection URL
  url: http://od-db.mine.terorie.com/api

  # Server auth token
  token:

  # Request timeout
  timeout: 60s

  # Recheck interval
  # The crawler periodically asks the server
  # for new jobs. Sets the minimum wait time
  # between /task/get requests to the server.
  recheck: 1s

  # Time to wait after receiving an error
  # from the server. Doesn't apply to uploads.
  cooldown: 30s

  # Upload chunk size
  # If the value is too high, the upload fails.
  upload_chunk: 1 MB

  # Number of upload attempts and the wait between them
  upload_retries: 10
  upload_retry_interval: 30s

# Log output settings
output:
  # Crawl statistics
  crawl_stats: 1s

  # CPU/RAM/job queue stats
  resource_stats: 10s

  # More output? (Every listed dir)
  verbose: false

  # Print HTTP errors (Super spammy)
  http: false

  # Log file
  # If empty, no log file is created.
  log: crawler.log

# Crawler settings
crawl:
  # Number of sites that can be processed at once
  tasks: 25

  # Number of connections per site
  # Please be careful with this setting!
  # The crawler fires fast, and more than
  # ten connections can overwhelm a server.
  connections: 1

  # How often to retry getting data
  # from a site before giving up
  retries: 5

  # Time before discarding a failed connection attempt
  dial_timeout: 10s

  # Time before discarding a network request
  timeout: 30s

  # Crawler User-Agent
  # If empty, no User-Agent header is sent.
  user-agent: "Mozilla/5.0 (X11; od-database-crawler) Gecko/20100101 Firefox/52.0"

  # Job buffer size (per task)
  # Higher values cause fewer disk writes
  # but require more memory.
  #
  # The job queue contains all URLs
  # that should be crawled next.
  # Because it grows very large over time,
  # it is kept mainly on disk.
  # This sets how many jobs are kept
  # in memory.
  # A negative value will cause all jobs
  # to be stored in memory. (Don't do this)
  job_buffer: -1