From f3620262fced2afde0fc1961891cb39f4643ffec Mon Sep 17 00:00:00 2001
From: Richard Patel <terorie@alphakevin.club>
Date: Sun, 18 Nov 2018 14:46:52 +0100
Subject: [PATCH] Add log file support

---
 config.go  | 15 +++++++++++++++
 config.yml |  8 ++++++++
 main.go    | 18 +++++++-----------
 util.go    | 22 +++++++++++++++++++++-
 4 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/config.go b/config.go
index e949a83..f58d9a3 100644
--- a/config.go
+++ b/config.go
@@ -1,9 +1,11 @@
 package main
 
 import (
+	"bufio"
 	"fmt"
 	"github.com/sirupsen/logrus"
 	"github.com/spf13/viper"
+	"io"
 	"os"
 	"strings"
 	"time"
@@ -41,6 +43,7 @@ const (
 	ConfAllocStats = "output.resource_stats"
 	ConfVerbose    = "output.verbose"
 	ConfPrintHTTP  = "output.http"
+	ConfLogFile    = "output.log"
 )
 
 func prepareConfig() {
@@ -53,6 +56,7 @@ func prepareConfig() {
 	viper.SetDefault(ConfAllocStats, 0)
 	viper.SetDefault(ConfVerbose, false)
 	viper.SetDefault(ConfPrintHTTP, false)
+	viper.SetDefault(ConfLogFile, "")
 	viper.SetDefault(ConfRecheck, 3 * time.Second)
 	viper.SetDefault(ConfChunkSize, "1 MB")
 }
@@ -114,6 +118,17 @@ func readConfig() {
 		logrus.SetLevel(logrus.DebugLevel)
 	}
 
+	if filePath := viper.GetString(ConfLogFile); filePath != "" {
+		f, err := os.OpenFile(filePath, os.O_CREATE | os.O_WRONLY | os.O_APPEND, 0644)
+		bufWriter := bufio.NewWriter(f)
+		if err != nil { panic(err) }
+		exitHooks.Add(func() {
+			bufWriter.Flush()
+			f.Close()
+		})
+		logrus.SetOutput(io.MultiWriter(os.Stdout, bufWriter))
+	}
+
 	config.PrintHTTP = viper.GetBool(ConfPrintHTTP)
 }
 
diff --git a/config.yml b/config.yml
index f59555e..bb9aafe 100644
--- a/config.yml
+++ b/config.yml
@@ -23,13 +23,20 @@ server:
 output:
   # Crawl statistics
   crawl_stats: 1s
+
   # CPU/RAM/Job queue stats
   resource_stats: 10s
+
   # More output? (Every listed dir)
   verbose: false
+
   # Print HTTP errors (Super spammy)
   http: false
 
+  # Path of the log file; log output is appended to it in addition to stdout.
+  # If empty, no log file is created.
+  log: crawler.log
+
 # Crawler settings
 crawl:
   # Number of sites that can be processed at once
@@ -49,4 +56,5 @@ crawl:
   timeout: 10s
 
   # Crawler User-Agent
+  # If empty, no User-Agent header is sent.
   user-agent: "Mozilla/5.0 (X11; od-database-crawler) Gecko/20100101 Firefox/52.0"
diff --git a/main.go b/main.go
index 5a1194a..32e1998 100644
--- a/main.go
+++ b/main.go
@@ -5,9 +5,6 @@ import (
 	"github.com/sirupsen/logrus"
 	"github.com/terorie/od-database-crawler/fasturl"
 	"github.com/urfave/cli"
-	"log"
-	"net/http"
-	_ "net/http/pprof"
 	"os"
 	"strings"
 	"sync/atomic"
@@ -29,28 +26,29 @@ var app = cli.App {
 			Action:    cmdCrawler,
 		},
 	},
+	After: func(i *cli.Context) error {
+		exitHooks.Execute()
+		return nil
+	},
 }
 
+var exitHooks Hooks
+
 func init() {
 	prepareConfig()
 }
 
 func main() {
-	go func() {
-		log.Println(http.ListenAndServe("localhost:42069", nil))
-	}()
-
 	err := os.MkdirAll("crawled", 0755)
 	if err != nil {
 		panic(err)
 	}
 
+	readConfig()
 	app.Run(os.Args)
 }
 
 func cmdBase(_ *cli.Context) error {
-	readConfig()
-
 	// TODO Graceful shutdown
 	appCtx := context.Background()
 	forceCtx := context.Background()
@@ -107,8 +105,6 @@ func cmdBase(_ *cli.Context) error {
 }
 
 func cmdCrawler(clic *cli.Context) error {
-	readConfig()
-
 	if clic.NArg() != 1 {
 		cli.ShowCommandHelpAndExit(clic, "crawl", 1)
 	}
diff --git a/util.go b/util.go
index 6db4c4f..a23c906 100644
--- a/util.go
+++ b/util.go
@@ -1,6 +1,9 @@
 package main
 
-import "fmt"
+import (
+	"fmt"
+	"sync"
+)
 
 // https://programming.guide/go/formatting-byte-size-to-human-readable-format.html
 func FormatByteCount(b uint64) string {
@@ -16,3 +19,34 @@ func FormatByteCount(b uint64) string {
 		return fmt.Sprintf("%.1f %ciB", float64(b)/float64(div), "KMGTPE"[exp])
 	}
 }
+
+// Hooks is a list of callbacks that can be registered from any goroutine
+// and run later, e.g. for cleanup at program exit. The zero value is ready
+// to use. A Hooks must not be copied after first use because it contains a
+// sync.Mutex.
+type Hooks struct {
+	m sync.Mutex // guards l
+	l []func()   // registered hooks, in registration order
+}
+
+// Add registers hook to be called by Execute. It is safe for concurrent use.
+func (h *Hooks) Add(hook func()) {
+	h.m.Lock()
+	defer h.m.Unlock()
+	h.l = append(h.l, hook)
+}
+
+// Execute calls every registered hook once, in registration order. Hooks
+// stay registered, so a second call runs them again.
+func (h *Hooks) Execute() {
+	// Snapshot the list under the lock so Execute does not race with a
+	// concurrent Add, then release the lock before running the hooks so a
+	// hook may itself call Add without deadlocking.
+	h.m.Lock()
+	hooks := h.l
+	h.m.Unlock()
+
+	for _, hook := range hooks {
+		hook()
+	}
+}