From bfd7302be8011649b00422e78b6d14f61b5f2c3a Mon Sep 17 00:00:00 2001
From: Richard Patel
Date: Sun, 28 Oct 2018 17:59:46 +0100
Subject: [PATCH] Add urfave/cli app

---
 config.go | 23 ++++++++++++--------
 crawl.go  |  4 ----
 main.go   | 64 +++++++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/config.go b/config.go
index bb36103..3e2f293 100644
--- a/config.go
+++ b/config.go
@@ -1,8 +1,10 @@
 package main
 
 import (
+	"fmt"
 	"github.com/sirupsen/logrus"
 	"github.com/spf13/viper"
+	"os"
 	"time"
 )
 
@@ -39,18 +41,19 @@ func readConfig() {
 	viper.SetConfigName("config")
 	err := viper.ReadInConfig()
 	if err != nil {
-		logrus.Fatal(err)
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
 	}
 
 	config.ServerUrl = viper.GetString(ConfServerUrl)
-	if config.ServerUrl == "" {
-		configMissing(ConfServerUrl)
-	}
+	//if config.ServerUrl == "" {
+	//	configMissing(ConfServerUrl)
+	//}
 
 	config.Token = viper.GetString(ConfToken)
-	if config.Token == "" {
-		configMissing(ConfToken)
-	}
+	//if config.Token == "" {
+	//	configMissing(ConfToken)
+	//}
 
 	config.Retries = viper.GetInt(ConfRetries)
 	if config.Retries < 0 {
@@ -76,9 +79,11 @@ func readConfig() {
 }
 
 func configMissing(key string) {
-	logrus.Fatalf("config: %s not set!", key)
+	fmt.Fprintf(os.Stderr, "config: %s not set!\n", key)
+	os.Exit(1)
 }
 
 func configOOB(key string, v int) {
-	logrus.Fatal("config: illegal value %d for %key!", v, key)
+	fmt.Fprintf(os.Stderr, "config: illegal value %d for %s!\n", v, key)
+	os.Exit(1)
 }
diff --git a/crawl.go b/crawl.go
index c8cdc10..7532fef 100644
--- a/crawl.go
+++ b/crawl.go
@@ -19,10 +19,6 @@ var client fasthttp.Client
 var ErrRateLimit = errors.New("too many requests")
 var ErrForbidden = errors.New("access denied")
 
-func NewRemoteDir(u url.URL) *OD {
-	return &OD{ BaseUri: u }
-}
-
 func GetDir(j *Job, f *File) (links []url.URL, err error) {
 	f.IsDir = true
 	f.Name = path.Base(j.Uri.Path)
diff --git a/main.go b/main.go
index db979b6..86ca871 100644
--- a/main.go
+++ b/main.go
@@ -2,27 +2,77 @@ package main
 
 import (
 	"context"
+	"github.com/urfave/cli"
 	"net/url"
+	"os"
+	"strings"
+	"time"
 )
 
+var app = cli.App {
+	Name: "oddb-go",
+	Usage: "OD-Database Go crawler",
+	Version: "0.1",
+	BashComplete: cli.DefaultAppComplete,
+	Writer: os.Stdout,
+	Compiled: buildDate,
+	Commands: []cli.Command{
+		{
+			Name: "crawl",
+			Usage: "Crawl a list of URLs",
+			ArgsUsage: "[site, site, ...]",
+			Action: cmdCrawler,
+		},
+	},
+}
+
 func init() {
 	prepareConfig()
 }
 
 func main() {
+	app.Run(os.Args)
+}
+
+func cmdCrawler(clic *cli.Context) error {
 	readConfig()
 
+	if clic.NArg() == 0 {
+		cli.ShowCommandHelpAndExit(clic, "crawl", 1)
+	}
+
+	args := clic.Args()
+	remotes := make([]*OD, len(args))
+	for i, arg := range args {
+		// https://github.com/golang/go/issues/19779
+		if !strings.Contains(arg, "://") {
+			arg = "http://" + arg
+		}
+		u, err := url.Parse(arg)
+		if err != nil { return err } // u is nil on a parse error; check before reading u.Path
+		if !strings.HasSuffix(u.Path, "/") {
+			u.Path += "/"
+		}
+		remotes[i] = &OD{ BaseUri: *u }
+	}
+
 	c := context.Background()
 
-	remotes := make(chan *OD)
-	go Schedule(c, remotes)
+	inRemotes := make(chan *OD)
+	go Schedule(c, inRemotes)
 
-	u, _ := url.Parse("http://mine.terorie.com:420/")
-	remote := NewRemoteDir(*u)
-
-	globalWait.Add(1)
-	remotes <- remote
+	for _, remote := range remotes {
+		globalWait.Add(1)
+		inRemotes <- remote
+	}
 
 	// Wait for all jobs to finish
 	globalWait.Wait()
+
+	return nil
 }
+
+var buildDate = time.Date(
+	2018, 10, 28,
+	17, 10, 0, 0,
+	time.UTC)