package main

import (
	"context"
	"log"
	"net/http"
	_ "net/http/pprof"
	"os"
	"strings"
	"time"

	"github.com/sirupsen/logrus"
	"github.com/terorie/oddb-go/fasturl"
	"github.com/terorie/oddb-go/runes"
	"github.com/urfave/cli"
)

// app describes the command-line interface: a single "crawl" command that
// is handled by cmdCrawler.
var app = cli.App{
	Name:         "oddb-go",
	Usage:        "OD-Database Go crawler",
	Version:      "0.1",
	BashComplete: cli.DefaultAppComplete,
	Writer:       os.Stdout,
	Compiled:     buildDate,
	Commands: []cli.Command{
		{
			Name:      "crawl",
			Usage:     "Crawl a list of URLs",
			ArgsUsage: "[site, site, ...]",
			Action:    cmdCrawler,
		},
	},
}

// init runs prepareConfig (defined elsewhere in this package) before main.
func init() {
	prepareConfig()
}

// main starts a local HTTP listener in the background and then hands the
// process arguments to the CLI app.
func main() {
	// A nil handler means http.DefaultServeMux, which is where the blank
	// import of net/http/pprof registers its /debug/pprof/ handlers.
	go func() {
		log.Println(http.ListenAndServe("localhost:42069", nil))
	}()

	// Surface command failures instead of silently exiting with status 0.
	if err := app.Run(os.Args); err != nil {
		logrus.Fatal(err)
	}
}

// cmdCrawler implements the "crawl" command.
//
// Every positional argument is parsed into a base URL (defaulting to the
// http scheme and forcing a trailing slash on the path), wrapped in an OD
// and sent to Schedule. The function then blocks on globalWait until all
// queued work has been reported as done.
//
// It returns the first URL parse error encountered; in that case nothing is
// scheduled. With no arguments it prints the command help and exits with
// status 1.
func cmdCrawler(clic *cli.Context) error {
	readConfig()

	if clic.NArg() == 0 {
		cli.ShowCommandHelpAndExit(clic, "crawl", 1)
	}

	args := clic.Args()
	remotes := make([]*OD, len(args))
	for i, argStr := range args {
		// Default to http:// when the argument carries no scheme.
		// https://github.com/golang/go/issues/19779
		if !strings.Contains(argStr, "://") {
			argStr = "http://" + argStr
		}

		var u fasturl.URL
		// Check the parse result before touching u: its fields are not
		// meaningful when Parse fails.
		if err := u.Parse([]rune(argStr)); err != nil {
			return err
		}

		// Make sure the base path ends in a slash.
		if !runes.HasSuffix(u.Path, []rune("/")) {
			u.Path = append(u.Path, '/')
		}

		remotes[i] = &OD{BaseUri: u}
	}

	ctx := context.Background()
	inRemotes := make(chan *OD)
	go Schedule(ctx, inRemotes)

	for _, remote := range remotes {
		// NOTE(review): globalWait is declared elsewhere; presumably the
		// matching Done call is made once a remote has been fully
		// processed — confirm against Schedule.
		globalWait.Add(1)
		inRemotes <- remote
	}

	// Wait for all jobs to finish
	globalWait.Wait()

	logrus.Info("All dirs processed!")

	return nil
}

// buildDate is the timestamp reported through app.Compiled.
var buildDate = time.Date(
	2018, 10, 28,
	17, 10, 0, 0,
	time.UTC)