mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-12-14 07:39:03 +00:00
Switch to spf13/cobra
lul
This commit is contained in:
94
main.go
94
main.go
@@ -2,10 +2,11 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/spf13/viper"
|
||||
"github.com/terorie/od-database-crawler/fasturl"
|
||||
"github.com/urfave/cli"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
@@ -14,47 +15,48 @@ import (
|
||||
|
||||
var configFile string
|
||||
|
||||
var app = cli.App {
|
||||
Name: "od-database-crawler",
|
||||
Usage: "OD-Database Go crawler",
|
||||
Version: "1.1.1",
|
||||
BashComplete: cli.DefaultAppComplete,
|
||||
Writer: os.Stdout,
|
||||
Action: cmdBase,
|
||||
Commands: []cli.Command {
|
||||
{
|
||||
Name: "crawl",
|
||||
Usage: "Crawl a list of URLs",
|
||||
ArgsUsage: "<site>",
|
||||
Action: cmdCrawler,
|
||||
},
|
||||
},
|
||||
Flags: []cli.Flag {
|
||||
cli.StringFlag {
|
||||
Name: "config",
|
||||
EnvVar: "CONFIG",
|
||||
Destination: &configFile,
|
||||
},
|
||||
},
|
||||
Before: func(i *cli.Context) error {
|
||||
if configFile != "" {
|
||||
viper.SetConfigFile(configFile)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
After: func(i *cli.Context) error {
|
||||
var rootCmd = cobra.Command {
|
||||
Use: "od-database-crawler",
|
||||
Version: "1.1.1",
|
||||
Short: "OD-Database Go crawler",
|
||||
Long: helpText,
|
||||
PersistentPreRunE: preRun,
|
||||
PersistentPostRun: func(cmd *cobra.Command, args []string) {
|
||||
exitHooks.Execute()
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
var serverCmd = cobra.Command {
|
||||
Use: "server",
|
||||
Short: "Start crawl server",
|
||||
Long: "Connect to the OD-Database and contribute to the database\n" +
|
||||
"by crawling the web for open directories!",
|
||||
Run: cmdBase,
|
||||
}
|
||||
|
||||
var crawlCmd = cobra.Command {
|
||||
Use: "crawl",
|
||||
Short: "Crawl an URL",
|
||||
Long: "Crawl the URL specified.\n" +
|
||||
"Results will not be uploaded to the database,\n" +
|
||||
"they're saved under crawled/0.json instead.\n" +
|
||||
"Primarily used for testing and benchmarking.",
|
||||
RunE: cmdCrawler,
|
||||
Args: cobra.ExactArgs(1),
|
||||
}
|
||||
|
||||
var exitHooks Hooks
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(&crawlCmd)
|
||||
rootCmd.AddCommand(&serverCmd)
|
||||
|
||||
pf := rootCmd.PersistentFlags()
|
||||
pf.StringVar(&configFile, "config", "", "Config file")
|
||||
prepareConfig()
|
||||
}
|
||||
|
||||
func main() {
|
||||
func preRun(cmd *cobra.Command, args []string) error {
|
||||
if err := os.MkdirAll("crawled", 0755);
|
||||
err != nil { panic(err) }
|
||||
|
||||
@@ -62,10 +64,22 @@ func main() {
|
||||
err != nil { panic(err) }
|
||||
|
||||
readConfig()
|
||||
app.Run(os.Args)
|
||||
|
||||
if configFile != "" {
|
||||
viper.SetConfigFile(configFile)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func cmdBase(_ *cli.Context) error {
|
||||
func main() {
|
||||
err := rootCmd.Execute()
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func cmdBase(_ *cobra.Command, _ []string) {
|
||||
// TODO Graceful shutdown
|
||||
appCtx := context.Background()
|
||||
forceCtx := context.Background()
|
||||
@@ -78,7 +92,7 @@ func cmdBase(_ *cli.Context) error {
|
||||
for {
|
||||
select {
|
||||
case <-appCtx.Done():
|
||||
return nil
|
||||
return
|
||||
case <-ticker.C:
|
||||
t, err := FetchTask()
|
||||
if err != nil {
|
||||
@@ -117,16 +131,10 @@ func cmdBase(_ *cli.Context) error {
|
||||
ScheduleTask(inRemotes, t, &baseUri)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func cmdCrawler(clic *cli.Context) error {
|
||||
if clic.NArg() != 1 {
|
||||
cli.ShowCommandHelpAndExit(clic, "crawl", 1)
|
||||
}
|
||||
|
||||
arg := clic.Args()[0]
|
||||
func cmdCrawler(_ *cobra.Command, args []string) error {
|
||||
arg := args[0]
|
||||
// https://github.com/golang/go/issues/19779
|
||||
if !strings.Contains(arg, "://") {
|
||||
arg = "http://" + arg
|
||||
|
||||
Reference in New Issue
Block a user