mirror of
https://github.com/terorie/od-database-crawler.git
synced 2025-04-19 02:16:43 +00:00
Switch to spf13/cobra
lul
This commit is contained in:
parent
17ba5583c9
commit
e5746baa5b
2
go.mod
2
go.mod
@ -4,10 +4,10 @@ require (
|
|||||||
github.com/beeker1121/goque v2.0.1+incompatible
|
github.com/beeker1121/goque v2.0.1+incompatible
|
||||||
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db // indirect
|
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db // indirect
|
||||||
github.com/sirupsen/logrus v1.3.0
|
github.com/sirupsen/logrus v1.3.0
|
||||||
|
github.com/spf13/cobra v0.0.3
|
||||||
github.com/spf13/viper v1.3.1
|
github.com/spf13/viper v1.3.1
|
||||||
github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2 // indirect
|
github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2 // indirect
|
||||||
github.com/terorie/od-database-crawler v1.1.1
|
github.com/terorie/od-database-crawler v1.1.1
|
||||||
github.com/urfave/cli v1.20.0
|
|
||||||
github.com/valyala/fasthttp v1.1.0
|
github.com/valyala/fasthttp v1.1.0
|
||||||
golang.org/x/crypto v0.0.0-20190131182504-b8fe1690c613
|
golang.org/x/crypto v0.0.0-20190131182504-b8fe1690c613
|
||||||
golang.org/x/net v0.0.0-20180911220305-26e67e76b6c3
|
golang.org/x/net v0.0.0-20180911220305-26e67e76b6c3
|
||||||
|
4
go.sum
4
go.sum
@ -29,6 +29,8 @@ github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI=
|
|||||||
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
|
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
|
||||||
github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8=
|
github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8=
|
||||||
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
|
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
|
||||||
|
github.com/spf13/cobra v0.0.3 h1:ZlrZ4XsMRm04Fr5pSFxBgfND2EBVa1nLpiy1stUsX/8=
|
||||||
|
github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ=
|
||||||
github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk=
|
github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk=
|
||||||
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
|
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
|
||||||
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
|
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
|
||||||
@ -42,8 +44,6 @@ github.com/syndtr/goleveldb v0.0.0-20181128100959-b001fa50d6b2/go.mod h1:Z4AUp2K
|
|||||||
github.com/terorie/od-database-crawler v1.1.1 h1:Ca+ZqbZX3rVWBR8SDRzvroyxjBtUs75MQXZ9YG0gqGo=
|
github.com/terorie/od-database-crawler v1.1.1 h1:Ca+ZqbZX3rVWBR8SDRzvroyxjBtUs75MQXZ9YG0gqGo=
|
||||||
github.com/terorie/od-database-crawler v1.1.1/go.mod h1:vVJ7pLkudrlUNp9qu24JCzQ8N6mFsrOmX1tPXr155DQ=
|
github.com/terorie/od-database-crawler v1.1.1/go.mod h1:vVJ7pLkudrlUNp9qu24JCzQ8N6mFsrOmX1tPXr155DQ=
|
||||||
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
|
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
|
||||||
github.com/urfave/cli v1.20.0 h1:fDqGv3UG/4jbVl/QkFwEdddtEDjh/5Ov6X+0B/3bPaw=
|
|
||||||
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
|
|
||||||
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
||||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||||
github.com/valyala/fasthttp v1.1.0 h1:3BohG7mqwj4lq7PTX//7gLbUlzNvZSPmuHFnloXT0lw=
|
github.com/valyala/fasthttp v1.1.0 h1:3BohG7mqwj4lq7PTX//7gLbUlzNvZSPmuHFnloXT0lw=
|
||||||
|
16
help.go
Normal file
16
help.go
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
const helpText =
|
||||||
|
`HTTP crawler for the OD-Database
|
||||||
|
DB >> https://od-db.the-eye.eu <<
|
||||||
|
Crawler >> https://github.com/terorie/od-database-crawler <<
|
||||||
|
Config >> https://bit.ly/2MOAsUp
|
||||||
|
Server >> https://github.com/simon987/od-database <<
|
||||||
|
|
||||||
|
Quick start:
|
||||||
|
- get config file (config.yml in working dir)
|
||||||
|
- get OD-DB server ("server.url": Database URL + /api)
|
||||||
|
- get access token ("server.token": e.g. c010b6dd-20...)
|
||||||
|
- ./od-database-crawler server
|
||||||
|
|
||||||
|
Questions? Discord @terorie#2664 / Telegram @terorie`
|
92
main.go
92
main.go
@ -2,10 +2,11 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
|
"github.com/spf13/cobra"
|
||||||
"github.com/spf13/viper"
|
"github.com/spf13/viper"
|
||||||
"github.com/terorie/od-database-crawler/fasturl"
|
"github.com/terorie/od-database-crawler/fasturl"
|
||||||
"github.com/urfave/cli"
|
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
@ -14,47 +15,48 @@ import (
|
|||||||
|
|
||||||
var configFile string
|
var configFile string
|
||||||
|
|
||||||
var app = cli.App {
|
var rootCmd = cobra.Command {
|
||||||
Name: "od-database-crawler",
|
Use: "od-database-crawler",
|
||||||
Usage: "OD-Database Go crawler",
|
|
||||||
Version: "1.1.1",
|
Version: "1.1.1",
|
||||||
BashComplete: cli.DefaultAppComplete,
|
Short: "OD-Database Go crawler",
|
||||||
Writer: os.Stdout,
|
Long: helpText,
|
||||||
Action: cmdBase,
|
PersistentPreRunE: preRun,
|
||||||
Commands: []cli.Command {
|
PersistentPostRun: func(cmd *cobra.Command, args []string) {
|
||||||
{
|
|
||||||
Name: "crawl",
|
|
||||||
Usage: "Crawl a list of URLs",
|
|
||||||
ArgsUsage: "<site>",
|
|
||||||
Action: cmdCrawler,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Flags: []cli.Flag {
|
|
||||||
cli.StringFlag {
|
|
||||||
Name: "config",
|
|
||||||
EnvVar: "CONFIG",
|
|
||||||
Destination: &configFile,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Before: func(i *cli.Context) error {
|
|
||||||
if configFile != "" {
|
|
||||||
viper.SetConfigFile(configFile)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
After: func(i *cli.Context) error {
|
|
||||||
exitHooks.Execute()
|
exitHooks.Execute()
|
||||||
return nil
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var serverCmd = cobra.Command {
|
||||||
|
Use: "server",
|
||||||
|
Short: "Start crawl server",
|
||||||
|
Long: "Connect to the OD-Database and contribute to the database\n" +
|
||||||
|
"by crawling the web for open directories!",
|
||||||
|
Run: cmdBase,
|
||||||
|
}
|
||||||
|
|
||||||
|
var crawlCmd = cobra.Command {
|
||||||
|
Use: "crawl",
|
||||||
|
Short: "Crawl an URL",
|
||||||
|
Long: "Crawl the URL specified.\n" +
|
||||||
|
"Results will not be uploaded to the database,\n" +
|
||||||
|
"they're saved under crawled/0.json instead.\n" +
|
||||||
|
"Primarily used for testing and benchmarking.",
|
||||||
|
RunE: cmdCrawler,
|
||||||
|
Args: cobra.ExactArgs(1),
|
||||||
|
}
|
||||||
|
|
||||||
var exitHooks Hooks
|
var exitHooks Hooks
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
rootCmd.AddCommand(&crawlCmd)
|
||||||
|
rootCmd.AddCommand(&serverCmd)
|
||||||
|
|
||||||
|
pf := rootCmd.PersistentFlags()
|
||||||
|
pf.StringVar(&configFile, "config", "", "Config file")
|
||||||
prepareConfig()
|
prepareConfig()
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func preRun(cmd *cobra.Command, args []string) error {
|
||||||
if err := os.MkdirAll("crawled", 0755);
|
if err := os.MkdirAll("crawled", 0755);
|
||||||
err != nil { panic(err) }
|
err != nil { panic(err) }
|
||||||
|
|
||||||
@ -62,10 +64,22 @@ func main() {
|
|||||||
err != nil { panic(err) }
|
err != nil { panic(err) }
|
||||||
|
|
||||||
readConfig()
|
readConfig()
|
||||||
app.Run(os.Args)
|
|
||||||
|
if configFile != "" {
|
||||||
|
viper.SetConfigFile(configFile)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func cmdBase(_ *cli.Context) error {
|
func main() {
|
||||||
|
err := rootCmd.Execute()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintln(os.Stderr, err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func cmdBase(_ *cobra.Command, _ []string) {
|
||||||
// TODO Graceful shutdown
|
// TODO Graceful shutdown
|
||||||
appCtx := context.Background()
|
appCtx := context.Background()
|
||||||
forceCtx := context.Background()
|
forceCtx := context.Background()
|
||||||
@ -78,7 +92,7 @@ func cmdBase(_ *cli.Context) error {
|
|||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-appCtx.Done():
|
case <-appCtx.Done():
|
||||||
return nil
|
return
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
t, err := FetchTask()
|
t, err := FetchTask()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -117,16 +131,10 @@ func cmdBase(_ *cli.Context) error {
|
|||||||
ScheduleTask(inRemotes, t, &baseUri)
|
ScheduleTask(inRemotes, t, &baseUri)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func cmdCrawler(clic *cli.Context) error {
|
func cmdCrawler(_ *cobra.Command, args []string) error {
|
||||||
if clic.NArg() != 1 {
|
arg := args[0]
|
||||||
cli.ShowCommandHelpAndExit(clic, "crawl", 1)
|
|
||||||
}
|
|
||||||
|
|
||||||
arg := clic.Args()[0]
|
|
||||||
// https://github.com/golang/go/issues/19779
|
// https://github.com/golang/go/issues/19779
|
||||||
if !strings.Contains(arg, "://") {
|
if !strings.Contains(arg, "://") {
|
||||||
arg = "http://" + arg
|
arg = "http://" + arg
|
||||||
|
Loading…
x
Reference in New Issue
Block a user