mirror of
				https://github.com/simon987/imhashdb.git
				synced 2025-10-25 14:06:55 +00:00 
			
		
		
		
	cleanup
This commit is contained in:
		
							parent
							
								
									91f0c9b1f9
								
							
						
					
					
						commit
						5cd367456c
					
				
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -1,3 +1,4 @@ | ||||
| *.prof | ||||
| .idea/ | ||||
| *.iml | ||||
| *.iml | ||||
| cli/cli | ||||
							
								
								
									
										17
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								Dockerfile
									
									
									
									
									
								
							| @ -1,17 +0,0 @@ | ||||
| FROM ubuntu:16.04 | ||||
| 
 | ||||
| # WIP... | ||||
| RUN apt update | ||||
| RUN apt install git libopencv-dev wget libssl-dev -y | ||||
| 
 | ||||
| RUN wget https://github.com/Kitware/CMake/releases/download/v3.16.2/cmake-3.16.2.tar.gz && \ | ||||
|     tar -xzf cmake-*.tar.gz && cd cmake-* && ./bootstrap && make -j 4 && make install | ||||
| 
 | ||||
| WORKDIR /build/ | ||||
| 
 | ||||
| RUN git clone --recursive https://github.com/simon987/fastimagehash | ||||
| 
 | ||||
| WORKDIR /build/fastimagehash | ||||
| 
 | ||||
| RUN cmake . | ||||
| RUN make && make install | ||||
							
								
								
									
										194
									
								
								cli/main.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										194
									
								
								cli/main.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,194 @@ | ||||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	. "github.com/simon987/imhashdb" | ||||
| 	"github.com/simon987/imhashdb/hasher" | ||||
| 	api "github.com/simon987/imhashdb/web" | ||||
| 	"github.com/urfave/cli/v2" | ||||
| 	"os" | ||||
| ) | ||||
| 
 | ||||
| func main() { | ||||
| 	app := &cli.App{ | ||||
| 		Commands: []*cli.Command{ | ||||
| 			{ | ||||
| 				Name:  "web", | ||||
| 				Usage: "Start http API", | ||||
| 				Action: func(c *cli.Context) error { | ||||
| 					Init() | ||||
| 					return api.Main() | ||||
| 				}, | ||||
| 				Flags: []cli.Flag{ | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "pg-user", | ||||
| 						Value:       "imhashdb", | ||||
| 						Usage:       "PostgreSQL user", | ||||
| 						EnvVars:     []string{"IMHASHDB_PG_USER"}, | ||||
| 						Destination: &Conf.PgUser, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "pg-password", | ||||
| 						Value:       "imhashdb", | ||||
| 						Usage:       "PostgreSQL password", | ||||
| 						EnvVars:     []string{"IMHASHDB_PG_PASSWORD"}, | ||||
| 						Destination: &Conf.PgPassword, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "pg-db", | ||||
| 						Value:       "imhashdb", | ||||
| 						Usage:       "PostgreSQL database", | ||||
| 						EnvVars:     []string{"IMHASHDB_PG_DATABASE"}, | ||||
| 						Destination: &Conf.PgDb, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "pg-host", | ||||
| 						Value:       "localhost", | ||||
| 						Usage:       "PostgreSQL host", | ||||
| 						EnvVars:     []string{"IMHASHDB_PG_HOST"}, | ||||
| 						Destination: &Conf.PgHost, | ||||
| 					}, | ||||
| 					&cli.IntFlag{ | ||||
| 						Name:        "pg-port", | ||||
| 						Value:       5432, | ||||
| 						Usage:       "PostgreSQL port", | ||||
| 						EnvVars:     []string{"IMHASHDB_PG_PORT"}, | ||||
| 						Destination: &Conf.PgPort, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "redis-addr", | ||||
| 						Value:       "localhost:6379", | ||||
| 						Usage:       "redis address", | ||||
| 						EnvVars:     []string{"IMHASHDB_REDIS_ADDR"}, | ||||
| 						Destination: &Conf.RedisAddr, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "redis-password", | ||||
| 						Value:       "", | ||||
| 						Usage:       "redis password", | ||||
| 						EnvVars:     []string{"IMHASHDB_REDIS_PASSWORD"}, | ||||
| 						Destination: &Conf.RedisPassword, | ||||
| 					}, | ||||
| 					&cli.IntFlag{ | ||||
| 						Name:        "redis-db", | ||||
| 						Value:       0, | ||||
| 						Usage:       "redis db", | ||||
| 						EnvVars:     []string{"IMHASHDB_REDIS_DB"}, | ||||
| 						Destination: &Conf.RedisDb, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "api-addr", | ||||
| 						Value:       "localhost:8080", | ||||
| 						Usage:       "HTTP api address", | ||||
| 						EnvVars:     []string{"IMHASHDB_API_ADDR"}, | ||||
| 						Destination: &Conf.ApiAddr, | ||||
| 					}, | ||||
| 					&cli.IntFlag{ | ||||
| 						Name:        "query-concurrency", | ||||
| 						Value:       2, | ||||
| 						Usage:       "Number of background query workers", | ||||
| 						EnvVars:     []string{"IMHASHDB_QUERY_CONCURRENCY"}, | ||||
| 						Destination: &Conf.QueryConcurrency, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			{ | ||||
| 				Name:  "hasher", | ||||
| 				Usage: "Start an hasher instance", | ||||
| 				Action: func(c *cli.Context) error { | ||||
| 					Init() | ||||
| 					return hasher.Main() | ||||
| 				}, | ||||
| 				Flags: []cli.Flag{ | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "pg-user", | ||||
| 						Value:       "imhashdb", | ||||
| 						Usage:       "PostgreSQL user", | ||||
| 						EnvVars:     []string{"IMHASHDB_PG_USER"}, | ||||
| 						Destination: &Conf.PgUser, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "pg-password", | ||||
| 						Value:       "imhashdb", | ||||
| 						Usage:       "PostgreSQL password", | ||||
| 						EnvVars:     []string{"IMHASHDB_PG_PASSWORD"}, | ||||
| 						Destination: &Conf.PgPassword, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "pg-db", | ||||
| 						Value:       "imhashdb", | ||||
| 						Usage:       "PostgreSQL database", | ||||
| 						EnvVars:     []string{"IMHASHDB_PG_DATABASE"}, | ||||
| 						Destination: &Conf.PgDb, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "pg-host", | ||||
| 						Value:       "localhost", | ||||
| 						Usage:       "PostgreSQL host", | ||||
| 						EnvVars:     []string{"IMHASHDB_PG_HOST"}, | ||||
| 						Destination: &Conf.PgHost, | ||||
| 					}, | ||||
| 					&cli.IntFlag{ | ||||
| 						Name:        "pg-port", | ||||
| 						Value:       5432, | ||||
| 						Usage:       "PostgreSQL port", | ||||
| 						EnvVars:     []string{"IMHASHDB_PG_PORT"}, | ||||
| 						Destination: &Conf.PgPort, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "redis-addr", | ||||
| 						Value:       "localhost:6379", | ||||
| 						Usage:       "redis address", | ||||
| 						EnvVars:     []string{"IMHASHDB_REDIS_ADDR"}, | ||||
| 						Destination: &Conf.RedisAddr, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "redis-password", | ||||
| 						Value:       "", | ||||
| 						Usage:       "redis password", | ||||
| 						EnvVars:     []string{"IMHASHDB_REDIS_PASSWORD"}, | ||||
| 						Destination: &Conf.RedisPassword, | ||||
| 					}, | ||||
| 					&cli.IntFlag{ | ||||
| 						Name:        "redis-db", | ||||
| 						Value:       0, | ||||
| 						Usage:       "redis db", | ||||
| 						EnvVars:     []string{"IMHASHDB_REDIS_DB"}, | ||||
| 						Destination: &Conf.RedisDb, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "imgur-clientid", | ||||
| 						Value:       "546c25a59c58ad7", | ||||
| 						Usage:       "imgur API client id", | ||||
| 						EnvVars:     []string{"IMHASHDB_IMGUR_CLIENTID"}, | ||||
| 						Destination: &Conf.ImgurClientId, | ||||
| 					}, | ||||
| 					&cli.StringFlag{ | ||||
| 						Name:        "hasher-pattern", | ||||
| 						Value:       "q.*", | ||||
| 						Usage:       "redis pattern for hasher input tasks", | ||||
| 						EnvVars:     []string{"IMHASHDB_HASHER_PATTERN"}, | ||||
| 						Destination: &Conf.HasherPattern, | ||||
| 					}, | ||||
| 					&cli.IntFlag{ | ||||
| 						Name:        "hash-concurrency", | ||||
| 						Value:       4, | ||||
| 						Usage:       "Thread count for hasher", | ||||
| 						EnvVars:     []string{"IMHASHDB_HASH_CONCURRENCY"}, | ||||
| 						Destination: &Conf.HasherConcurrency, | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		Authors: []*cli.Author{ | ||||
| 			{ | ||||
| 				Name:  "simon987", | ||||
| 				Email: "me@simon987.net", | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	err := app.Run(os.Args) | ||||
| 	if err != nil { | ||||
| 		panic(err) | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										107
									
								
								core.go
									
									
									
									
									
								
							
							
						
						
									
										107
									
								
								core.go
									
									
									
									
									
								
							| @ -8,17 +8,11 @@ import ( | ||||
| 	"github.com/simon987/fastimagehash-go" | ||||
| 	"github.com/valyala/fasthttp" | ||||
| 	"go.uber.org/zap" | ||||
| 	"log" | ||||
| 	"net" | ||||
| 	"net/url" | ||||
| 	"os" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
| ) | ||||
| 
 | ||||
| const RedisPrefix = "q." | ||||
| const UserAgent = "imhashdb/v1.0" | ||||
| const Concurrency = 4 | ||||
| 
 | ||||
| var ImageSuffixes = []string{ | ||||
| 	".jpeg", ".jpg", ".png", | ||||
| @ -26,22 +20,53 @@ var ImageSuffixes = []string{ | ||||
| 	".bmp", ".webp", | ||||
| } | ||||
| 
 | ||||
| type Config struct { | ||||
| 	PgUser     string | ||||
| 	PgPassword string | ||||
| 	PgDb       string | ||||
| 	PgHost     string | ||||
| 	PgPort     int | ||||
| 
 | ||||
| 	RedisAddr     string | ||||
| 	RedisPassword string | ||||
| 	RedisDb       int | ||||
| 
 | ||||
| 	ApiAddr string | ||||
| 
 | ||||
| 	HasherConcurrency int | ||||
| 	QueryConcurrency  int | ||||
| 
 | ||||
| 	ImgurClientId string | ||||
| 	HasherPattern string | ||||
| } | ||||
| 
 | ||||
| var ImageBlackList = []string{} | ||||
| 
 | ||||
| var Rdb *redis.Client | ||||
| var Pgdb *pgx.ConnPool | ||||
| var Logger *zap.Logger | ||||
| var Conf Config | ||||
| 
 | ||||
| func Init() { | ||||
| 	Logger, _ = zap.NewDevelopment() | ||||
| 
 | ||||
| 	Rdb = redis.NewClient(&redis.Options{ | ||||
| 		Addr:     "localhost:6379", | ||||
| 		Password: "", | ||||
| 		DB:       0, | ||||
| 		Addr:     Conf.RedisAddr, | ||||
| 		Password: Conf.RedisPassword, | ||||
| 		DB:       Conf.RedisDb, | ||||
| 	}) | ||||
| 	_, err := Rdb.Ping().Result() | ||||
| 	if err != nil { | ||||
| 		Logger.Fatal("Could not connect to redis server") | ||||
| 	} | ||||
| 
 | ||||
| 	Pgdb = DbConnect("localhost", 5432, "imhashdb", "imhashdb", "imhashdb") | ||||
| 	Pgdb = DbConnect( | ||||
| 		Conf.PgHost, | ||||
| 		Conf.PgPort, | ||||
| 		Conf.PgUser, | ||||
| 		Conf.PgPassword, | ||||
| 		Conf.PgDb, | ||||
| 	) | ||||
| 	DbInit(Pgdb) | ||||
| } | ||||
| 
 | ||||
| @ -184,65 +209,3 @@ func Fetch(link string, headers ...[]string) ([]byte, error) { | ||||
| 
 | ||||
| 	return body, nil | ||||
| } | ||||
| 
 | ||||
| func IsPermanentError(err error) bool { | ||||
| 
 | ||||
| 	if strings.HasPrefix(err.Error(), "HTTP") { | ||||
| 		//TODO: Handle http 429 etc? | ||||
| 		return true | ||||
| 	} | ||||
| 
 | ||||
| 	var opErr *net.OpError | ||||
| 
 | ||||
| 	urlErr, ok := err.(*url.Error) | ||||
| 	if ok { | ||||
| 		opErr, ok = urlErr.Err.(*net.OpError) | ||||
| 		if !ok { | ||||
| 			if urlErr.Err != nil && urlErr.Err.Error() == "Proxy Authentication Required" { | ||||
| 				return true | ||||
| 			} | ||||
| 			return false | ||||
| 		} | ||||
| 
 | ||||
| 		if opErr.Err.Error() == "Internal Privoxy Error" { | ||||
| 			return true | ||||
| 		} | ||||
| 
 | ||||
| 	} else { | ||||
| 		_, ok := err.(net.Error) | ||||
| 		if ok { | ||||
| 			_, ok := err.(*net.DNSError) | ||||
| 			return ok | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if opErr == nil { | ||||
| 		return false | ||||
| 	} | ||||
| 
 | ||||
| 	if opErr.Timeout() { | ||||
| 		// Usually means thalt there is no route to host | ||||
| 		return true | ||||
| 	} | ||||
| 
 | ||||
| 	switch t := opErr.Err.(type) { | ||||
| 	case *net.DNSError: | ||||
| 		return true | ||||
| 	case *os.SyscallError: | ||||
| 		if errno, ok := t.Err.(syscall.Errno); ok { | ||||
| 			switch errno { | ||||
| 			case syscall.ECONNREFUSED: | ||||
| 				log.Println("connect refused") | ||||
| 				return true | ||||
| 			case syscall.ETIMEDOUT: | ||||
| 				log.Println("timeout") | ||||
| 				return false | ||||
| 			case syscall.ECONNRESET: | ||||
| 				log.Println("connection reset by peer") | ||||
| 				return false | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return false | ||||
| } | ||||
|  | ||||
							
								
								
									
										3
									
								
								go.mod
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								go.mod
									
									
									
									
									
								
							| @ -22,8 +22,9 @@ require ( | ||||
| 	github.com/onsi/gomega v1.9.0 // indirect | ||||
| 	github.com/pkg/errors v0.9.1 | ||||
| 	github.com/shopspring/decimal v0.0.0-20200227202807-02e2044944cc // indirect | ||||
| 	github.com/simon987/fastimagehash-go v0.0.0-20200412154506-b0e9d9b3a73e | ||||
| 	github.com/simon987/fastimagehash-go v0.0.0-20200412174912-bee8c91bb52e | ||||
| 	github.com/stretchr/testify v1.5.1 // indirect | ||||
| 	github.com/urfave/cli/v2 v2.2.0 | ||||
| 	github.com/valyala/fasthttp v1.9.0 | ||||
| 	go.uber.org/zap v1.14.1 | ||||
| 	golang.org/x/crypto v0.0.0-20200406173513-056763e48d71 // indirect | ||||
|  | ||||
							
								
								
									
										11
									
								
								go.sum
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								go.sum
									
									
									
									
									
								
							| @ -2,6 +2,8 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ | ||||
| github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= | ||||
| github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I= | ||||
| github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= | ||||
| github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY= | ||||
| github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= | ||||
| github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= | ||||
| github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||||
| github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= | ||||
| @ -80,8 +82,12 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE | ||||
| github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | ||||
| github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | ||||
| github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= | ||||
| github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= | ||||
| github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= | ||||
| github.com/shopspring/decimal v0.0.0-20200227202807-02e2044944cc h1:jUIKcSPO9MoMJBbEoyE/RJoE8vz7Mb8AjvifMMwSyvY= | ||||
| github.com/shopspring/decimal v0.0.0-20200227202807-02e2044944cc/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= | ||||
| github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= | ||||
| github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= | ||||
| github.com/simon987/fastimagehash-go v0.0.0-20200411005122-1886a7c50720 h1:0VrGo7jKQqv5cmuD/7Yd2O+o98/eyLi2wl4wWNsKfh0= | ||||
| github.com/simon987/fastimagehash-go v0.0.0-20200411005122-1886a7c50720/go.mod h1:MbqNG+6OaprdElEIes1aYF7qmLlaTop4j5X6pgNiaaw= | ||||
| github.com/simon987/fastimagehash-go v0.0.0-20200411154912-569fe641b1a7 h1:4XD2rCg4hJRcCZErDLp8lfMsHw5Zinr5e2t2C18GdzU= | ||||
| @ -90,6 +96,8 @@ github.com/simon987/fastimagehash-go v0.0.0-20200412153922-bcfdf954b464 h1:5p3TX | ||||
| github.com/simon987/fastimagehash-go v0.0.0-20200412153922-bcfdf954b464/go.mod h1:fmgaZptm6M5Kn3Ctu/R5p2fncGYPpGi/raZCZUrkRsY= | ||||
| github.com/simon987/fastimagehash-go v0.0.0-20200412154506-b0e9d9b3a73e h1:8+cH+kriBBb9OqtKh/wNsr+PvV8e73yNjEly5wAjFQk= | ||||
| github.com/simon987/fastimagehash-go v0.0.0-20200412154506-b0e9d9b3a73e/go.mod h1:fmgaZptm6M5Kn3Ctu/R5p2fncGYPpGi/raZCZUrkRsY= | ||||
| github.com/simon987/fastimagehash-go v0.0.0-20200412174912-bee8c91bb52e h1:86MhzPgOTM6dmzNF4qAOGY4zaZ9BgQFHLwwilMZla8I= | ||||
| github.com/simon987/fastimagehash-go v0.0.0-20200412174912-bee8c91bb52e/go.mod h1:fmgaZptm6M5Kn3Ctu/R5p2fncGYPpGi/raZCZUrkRsY= | ||||
| github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= | ||||
| github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= | ||||
| github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= | ||||
| @ -99,6 +107,9 @@ github.com/ugorji/go v1.1.7 h1:/68gy2h+1mWMrwZFeD1kQialdSzAb432dtpeJ42ovdo= | ||||
| github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= | ||||
| github.com/ugorji/go/codec v1.1.7 h1:2SvQaVZ1ouYrrKKwoSk2pzd4A9evlKJb9oTL+OaLUSs= | ||||
| github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= | ||||
| github.com/urfave/cli v1.22.4 h1:u7tSpNPPswAFymm8IehJhy4uJMlUuU/GmqSkvJ1InXA= | ||||
| github.com/urfave/cli/v2 v2.2.0 h1:JTTnM6wKzdA0Jqodd966MVj4vWbbquZykeX1sKbe2C4= | ||||
| github.com/urfave/cli/v2 v2.2.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ= | ||||
| github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= | ||||
| github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= | ||||
| github.com/valyala/fasthttp v1.9.0 h1:hNpmUdy/+ZXYpGy0OBfm7K0UQTzb73W0T0U4iJIVrMw= | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| package main | ||||
| package hasher | ||||
| 
 | ||||
| import ( | ||||
| 	"crypto/md5" | ||||
| @ -19,7 +19,7 @@ type Task struct { | ||||
| 	Id   int64    `json:"_id"` | ||||
| } | ||||
| 
 | ||||
| func dispatchFromQueue(pattern string, queue chan []string) { | ||||
| func dispatchFromQueue(pattern string, queue chan []string) error { | ||||
| 
 | ||||
| 	for { | ||||
| 		keys, err := Rdb.Keys(pattern).Result() | ||||
| @ -33,8 +33,11 @@ func dispatchFromQueue(pattern string, queue chan []string) { | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		//TODO: put in WIP list, resume on crash | ||||
| 		queue <- rawTask | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func worker(queue chan []string) { | ||||
| @ -66,11 +69,6 @@ func computeAndStore(rawTask []string) { | ||||
| 
 | ||||
| 			data, err := Fetch(turl) | ||||
| 			if err != nil { | ||||
| 				if !IsPermanentError(err) { | ||||
| 					// Retry later | ||||
| 					Logger.Debug("Will retry task later", zap.String("link", link)) | ||||
| 					Rdb.RPush(rawTask[0], rawTask[1]) | ||||
| 				} | ||||
| 				continue | ||||
| 			} | ||||
| 
 | ||||
| @ -107,19 +105,12 @@ func trimUrl(link string) string { | ||||
| 	return link | ||||
| } | ||||
| 
 | ||||
| func main() { | ||||
| 	Init() | ||||
| func Main() error { | ||||
| 	queue := make(chan []string, Conf.HasherConcurrency*2) | ||||
| 
 | ||||
| 	_, err := Rdb.Ping().Result() | ||||
| 	if err != nil { | ||||
| 		Logger.Fatal("Could not connect to redis server") | ||||
| 	} | ||||
| 
 | ||||
| 	queue := make(chan []string, 100) | ||||
| 
 | ||||
| 	for i := 0; i < Concurrency; i++ { | ||||
| 	for i := 0; i < Conf.HasherConcurrency; i++ { | ||||
| 		go worker(queue) | ||||
| 	} | ||||
| 
 | ||||
| 	dispatchFromQueue("q.reddit.*", queue) | ||||
| 	return dispatchFromQueue("q.reddit.*", queue) | ||||
| } | ||||
|  | ||||
							
								
								
									
										8
									
								
								link.go
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								link.go
									
									
									
									
									
								
							| @ -48,6 +48,10 @@ func IsImageLink(link string) bool { | ||||
| 
 | ||||
| func handleImgurLink(link string, meta *[]Meta) []string { | ||||
| 
 | ||||
| 	if strings.HasPrefix(link, "https://imgur.fun/") { | ||||
| 		link = strings.Replace(link, "imgur.fun", "imgur.com", 1) | ||||
| 	} | ||||
| 
 | ||||
| 	if ReImgurImg.MatchString(link) { | ||||
| 		id := ReImgurImg.FindStringSubmatch(link)[1] | ||||
| 
 | ||||
| @ -56,7 +60,7 @@ func handleImgurLink(link string, meta *[]Meta) []string { | ||||
| 		err := FetchJson( | ||||
| 			"https://api.imgur.com/3/image/"+id, | ||||
| 			&img, &rawJson, | ||||
| 			[]string{"Authorization", "Client-Id 546c25a59c58ad7"}, | ||||
| 			[]string{"Authorization", "Client-Id " + Conf.ImgurClientId}, | ||||
| 		) | ||||
| 		if err != nil { | ||||
| 			return nil | ||||
| @ -75,7 +79,7 @@ func handleImgurLink(link string, meta *[]Meta) []string { | ||||
| 		err := FetchJson( | ||||
| 			"https://api.imgur.com/3/album/"+id, | ||||
| 			&album, &rawJson, | ||||
| 			[]string{"Authorization", "Client-Id 546c25a59c58ad7"}, | ||||
| 			[]string{"Authorization", "Client-Id " + Conf.ImgurClientId}, | ||||
| 		) | ||||
| 		if err != nil { | ||||
| 			return nil | ||||
|  | ||||
| @ -906,9 +906,9 @@ func easyjsonD2b7633eDecodeGithubComSimon987FastimagehashGo(in *jlexer.Lexer, ou | ||||
| 			continue | ||||
| 		} | ||||
| 		switch key { | ||||
| 		case "Size": | ||||
| 		case "size": | ||||
| 			out.Size = int(in.Int()) | ||||
| 		case "Bytes": | ||||
| 		case "bytes": | ||||
| 			if in.IsNull() { | ||||
| 				in.Skip() | ||||
| 				out.Bytes = nil | ||||
| @ -930,12 +930,12 @@ func easyjsonD2b7633eEncodeGithubComSimon987FastimagehashGo(out *jwriter.Writer, | ||||
| 	first := true | ||||
| 	_ = first | ||||
| 	{ | ||||
| 		const prefix string = ",\"Size\":" | ||||
| 		const prefix string = ",\"size\":" | ||||
| 		out.RawString(prefix[1:]) | ||||
| 		out.Int(int(in.Size)) | ||||
| 	} | ||||
| 	{ | ||||
| 		const prefix string = ",\"Bytes\":" | ||||
| 		const prefix string = ",\"bytes\":" | ||||
| 		out.RawString(prefix) | ||||
| 		out.Base64Bytes(in.Bytes) | ||||
| 	} | ||||
|  | ||||
							
								
								
									
										24
									
								
								test.py
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								test.py
									
									
									
									
									
								
							| @ -1,24 +0,0 @@ | ||||
| import requests | ||||
| from base64 import b64encode | ||||
| import json | ||||
| 
 | ||||
| with open("/home/simon/Downloads/a.jpg", "rb") as f: | ||||
|     data = f.read() | ||||
| 
 | ||||
| r = requests.post("http://localhost:8080/api/hash", data=json.dumps({ | ||||
|     "data": b64encode(data).decode() | ||||
| })) | ||||
| 
 | ||||
| # print(r.content) | ||||
| 
 | ||||
| for i in range (0, 49): | ||||
|     r2 = requests.post("http://localhost:8080/api/query", data=json.dumps({ | ||||
|         "hash": r.json()["ahash:12"], | ||||
|         "type": "ahash:12", | ||||
|         "distance": 30, | ||||
|         "limit": 500 + i, | ||||
|         "offset": 0 | ||||
|     })) | ||||
|     print(r2.content.decode()) | ||||
| 
 | ||||
| 
 | ||||
							
								
								
									
										29
									
								
								web/api.go
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								web/api.go
									
									
									
									
									
								
							| @ -1,16 +1,12 @@ | ||||
| package main | ||||
| package api | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"github.com/gin-gonic/gin" | ||||
| 	"github.com/go-redis/redis/v7" | ||||
| 	"github.com/mailru/easyjson" | ||||
| 	. "github.com/simon987/imhashdb" | ||||
| 	"log" | ||||
| 	"os" | ||||
| 	"runtime/pprof" | ||||
| 	"time" | ||||
| ) | ||||
| 
 | ||||
| @ -112,28 +108,15 @@ func hash(c *gin.Context) { | ||||
| 	c.Data(200, gin.MIMEJSON, b) | ||||
| } | ||||
| 
 | ||||
| func main() { | ||||
| 	Init() | ||||
| 
 | ||||
| 	f, err := os.Create("prof") | ||||
| 	if err != nil { | ||||
| 		log.Fatal(err) | ||||
| 	} | ||||
| 	pprof.StartCPUProfile(f) | ||||
| 	go func() { | ||||
| 		time.Sleep(time.Second * 15) | ||||
| 		pprof.StopCPUProfile() | ||||
| 		fmt.Println("!!!!!!!!!!!!!!!") | ||||
| 		f.Close() | ||||
| 	}() | ||||
| 
 | ||||
| func Main() error { | ||||
| 	r := gin.Default() | ||||
| 	r.Use(CORSMiddleware()) | ||||
| 	r.POST("/api/hash", hash) | ||||
| 	r.POST("/api/query", query) | ||||
| 
 | ||||
| 	//TODO: concurrency | ||||
| 	go queryWorker() | ||||
| 	for i := 0; i < Conf.QueryConcurrency; i++ { | ||||
| 		go queryWorker() | ||||
| 	} | ||||
| 
 | ||||
| 	r.Run() | ||||
| 	return r.Run(Conf.ApiAddr) | ||||
| } | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| package main | ||||
| package api | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| @ -17,7 +17,7 @@ const CacheLength = time.Second * 30 | ||||
| func queryWorker() { | ||||
| 	Logger.Info("Query worker started") | ||||
| 	for { | ||||
| 		value := Rdb.BZPopMin(time.Second * 30, inQueue).Val() | ||||
| 		value := Rdb.BZPopMin(time.Second*30, inQueue).Val() | ||||
| 		if value == nil { | ||||
| 			continue | ||||
| 		} | ||||
| @ -38,13 +38,13 @@ func queryWorker() { | ||||
| 			Logger.Info("worker query done") | ||||
| 			b = resp | ||||
| 		} | ||||
| 		Rdb.Set(outQueue + member, b, CacheLength) | ||||
| 		Rdb.Set(outQueue+member, b, CacheLength) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func dbQuery(req QueryReq, value string) ([]byte, error) { | ||||
| 	Rdb.SAdd(wipQueue, value) | ||||
| 	Rdb.Expire(wipQueue, time.Minute * 10) | ||||
| 	Rdb.Expire(wipQueue, time.Minute*10) | ||||
| 
 | ||||
| 	defer Rdb.SRem(wipQueue, value) | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user