Initial commit

This commit is contained in:
simon987 2020-04-11 11:40:38 -04:00
commit c006431b0f
13 changed files with 2028 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*.prof
.idea/
*.iml

17
Dockerfile Normal file
View File

@ -0,0 +1,17 @@
# Build environment for the fastimagehash native library, based on Ubuntu 16.04.
FROM ubuntu:16.04
# WIP...
# Install the build prerequisites: git, libopencv-dev, wget and libssl-dev.
RUN apt update
RUN apt install git libopencv-dev wget libssl-dev -y
# Download the CMake 3.16.2 source release, then bootstrap, build and install it.
# NOTE(review): presumably the CMake packaged for this base image is too old for fastimagehash - confirm.
RUN wget https://github.com/Kitware/CMake/releases/download/v3.16.2/cmake-3.16.2.tar.gz && \
tar -xzf cmake-*.tar.gz && cd cmake-* && ./bootstrap && make -j 4 && make install
# Clone fastimagehash (including submodules) under /build and build/install it in-tree.
WORKDIR /build/
RUN git clone --recursive https://github.com/simon987/fastimagehash
WORKDIR /build/fastimagehash
RUN cmake .
RUN make && make install

239
core.go Normal file
View File

@ -0,0 +1,239 @@
package imhashdb
import (
"encoding/json"
"github.com/go-redis/redis/v7"
"github.com/jackc/pgx"
"github.com/pkg/errors"
"github.com/simon987/fastimagehash-go"
"github.com/valyala/fasthttp"
"go.uber.org/zap"
"log"
"net"
"net/url"
"os"
"strings"
"syscall"
)
// Package-wide tunables.
const (
	// RedisPrefix is the leading part of every Redis queue key; the hasher
	// strips it from the key name when building a metadata id.
	RedisPrefix = "q."

	// UserAgent is the value sent in the User-Agent header by Fetch.
	UserAgent = "imhashdb/v1.0"

	// Concurrency is the number of worker goroutines the hasher starts.
	Concurrency = 4
)
var (
	// ImageSuffixes lists the (lower-case) URL path endings that IsImageLink
	// accepts as pointing at an image.
	ImageSuffixes = []string{
		".jpeg", ".jpg", ".png",
		".jpeg:orig", ".jpg:orig", ".png:orig",
		".bmp", ".webp",
	}

	// ImageBlackList holds substrings; TransformLink drops any link that
	// contains one of them. It is empty by default.
	ImageBlackList = []string{}
)
// Shared service handles; all three are assigned by Init.
var (
	// Rdb is the Redis client.
	Rdb *redis.Client
	// Pgdb is the PostgreSQL connection pool.
	Pgdb *pgx.ConnPool
	// Logger is the zap logger used throughout the package.
	Logger *zap.Logger
)
// Init sets up the package-level Logger, Rdb and Pgdb handles and makes sure
// the database schema exists (via DbInit).
//
// The Redis and PostgreSQL endpoints are currently hard-coded to localhost.
// Init panics if the logger cannot be built or (through DbConnect) if the
// PostgreSQL pool cannot be created.
func Init() {
	logger, err := zap.NewDevelopment()
	if err != nil {
		// Everything else in the package logs through Logger; continuing
		// with a nil logger would most likely only surface later as an
		// unrelated-looking nil-pointer panic, so fail here instead.
		panic(err)
	}
	Logger = logger

	Rdb = redis.NewClient(&redis.Options{
		Addr:     "localhost:6379",
		Password: "",
		DB:       0,
	})

	Pgdb = DbConnect("localhost", 5432, "imhashdb", "imhashdb", "imhashdb")
	DbInit(Pgdb)
}
// ComputeHash computes the five perceptual hashes (aHash, dHash, mHash, pHash
// and wHash) of the encoded image held in data and returns them bundled in a
// MultiHash.
//
// The first hash that fails aborts the computation: nil is returned together
// with an error naming the failing algorithm and the numeric fastimagehash
// status code.
func ComputeHash(data []byte) (*fastimagehash.MultiHash, error) {
	aHash, code := fastimagehash.AHashMem(data, 12)
	if code != fastimagehash.Ok {
		return nil, errors.Errorf("aHash error: %d", int(code))
	}

	dHash, code := fastimagehash.DHashMem(data, 12)
	if code != fastimagehash.Ok {
		return nil, errors.Errorf("dHash error: %d", int(code))
	}

	mHash, code := fastimagehash.MHashMem(data, 12)
	if code != fastimagehash.Ok {
		// Previously reported as "dHash error", which pointed at the wrong
		// algorithm when diagnosing failures.
		return nil, errors.Errorf("mHash error: %d", int(code))
	}

	pHash, code := fastimagehash.PHashMem(data, 12, 4)
	if code != fastimagehash.Ok {
		return nil, errors.Errorf("pHash error: %d", int(code))
	}

	wHash, code := fastimagehash.WHashMem(data, 8, 0, fastimagehash.Haar)
	if code != fastimagehash.Ok {
		return nil, errors.Errorf("wHash error: %d", int(code))
	}

	return &fastimagehash.MultiHash{
		AHash: *aHash,
		DHash: *dHash,
		MHash: *mHash,
		PHash: *pHash,
		WHash: *wHash,
	}, nil
}
// TransformLink maps a submitted link to the list of URLs that should be
// fetched for it.
//
// A link containing any ImageBlackList entry yields nil. Otherwise the link is
// offered to handleImgurLink (which may append to *meta); a non-nil result from
// it is returned as-is. Any other link is returned unchanged as a one-element
// slice.
func TransformLink(link string, meta *[]Meta) []string {
	for i := range ImageBlackList {
		if strings.Contains(link, ImageBlackList[i]) {
			return nil
		}
	}

	if resolved := handleImgurLink(link, meta); resolved != nil {
		return resolved
	}
	return []string{link}
}
// isHttpOk reports whether code is a 2xx HTTP status.
func isHttpOk(code int) bool {
	return 200 <= code && code <= 299
}
// IsImageLink reports whether the path component of link ends, ignoring case,
// with one of ImageSuffixes. Query string and fragment are not considered.
// A link that cannot be parsed as a URL is never an image link.
func IsImageLink(link string) bool {
	parsed, err := url.Parse(link)
	if err != nil {
		return false
	}

	lowered := strings.ToLower(parsed.Path)
	for i := 0; i < len(ImageSuffixes); i++ {
		if strings.HasSuffix(lowered, ImageSuffixes[i]) {
			return true
		}
	}
	return false
}
// FetchJson downloads link with Fetch (forwarding headers), decodes the
// response body as JSON into v and, on success only, stores the undecoded body
// in *raw.
//
// The error from Fetch or from json.Unmarshal is returned unchanged; *raw is
// left untouched in either failure case.
func FetchJson(link string, v interface{}, raw *[]byte, headers ...[]string) error {
	payload, err := Fetch(link, headers...)
	if err != nil {
		return err
	}

	if err := json.Unmarshal(payload, v); err != nil {
		return err
	}

	*raw = payload
	return nil
}
// sharedFetchClient is reused by every Fetch call. Building a new
// fasthttp.Client per request (as was done before) gives each request its own
// connection pool, so keep-alive connections could never be reused.
var sharedFetchClient = &fasthttp.Client{}

// Fetch performs an HTTP request for link and returns a private copy of the
// response body.
//
// The User-Agent header is set to UserAgent. Each entry of headers is a
// {name, value} pair added to the request; entries with fewer than two
// elements are ignored instead of causing an index-out-of-range panic.
//
// A transport failure is logged and returned as-is. A status outside 2xx is
// reported as an error whose text is "HTTP <code>" (IsPermanentError relies on
// that prefix).
func Fetch(link string, headers ...[]string) ([]byte, error) {
	req := fasthttp.AcquireRequest()
	defer fasthttp.ReleaseRequest(req)
	resp := fasthttp.AcquireResponse()
	defer fasthttp.ReleaseResponse(resp)

	req.SetRequestURI(link)
	req.Header.Add("User-Agent", UserAgent)
	for _, h := range headers {
		if len(h) < 2 {
			// Malformed pair: nothing sensible to send.
			continue
		}
		req.Header.Add(h[0], h[1])
	}

	if err := sharedFetchClient.Do(req, resp); err != nil {
		Logger.Warn(
			"Error during HTTP request",
			zap.String("link", link),
			zap.String("err", err.Error()),
		)
		return nil, err
	}

	code := resp.StatusCode()
	if !isHttpOk(code) {
		Logger.Debug(
			"Got HTTP error code",
			zap.String("link", link),
			zap.Int("code", code),
		)
		return nil, errors.Errorf("HTTP %d", code)
	}

	// resp is handed back to fasthttp when this function returns, so the body
	// is copied rather than returned directly.
	body := make([]byte, len(resp.Body()))
	copy(body, resp.Body())

	Logger.Debug(
		"HTTP Get",
		zap.String("link", link),
		zap.Int("size", len(body)),
	)
	return body, nil
}
// IsPermanentError classifies a fetch error: true means retrying is pointless,
// false means the request may be retried later. err must be non-nil.
//
// Rules, in order:
//   - an error whose text starts with "HTTP" (as produced by Fetch for
//     non-2xx statuses) is permanent;
//   - an error that is not a *url.Error is permanent only if it is a
//     *net.DNSError;
//   - a *url.Error that does not wrap a *net.OpError is permanent only if the
//     wrapped error reads "Proxy Authentication Required";
//   - for a wrapped *net.OpError: "Internal Privoxy Error" is permanent; a
//     timeout is permanent unless it is flagged temporary; a DNS error is
//     permanent; of the syscall errors only ECONNREFUSED is permanent.
//
// NOTE(review): only errors wrapped in *url.Error reach the *net.OpError
// analysis; a bare *net.OpError is always reported as non-permanent. Confirm
// that this matches the error types the HTTP client in use actually returns.
func IsPermanentError(err error) bool {
	if strings.HasPrefix(err.Error(), "HTTP") {
		//TODO: Handle http 429 etc?
		return true
	}

	wrapped, isURLErr := err.(*url.Error)
	if !isURLErr {
		_, isDNSErr := err.(*net.DNSError)
		return isDNSErr
	}

	netErr, isOpErr := wrapped.Err.(*net.OpError)
	if !isOpErr {
		return wrapped.Err != nil && wrapped.Err.Error() == "Proxy Authentication Required"
	}

	if netErr.Err.Error() == "Internal Privoxy Error" {
		return true
	}

	if netErr.Timeout() {
		// Usually means that there is no route to host
		return !netErr.Temporary()
	}

	if _, isDNSErr := netErr.Err.(*net.DNSError); isDNSErr {
		return true
	}

	sysErr, isSysErr := netErr.Err.(*os.SyscallError)
	if !isSysErr {
		return false
	}
	errno, isErrno := sysErr.Err.(syscall.Errno)
	if !isErrno {
		return false
	}

	switch errno {
	case syscall.ECONNREFUSED:
		log.Println("connect refused")
		return true
	case syscall.ETIMEDOUT:
		log.Println("timeout")
		return false
	case syscall.ECONNRESET:
		log.Println("connection reset by peer")
		return false
	}
	return false
}

301
db.go Normal file
View File

@ -0,0 +1,301 @@
package imhashdb
import (
"context"
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"errors"
"github.com/jackc/pgx"
"github.com/jackc/pgx/pgtype"
"github.com/mailru/easyjson"
"github.com/simon987/fastimagehash-go"
"go.uber.org/zap"
)
// Upper bounds enforced by FindImagesByHash on its query parameters.
const (
	// MaxDistance is the largest accepted search distance.
	MaxDistance = 30
	// MaxLimit is the largest accepted result limit.
	MaxLimit = 1000
)
// Entry is the input of Store: everything recorded about one fetched image.
type Entry struct {
// Perceptual hashes, one per algorithm. For a newly inserted image, Store
// writes each hash's Bytes to the matching hash_* table.
AHash *fastimagehash.Hash
DHash *fastimagehash.Hash
MHash *fastimagehash.Hash
PHash *fastimagehash.Hash
WHash *fastimagehash.Hash
// Size and the digests below are stored in the image table; Sha1 is also
// the key Store uses to look up an already-known image.
Size int
Sha1 [sha1.Size]byte
Md5 [md5.Size]byte
Sha256 [sha256.Size]byte
Crc32 uint32
// Meta lists the metadata records to attach to the image.
Meta []Meta
// Url is stored with every image/metadata association.
Url string
}
// Store persists entry.
//
// It first tries to insert a new image row. If that fails for any reason the
// image is assumed to exist already and its id is looked up by SHA-1; if the
// lookup fails too, the problem is logged and nothing else is written.
//
// Hashes are written only for a newly inserted image; a failing hash insert is
// logged and the remaining ones are still attempted. Metadata records and
// their association with the image are written for every entry.Meta element;
// the first failure there is logged and stops the function.
func Store(entry *Entry) {
	var id int64
	inserted := true

	insertErr := Pgdb.QueryRow(
		`INSERT INTO image (size, sha1, md5, sha256, crc32) VALUES ($1, $2, $3, $4, $5) RETURNING id;`,
		entry.Size, entry.Sha1[:], entry.Md5[:], entry.Sha256[:], entry.Crc32,
	).Scan(&id)
	if insertErr != nil {
		inserted = false
		lookupErr := Pgdb.QueryRow(`SELECT id FROM image WHERE sha1=$1`, entry.Sha1[:]).Scan(&id)
		if lookupErr != nil {
			Logger.Error("FIXME: Could not insert image", zap.Error(lookupErr))
			return
		}
	}

	if inserted {
		// The hash pointer is dereferenced only when its turn comes, in the
		// order listed here.
		hashInserts := []struct {
			query   string
			failure string
			hash    *fastimagehash.Hash
		}{
			{"INSERT INTO hash_ahash VALUES ($1, $2) ON CONFLICT DO NOTHING", "Could not insert ahash", entry.AHash},
			{"INSERT INTO hash_dhash VALUES ($1, $2) ON CONFLICT DO NOTHING", "Could not insert dhash", entry.DHash},
			{"INSERT INTO hash_mhash VALUES ($1, $2) ON CONFLICT DO NOTHING", "Could not insert mhash", entry.MHash},
			{"INSERT INTO hash_phash VALUES ($1, $2) ON CONFLICT DO NOTHING", "Could not insert phash", entry.PHash},
			{"INSERT INTO hash_whash VALUES ($1, $2) ON CONFLICT DO NOTHING", "Could not insert whash", entry.WHash},
		}
		for _, ins := range hashInserts {
			if _, err := Pgdb.Exec(ins.query, id, ins.hash.Bytes); err != nil {
				Logger.Error(ins.failure, zap.Error(err))
			}
		}
	}

	for _, meta := range entry.Meta {
		if _, err := Pgdb.Exec(
			"INSERT INTO image_meta VALUES ($1, $2, $3) ON CONFLICT DO NOTHING",
			meta.Id, meta.RetrievedAt, meta.Meta,
		); err != nil {
			Logger.Error("Could not insert meta", zap.Error(err))
			return
		}

		if _, err := Pgdb.Exec(
			"INSERT INTO image_has_meta VALUES ($1, $2, $3) ON CONFLICT DO NOTHING",
			id, entry.Url, meta.Id,
		); err != nil {
			Logger.Error("Could not insert ihm", zap.Error(err))
			return
		}
	}
}
// isHashValid reports whether hash has the byte length required for hashType:
// 18 bytes for AHash12, DHash12, MHash12 and PHash12, 8 bytes for WHash8Haar.
// Any other hash type is invalid.
func isHashValid(hash []byte, hashType HashType) bool {
	var want int
	switch hashType {
	case AHash12, DHash12, MHash12, PHash12:
		want = 18
	case WHash8Haar:
		want = 8
	default:
		return false
	}
	return len(hash) == want
}
// FindImagesByHash returns, JSON-encoded as an ImageList, the images whose
// stored hash of type hashType is within distance of hash, together with the
// metadata attached to each image.
//
// Results are ordered by image id and paged with limit and offset. An error is
// returned when hash has the wrong length for hashType, when distance exceeds
// MaxDistance, when limit exceeds MaxLimit, or when a database or encoding
// step fails.
//
// All queries run inside one read-uncommitted transaction, which is committed
// when the function returns.
func FindImagesByHash(ctx context.Context, hash []byte, hashType HashType, distance, limit, offset uint) ([]byte, error) {
	if !isHashValid(hash, hashType) {
		return nil, errors.New("invalid hash")
	}
	if distance > MaxDistance {
		return nil, errors.New("Invalid distance")
	}
	if limit > MaxLimit {
		// Previously reported as "Invalid distance".
		return nil, errors.New("Invalid limit")
	}

	tx, err := Pgdb.BeginEx(ctx, &pgx.TxOptions{IsoLevel: pgx.ReadUncommitted})
	if err != nil {
		return nil, err
	}
	defer tx.Commit()

	// isHashValid has already rejected every hash type not listed here.
	var sql string
	switch hashType {
	case AHash12:
		sql = `SELECT image.* FROM image INNER JOIN hash_ahash h on image.id = h.image_id
WHERE hash_is_within_distance18(h.hash, $1, $2) ORDER BY image.id LIMIT $3 OFFSET $4`
	case DHash12:
		sql = `SELECT image.* FROM image INNER JOIN hash_dhash h on image.id = h.image_id
WHERE hash_is_within_distance18(h.hash, $1, $2) ORDER BY image.id LIMIT $3 OFFSET $4`
	case MHash12:
		sql = `SELECT image.* FROM image INNER JOIN hash_mhash h on image.id = h.image_id
WHERE hash_is_within_distance18(h.hash, $1, $2) ORDER BY image.id LIMIT $3 OFFSET $4`
	case PHash12:
		sql = `SELECT image.* FROM image INNER JOIN hash_phash h on image.id = h.image_id
WHERE hash_is_within_distance18(h.hash, $1, $2) ORDER BY image.id LIMIT $3 OFFSET $4`
	case WHash8Haar:
		sql = `SELECT image.* FROM image INNER JOIN hash_whash h on image.id = h.image_id
WHERE hash_is_within_distance8(h.hash, $1, $2) ORDER BY image.id LIMIT $3 OFFSET $4`
	}

	rows, err := tx.Query(sql, hash, distance, limit, offset)
	if err != nil {
		return nil, err
	}
	// Make sure the result set is released on every return path.
	defer rows.Close()

	var images []*Image
	for rows.Next() {
		var im Image
		if err := rows.Scan(&im.id, &im.Size, &im.Sha1, &im.Md5, &im.Sha256, &im.Crc32); err != nil {
			Logger.Error("Error while fetching db image", zap.String("err", err.Error()))
			return nil, err
		}
		images = append(images, &im)
	}
	// Next() also returns false when iteration failed midway; without this
	// check a truncated result would be returned as if it were complete.
	if err := rows.Err(); err != nil {
		Logger.Error("Error while fetching db image", zap.String("err", err.Error()))
		return nil, err
	}

	// One metadata query per image, sent to the server as a single batch.
	batch := tx.BeginBatch()
	defer batch.Close()

	for _, im := range images {
		batch.Queue(
			`SELECT ihm.url, meta.id, meta.retrieved_at, meta.meta FROM image_has_meta ihm
INNER JOIN image_meta meta on ihm.image_meta_id = meta.id
WHERE image_id=$1`,
			[]interface{}{im.id},
			// image.id is BIGSERIAL and image_has_meta.image_id is bigint, so
			// the parameter is declared as int8; int4 cannot carry ids above
			// 2^31-1.
			[]pgtype.OID{pgtype.Int8OID},
			nil,
		)
	}

	if err := batch.Send(ctx, nil); err != nil {
		Logger.Error("Error while fetching db meta", zap.String("err", err.Error()))
		return nil, err
	}

	// Results come back in the order the queries were queued.
	for _, im := range images {
		metaRows, err := batch.QueryResults()
		if err != nil {
			Logger.Error("Error while fetching db meta", zap.String("err", err.Error()))
			return nil, err
		}

		for metaRows.Next() {
			var ihm ImageHasMeta
			if err := metaRows.Scan(&ihm.Url, &ihm.Meta.Id, &ihm.Meta.RetrievedAt, &ihm.Meta.Meta); err != nil {
				return nil, err
			}
			im.Meta = append(im.Meta, ihm)
		}
		if err := metaRows.Err(); err != nil {
			Logger.Error("Error while fetching db meta", zap.String("err", err.Error()))
			return nil, err
		}
	}

	encoded, err := easyjson.Marshal(ImageList{Images: images})
	if err != nil {
		return nil, err
	}
	return encoded, nil
}
// DbInit creates the tables and indexes used by this package if they do not
// exist yet: image, image_meta, image_has_meta and one hash_* table per hash
// algorithm. A failure is logged through Logger.Fatal.
func DbInit(pool *pgx.ConnPool) {
	const schema = `
CREATE TABLE IF NOT EXISTS image (
id BIGSERIAL PRIMARY KEY,
size INT,
sha1 bytea,
md5 bytea,
sha256 bytea,
crc32 bigint
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_image_sha1 ON image(sha1);
CREATE INDEX IF NOT EXISTS idx_image_md5 ON image(md5);
CREATE INDEX IF NOT EXISTS idx_image_sha256 ON image(sha256);
CREATE INDEX IF NOT EXISTS idx_image_crc32 ON image(crc32);
CREATE TABLE IF NOT EXISTS image_meta (
id TEXT UNIQUE,
retrieved_at bigint,
meta bytea
);
CREATE TABLE IF NOT EXISTS image_has_meta (
image_id bigint REFERENCES image(id),
url TEXT,
image_meta_id text REFERENCES image_meta(id),
UNIQUE(image_id, image_meta_id)
);
CREATE TABLE IF NOT EXISTS hash_phash (
image_id BIGINT REFERENCES image(id) UNIQUE,
hash bytea
);
CREATE TABLE IF NOT EXISTS hash_ahash (
image_id BIGINT REFERENCES image(id) UNIQUE,
hash bytea
);
CREATE TABLE IF NOT EXISTS hash_dhash (
image_id BIGINT REFERENCES image(id) UNIQUE,
hash bytea
);
CREATE TABLE IF NOT EXISTS hash_mhash (
image_id BIGINT REFERENCES image(id) UNIQUE,
hash bytea
);
CREATE TABLE IF NOT EXISTS hash_whash (
image_id BIGINT REFERENCES image(id) UNIQUE,
hash bytea
);
`

	if _, execErr := pool.Exec(schema); execErr != nil {
		Logger.Fatal("Could not initialize database", zap.String("err", execErr.Error()))
	}
}
// DbConnect opens a PostgreSQL connection pool of at most 10 connections for
// the given server and credentials. It panics if the pool cannot be created.
func DbConnect(host string, port int, user, password, database string) *pgx.ConnPool {
	pool, err := pgx.NewConnPool(pgx.ConnPoolConfig{
		ConnConfig: pgx.ConnConfig{
			Host:     host,
			User:     user,
			Port:     uint16(port),
			Password: password,
			Database: database,
		},
		MaxConnections: 10,
	})
	if err != nil {
		panic(err)
	}
	return pool
}

36
go.mod Normal file
View File

@ -0,0 +1,36 @@
module github.com/simon987/imhashdb
go 1.14
require (
github.com/cockroachdb/apd v1.1.0 // indirect
github.com/fsnotify/fsnotify v1.4.9 // indirect
github.com/gin-gonic/gin v1.6.2
github.com/go-redis/redis/v7 v7.2.0
github.com/gofrs/uuid v3.2.0+incompatible // indirect
github.com/golang/protobuf v1.3.5 // indirect
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733 // indirect
github.com/jackc/pgx v3.6.2+incompatible
github.com/klauspost/compress v1.10.4 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/lib/pq v1.3.0 // indirect
github.com/mailru/easyjson v0.7.1
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.1 // indirect
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
github.com/onsi/ginkgo v1.12.0 // indirect
github.com/onsi/gomega v1.9.0 // indirect
github.com/pkg/errors v0.9.1
github.com/shopspring/decimal v0.0.0-20200227202807-02e2044944cc // indirect
github.com/simon987/fastimagehash-go v0.0.0-20200411005122-1886a7c50720
github.com/stretchr/testify v1.5.1 // indirect
github.com/valyala/fasthttp v1.9.0
go.uber.org/zap v1.14.1
golang.org/x/crypto v0.0.0-20200406173513-056763e48d71 // indirect
golang.org/x/lint v0.0.0-20200302205851-738671d3881b // indirect
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e // indirect
golang.org/x/sys v0.0.0-20200409092240-59c9f1ba88fa // indirect
golang.org/x/tools v0.0.0-20200410194907-79a7a3126eef // indirect
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
honnef.co/go/tools v0.0.1-2020.1.3 // indirect
)

178
go.sum Normal file
View File

@ -0,0 +1,178 @@
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I=
github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.6.2 h1:88crIK23zO6TqlQBt+f9FrPJNKm9ZEr7qjp9vl/d5TM=
github.com/gin-gonic/gin v1.6.2/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M=
github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A=
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q=
github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no=
github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA=
github.com/go-playground/validator/v10 v10.2.0 h1:KgJ0snyC2R9VXYN2rneOtQcw5aHQB1Vv0sFl1UcHBOY=
github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI=
github.com/go-redis/redis/v7 v7.2.0 h1:CrCexy/jYWZjW0AyVoHlcJUeZN19VWlbepTh1Vq6dJs=
github.com/go-redis/redis/v7 v7.2.0/go.mod h1:JDNMw23GTyLNC4GZu9njt15ctBQVn7xjRfnwdHj/Dcg=
github.com/gofrs/uuid v3.2.0+incompatible h1:y12jRkkFxsd7GpqdSZ+/KCs/fJbqpEXSGd4+jfEaewE=
github.com/gofrs/uuid v3.2.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733 h1:vr3AYkKovP8uR8AvSGGUK1IDqRa5lAAvEkZG1LKaCRc=
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733/go.mod h1:WrMFNQdiFJ80sQsxDoMokWK1W5TQtxBFNpzWTD84ibQ=
github.com/jackc/pgx v3.6.2+incompatible h1:2zP5OD7kiyR3xzRYMhOcXVvkDZsImVXfj+yIyTQf3/o=
github.com/jackc/pgx v3.6.2+incompatible/go.mod h1:0ZGrqGqkRlliWnWB4zKnWtjbSWbGkVEFm4TeybAXq+I=
github.com/json-iterator/go v1.1.9 h1:9yzud/Ht36ygwatGx56VwCZtlI/2AD15T1X2sjSuGns=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.10.4 h1:jFzIFaf586tquEB5EhzQG0HwGNSlgAJpG53G6Ss11wc=
github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y=
github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
github.com/lib/pq v1.3.0 h1:/qkRGz8zljWiDcFvgpwUpwIAPu3r07TDvs3Rws+o/pU=
github.com/lib/pq v1.3.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/mailru/easyjson v0.7.1 h1:mdxE1MF9o53iCb2Ghj1VfWvh7ZOwHpnVG/xwXrV90U8=
github.com/mailru/easyjson v0.7.1/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs=
github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.12.0 h1:Iw5WCbBcaAAd0fpRb1c9r5YCylv4XDoCSigm1zLevwU=
github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg=
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
github.com/onsi/gomega v1.9.0 h1:R1uwffexN6Pr340GtYRIdZmAiN4J+iw6WG4wog1DUXg=
github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/shopspring/decimal v0.0.0-20200227202807-02e2044944cc h1:jUIKcSPO9MoMJBbEoyE/RJoE8vz7Mb8AjvifMMwSyvY=
github.com/shopspring/decimal v0.0.0-20200227202807-02e2044944cc/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/simon987/fastimagehash-go v0.0.0-20200411005122-1886a7c50720 h1:0VrGo7jKQqv5cmuD/7Yd2O+o98/eyLi2wl4wWNsKfh0=
github.com/simon987/fastimagehash-go v0.0.0-20200411005122-1886a7c50720/go.mod h1:MbqNG+6OaprdElEIes1aYF7qmLlaTop4j5X6pgNiaaw=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/ugorji/go v1.1.7 h1:/68gy2h+1mWMrwZFeD1kQialdSzAb432dtpeJ42ovdo=
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
github.com/ugorji/go/codec v1.1.7 h1:2SvQaVZ1ouYrrKKwoSk2pzd4A9evlKJb9oTL+OaLUSs=
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.9.0 h1:hNpmUdy/+ZXYpGy0OBfm7K0UQTzb73W0T0U4iJIVrMw=
github.com/valyala/fasthttp v1.9.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w=
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk=
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A=
go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU=
go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee h1:0mgffUl7nfd+FpvXMVz4IDEaUSmT1ysygQC7qYo7sG4=
go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA=
go.uber.org/zap v1.14.1 h1:nYDKopTbvAPq/NrUVZwT15y2lpROBiLLyoRTbXOYWOo=
go.uber.org/zap v1.14.1/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200406173513-056763e48d71 h1:DOmugCavvUtnUD114C1Wh+UgTgQZ4pMLzXxi1pSt+/Y=
golang.org/x/crypto v0.0.0-20200406173513-056763e48d71/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20200302205851-738671d3881b h1:Wh+f8QHJXR411sJR8/vRBTZ7YapZaRvUcLFFJhusH0k=
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200409092240-59c9f1ba88fa h1:mQTN3ECqfsViCNBgq+A40vdwhkGykrrQlYe3mPj6BoU=
golang.org/x/sys v0.0.0-20200409092240-59c9f1ba88fa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200410194907-79a7a3126eef h1:RHORRhs540cYZYrzgU2CPUyykkwZM78hGdzocOo9P8A=
golang.org/x/tools v0.0.0-20200410194907-79a7a3126eef/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3 h1:sXmLre5bzIR6ypkjXCDI3jHPssRhc8KD/Ome589sc3U=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=

125
hasher/hasher.go Normal file
View File

@ -0,0 +1,125 @@
package main
import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"encoding/json"
"hash/crc32"
"strconv"
"strings"
"time"
. "github.com/simon987/imhashdb"
"go.uber.org/zap"
)
// Task is the JSON payload of one queue item, as decoded by computeAndStore.
type Task struct {
// Urls holds the links to process (JSON key "_urls").
Urls []string `json:"_urls"`
// Id is the item's identifier (JSON key "_id"); it becomes part of the
// metadata id built by computeAndStore.
Id int64 `json:"_id"`
}
// dispatchFromQueue loops forever: it lists the Redis keys matching pattern,
// blocks (up to 30 seconds) popping one item from any of those lists, and
// forwards the popped {key, value} pair to queue.
//
// When the key listing fails or matches nothing, the loop pauses briefly
// before retrying. Without the pause it would spin at full speed, because a
// blocking pop with no keys fails immediately.
func dispatchFromQueue(pattern string, queue chan []string) {
	const idleDelay = time.Second

	for {
		keys, err := Rdb.Keys(pattern).Result()
		if err != nil {
			Logger.Error(
				"Could not get keys for pattern",
				zap.String("pattern", pattern),
				zap.Error(err),
			)
			time.Sleep(idleDelay)
			continue
		}
		if len(keys) == 0 {
			// No queue exists yet; check again shortly.
			time.Sleep(idleDelay)
			continue
		}

		rawTask, err := Rdb.BLPop(time.Second*30, keys...).Result()
		if err != nil {
			// Covers the ordinary case of the pop timing out with nothing to
			// do; the key list is refreshed on the next iteration.
			continue
		}

		queue <- rawTask
	}
}
// worker hands every task received on queue to computeAndStore and returns
// once queue has been closed and drained.
func worker(queue chan []string) {
	for {
		rawTask, open := <-queue
		if !open {
			return
		}
		computeAndStore(rawTask)
	}
}
func computeAndStore(rawTask []string) {
var task Task
err := json.Unmarshal([]byte(rawTask[1]), &task)
if err != nil {
Logger.Error("Corrupt task body", zap.String("body", rawTask[1]))
return
}
meta := []Meta{{
RetrievedAt: time.Now().Unix(),
Id: rawTask[0][len(RedisPrefix):] + "." + strconv.FormatInt(task.Id, 10),
Meta: []byte(rawTask[1]),
}}
for _, link := range task.Urls {
for _, turl := range TransformLink(link, &meta) {
if !IsImageLink(turl) {
Logger.Debug("Ignoring non-image URL", zap.String("link", link))
continue
}
data, err := Fetch(turl)
if err != nil {
if !IsPermanentError(err) {
// Retry later
Logger.Debug("Will retry task later", zap.String("link", link))
Rdb.RPush(rawTask[0], rawTask[1])
}
continue
}
h, err := ComputeHash(data)
if err != nil {
return
}
Store(&Entry{
AHash: &h.AHash,
DHash: &h.DHash,
MHash: &h.MHash,
PHash: &h.PHash,
WHash: &h.WHash,
Size: len(data),
Sha256: sha256.Sum256(data),
Sha1: sha1.Sum(data),
Md5: md5.Sum(data),
Crc32: crc32.ChecksumIEEE(data),
Meta: meta,
Url: trimUrl(turl),
})
}
}
}
// trimUrl strips a single leading "https://" or "http://" from link and
// returns the remainder. Links that start with neither prefix are returned
// unchanged; the match is case-sensitive.
func trimUrl(link string) string {
	for _, scheme := range []string{"https://", "http://"} {
		if strings.HasPrefix(link, scheme) {
			return link[len(scheme):]
		}
	}
	return link
}
// main starts the hasher: it initialises the shared clients, verifies that
// Redis answers a ping, starts Concurrency workers reading from a buffered
// channel and then dispatches tasks from the reddit queues forever.
func main() {
	Init()

	if _, err := Rdb.Ping().Result(); err != nil {
		// Include the cause so a failed start-up can be diagnosed.
		Logger.Fatal("Could not connect to redis server", zap.Error(err))
	}

	queue := make(chan []string, 100)
	for i := 0; i < Concurrency; i++ {
		go worker(queue)
	}

	// Build the pattern from RedisPrefix: computeAndStore strips exactly
	// that prefix from the queue name.
	dispatchFromQueue(RedisPrefix+"reddit.*", queue)
}

75
link.go Normal file
View File

@ -0,0 +1,75 @@
package imhashdb
import (
"go.uber.org/zap"
"regexp"
"time"
)
// ReImgurImg matches an imgur.com link (optional http/https scheme, optional
// "www.", "i." or "m." subdomain) whose path starts with 7 or 5 word
// characters, captured as group 1, optionally followed by one of the letters
// s, b, t, m, l, h. The pattern is unanchored.
// NOTE(review): the trailing letter is presumably a thumbnail-size suffix;
// confirm against the imgur URL scheme.
var ReImgurImg = regexp.MustCompile("(?:https?://)?(?:www\\.|[im]\\.)?imgur\\.com/(\\w{7}|\\w{5})[sbtmlh]?")
// ReImgurAlbum matches the same hosts as ReImgurImg but requires an "/a/"
// path segment before the 7- or 5-character id captured as group 1.
var ReImgurAlbum = regexp.MustCompile("(?:https?://)?(?:www\\.|[im]\\.)?imgur\\.com/a/(\\w{7}|\\w{5})")
// ImgurImgResp is the part of the imgur image API response that
// handleImgurLink decodes: the image found under the "data" key.
type ImgurImgResp struct {
Data ImgurImg `json:"data"`
}
// ImgurImg is a single image entry of an imgur API response; only its "link"
// field is decoded.
type ImgurImg struct {
Link string `json:"link"`
}
// ImgurAlbumResp is the part of the imgur album API response that
// handleImgurLink decodes: the list of images under "data"."images".
type ImgurAlbumResp struct {
Data struct {
Images []ImgurImg `json:"images"`
} `json:"data"`
}
// handleImgurLink resolves an imgur page link to direct image links through
// the imgur API.
//
// If link matches ReImgurImg, the image endpoint is queried and a one-element
// slice holding the returned link is produced. Otherwise, if link matches
// ReImgurAlbum, the album endpoint is queried and the link of every image in
// the album is returned. In both cases the raw API response is appended to
// *meta as a new Meta record ("imgur.i.<id>" or "imgur.a.<id>").
//
// It returns nil when link matches neither pattern or when the API request
// fails; failures are logged rather than dropped silently.
//
// NOTE(review): the API client id is hard-coded here; consider moving it to
// configuration.
func handleImgurLink(link string, meta *[]Meta) []string {
	authHeader := []string{"Authorization", "Client-Id 546c25a59c58ad7"}

	// Single regex evaluation per pattern: a nil result means "no match".
	if m := ReImgurImg.FindStringSubmatch(link); m != nil {
		id := m[1]

		var img ImgurImgResp
		var rawJson []byte
		err := FetchJson(
			"https://api.imgur.com/3/image/"+id,
			&img, &rawJson,
			authHeader,
		)
		if err != nil {
			Logger.Debug("Could not fetch imgur image", zap.String("id", id), zap.Error(err))
			return nil
		}

		Logger.Debug("Got ImgurImgResp", zap.String("id", id))
		*meta = append(*meta, Meta{RetrievedAt: time.Now().Unix(), Id: "imgur.i." + id, Meta: rawJson})
		return []string{img.Data.Link}
	}

	if m := ReImgurAlbum.FindStringSubmatch(link); m != nil {
		id := m[1]

		var album ImgurAlbumResp
		var rawJson []byte
		err := FetchJson(
			"https://api.imgur.com/3/album/"+id,
			&album, &rawJson,
			authHeader,
		)
		if err != nil {
			Logger.Debug("Could not fetch imgur album", zap.String("id", id), zap.Error(err))
			return nil
		}

		Logger.Debug(
			"Got ImgurAlbumResp",
			zap.String("id", id),
			zap.Int("count", len(album.Data.Images)),
		)
		*meta = append(*meta, Meta{RetrievedAt: time.Now().Unix(), Id: "imgur.a." + id, Meta: rawJson})

		links := make([]string, len(album.Data.Images))
		for i, img := range album.Data.Images {
			links[i] = img.Link
		}
		return links
	}

	return nil
}

61
models.go Normal file
View File

@ -0,0 +1,61 @@
package imhashdb
// HashType names a perceptual-hash algorithm together with its parameters.
// Its string form is the value carried in the "type" field of a QueryReq.
type HashType string

// Supported hash types. The spelling follows the JSON keys used by HashResp
// ("ahash:12", "dhash:12", "mhash:12", "phash:12:4").
const (
	AHash12 HashType = "ahash:12"
	DHash12 HashType = "dhash:12"
	MHash12 HashType = "mhash:12"
	// PHash12 was previously misspelled "phaash:12:4", which did not match
	// the "phash:12:4" key returned by the hash endpoint.
	PHash12    HashType = "phash:12:4"
	WHash8Haar HashType = "whash:8:haar"
)
// HashReq is the request body of the hash endpoint.
type HashReq struct {
// Data holds the image bytes to hash; the generated encoder writes it as a
// base64 string under the "data" key.
Data []byte `json:"data"`
}
// HashResp is the response body of the hash endpoint: one byte slice per
// hash algorithm, each written as a base64 string by the generated encoder.
type HashResp struct {
AHash []byte `json:"ahash:12"`
DHash []byte `json:"dhash:12"`
MHash []byte `json:"mhash:12"`
PHash []byte `json:"phash:12:4"`
// NOTE(review): this key reads "whash:18:haar" while the WHash8Haar
// constant is "whash:8:haar" - confirm which spelling is intended. The
// generated easyjson code uses the same key as this tag.
WHash []byte `json:"whash:18:haar"`
}
// QueryReq is the request body of the query endpoint. Its fields are passed
// on to FindImagesByHash by the query worker.
type QueryReq struct {
// HashType selects which hash the query is run against ("type" key).
HashType HashType `json:"type"`
// Hash is the hash value to search for; the generated encoder writes it
// as a base64 string.
Hash []byte `json:"hash"`
// Distance, Limit and Offset are forwarded unchanged to the lookup.
// NOTE(review): presumably maximum hash distance and result paging -
// confirm against FindImagesByHash.
Distance uint `json:"distance"`
Limit uint `json:"limit"`
Offset uint `json:"offset"`
}
// ImageList is a JSON envelope holding a list of images under the "images"
// key. Nil entries are encoded as JSON null by the generated encoder.
type ImageList struct {
Images []*Image `json:"images"`
}
// QueryResp is the error envelope returned when a query fails or times out.
type QueryResp struct {
// Err is the error message; it is omitted from the JSON when empty.
Err string `json:"err,omitempty"`
}
// Meta is one piece of source metadata collected while processing a task,
// for example the task body itself or a raw imgur API response.
type Meta struct {
// RetrievedAt is a Unix timestamp in seconds (producers use
// time.Now().Unix()).
RetrievedAt int64 `json:"retrieved_at"`
// Id identifies the source of the record, e.g. "imgur.i.<id>".
Id string `json:"id"`
// Meta is the raw payload; the generated encoder writes it as a base64
// string.
Meta []byte `json:"meta"`
}
// ImageHasMeta pairs a URL with a Meta record, as listed in Image.Meta.
type ImageHasMeta struct {
Url string `json:"url"`
Meta Meta `json:"meta"`
}
// Image describes one stored image: its size, checksums and the metadata
// records attached to it.
type Image struct {
// id is unexported and has no JSON tag; the generated codec neither reads
// nor writes it.
id int64
Size int `json:"size"`
// The digests are byte slices, written as base64 strings by the generated
// encoder.
Sha1 []byte `json:"sha1"`
Md5 []byte `json:"md5"`
Sha256 []byte `json:"sha256"`
Crc32 uint32 `json:"crc32"`
Meta []ImageHasMeta `json:"meta"`
}

791
models_easyjson.go Normal file
View File

@ -0,0 +1,791 @@
// Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT.
package imhashdb
import (
json "encoding/json"
easyjson "github.com/mailru/easyjson"
jlexer "github.com/mailru/easyjson/jlexer"
jwriter "github.com/mailru/easyjson/jwriter"
)
// suppress unused package warning: each blank variable references one of the
// imported packages so the file compiles even if the generated code below
// does not otherwise use that package.
var (
_ *json.RawMessage
_ *jlexer.Lexer
_ *jwriter.Writer
_ easyjson.Marshaler
)
// easyjsonD2b7633eDecodeGithubComSimon987Imhashdb decodes one JSON value from
// in into out (a QueryResp). A JSON null is skipped and leaves out untouched.
// Otherwise an object is expected: the "err" key is stored in out.Err, keys
// with a null value are skipped and unknown keys are discarded.
func easyjsonD2b7633eDecodeGithubComSimon987Imhashdb(in *jlexer.Lexer, out *QueryResp) {
// in.Consumed() is only called when this decoder started at the beginning
// of the input.
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeString()
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "err":
out.Err = string(in.String())
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
// easyjsonD2b7633eEncodeGithubComSimon987Imhashdb writes in (a QueryResp) to
// out as a JSON object. The "err" key is emitted only when in.Err is
// non-empty, so an empty response encodes as "{}".
func easyjsonD2b7633eEncodeGithubComSimon987Imhashdb(out *jwriter.Writer, in QueryResp) {
out.RawByte('{')
first := true
_ = first
if in.Err != "" {
// prefix[1:] drops the leading comma because this is the first key.
const prefix string = ",\"err\":"
first = false
out.RawString(prefix[1:])
out.String(string(in.Err))
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface: it runs the generated
// QueryResp encoder on a fresh jwriter.Writer and returns the built bytes
// together with the writer's error.
func (v QueryResp) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface: it writes v's JSON
// encoding to the caller-supplied writer w.
func (v QueryResp) MarshalEasyJSON(w *jwriter.Writer) {
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface: it wraps data in a
// jlexer.Lexer, runs the generated QueryResp decoder into v and returns the
// lexer's error.
func (v *QueryResp) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface: it decodes into
// v from the caller-supplied lexer l.
func (v *QueryResp) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb(l, v)
}
// easyjsonD2b7633eDecodeGithubComSimon987Imhashdb1 decodes one JSON value
// from in into out (a QueryReq). A JSON null is skipped and leaves out
// untouched. Otherwise an object is expected with the keys "type", "hash",
// "distance", "limit" and "offset"; keys with a null value are skipped and
// unknown keys are discarded.
func easyjsonD2b7633eDecodeGithubComSimon987Imhashdb1(in *jlexer.Lexer, out *QueryReq) {
// in.Consumed() is only called when this decoder started at the beginning
// of the input.
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeString()
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "type":
out.HashType = HashType(in.String())
case "hash":
if in.IsNull() {
in.Skip()
out.Hash = nil
} else {
out.Hash = in.Bytes()
}
case "distance":
out.Distance = uint(in.Uint())
case "limit":
out.Limit = uint(in.Uint())
case "offset":
out.Offset = uint(in.Uint())
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
// easyjsonD2b7633eEncodeGithubComSimon987Imhashdb1 writes in (a QueryReq) to
// out as a JSON object with the keys "type", "hash", "distance", "limit" and
// "offset", always in that order. Hash is written as a base64 string.
func easyjsonD2b7633eEncodeGithubComSimon987Imhashdb1(out *jwriter.Writer, in QueryReq) {
out.RawByte('{')
first := true
_ = first
{
// prefix[1:] drops the leading comma because this is the first key.
const prefix string = ",\"type\":"
out.RawString(prefix[1:])
out.String(string(in.HashType))
}
{
const prefix string = ",\"hash\":"
out.RawString(prefix)
out.Base64Bytes(in.Hash)
}
{
const prefix string = ",\"distance\":"
out.RawString(prefix)
out.Uint(uint(in.Distance))
}
{
const prefix string = ",\"limit\":"
out.RawString(prefix)
out.Uint(uint(in.Limit))
}
{
const prefix string = ",\"offset\":"
out.RawString(prefix)
out.Uint(uint(in.Offset))
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface: it runs the generated
// QueryReq encoder on a fresh jwriter.Writer and returns the built bytes
// together with the writer's error.
func (v QueryReq) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb1(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface: it writes v's JSON
// encoding to the caller-supplied writer w.
func (v QueryReq) MarshalEasyJSON(w *jwriter.Writer) {
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb1(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface: it wraps data in a
// jlexer.Lexer, runs the generated QueryReq decoder into v and returns the
// lexer's error.
func (v *QueryReq) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb1(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface: it decodes into
// v from the caller-supplied lexer l.
func (v *QueryReq) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb1(l, v)
}
// easyjsonD2b7633eDecodeGithubComSimon987Imhashdb2 decodes one JSON value
// from in into out (a Meta). A JSON null is skipped and leaves out untouched.
// Otherwise an object is expected with the keys "retrieved_at", "id" and
// "meta"; keys with a null value are skipped and unknown keys are discarded.
func easyjsonD2b7633eDecodeGithubComSimon987Imhashdb2(in *jlexer.Lexer, out *Meta) {
// in.Consumed() is only called when this decoder started at the beginning
// of the input.
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeString()
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "retrieved_at":
out.RetrievedAt = int64(in.Int64())
case "id":
out.Id = string(in.String())
case "meta":
if in.IsNull() {
in.Skip()
out.Meta = nil
} else {
out.Meta = in.Bytes()
}
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
// easyjsonD2b7633eEncodeGithubComSimon987Imhashdb2 writes in (a Meta) to out
// as a JSON object with the keys "retrieved_at", "id" and "meta", always in
// that order. Meta is written as a base64 string.
func easyjsonD2b7633eEncodeGithubComSimon987Imhashdb2(out *jwriter.Writer, in Meta) {
out.RawByte('{')
first := true
_ = first
{
// prefix[1:] drops the leading comma because this is the first key.
const prefix string = ",\"retrieved_at\":"
out.RawString(prefix[1:])
out.Int64(int64(in.RetrievedAt))
}
{
const prefix string = ",\"id\":"
out.RawString(prefix)
out.String(string(in.Id))
}
{
const prefix string = ",\"meta\":"
out.RawString(prefix)
out.Base64Bytes(in.Meta)
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface: it runs the generated Meta
// encoder on a fresh jwriter.Writer and returns the built bytes together with
// the writer's error.
func (v Meta) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb2(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface: it writes v's JSON
// encoding to the caller-supplied writer w.
func (v Meta) MarshalEasyJSON(w *jwriter.Writer) {
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb2(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface: it wraps data in a
// jlexer.Lexer, runs the generated Meta decoder into v and returns the
// lexer's error.
func (v *Meta) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb2(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface: it decodes into
// v from the caller-supplied lexer l.
func (v *Meta) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb2(l, v)
}
// easyjsonD2b7633eDecodeGithubComSimon987Imhashdb3 decodes one JSON value
// from in into out (an ImageList). A JSON null is skipped and leaves out
// untouched. Otherwise an object is expected: the "images" key is decoded as
// an array of Image objects, keys with a null value are skipped and unknown
// keys are discarded.
func easyjsonD2b7633eDecodeGithubComSimon987Imhashdb3(in *jlexer.Lexer, out *ImageList) {
// in.Consumed() is only called when this decoder started at the beginning
// of the input.
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeString()
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "images":
if in.IsNull() {
in.Skip()
out.Images = nil
} else {
in.Delim('[')
// Reuse an existing slice (truncated to length 0); otherwise allocate
// one with capacity 8, or an empty non-nil slice for an empty array.
if out.Images == nil {
if !in.IsDelim(']') {
out.Images = make([]*Image, 0, 8)
} else {
out.Images = []*Image{}
}
} else {
out.Images = (out.Images)[:0]
}
for !in.IsDelim(']') {
// A null array element is appended as a nil *Image.
var v7 *Image
if in.IsNull() {
in.Skip()
v7 = nil
} else {
if v7 == nil {
v7 = new(Image)
}
(*v7).UnmarshalEasyJSON(in)
}
out.Images = append(out.Images, v7)
in.WantComma()
}
in.Delim(']')
}
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
// easyjsonD2b7633eEncodeGithubComSimon987Imhashdb3 writes in (an ImageList)
// to out as a JSON object with a single "images" key. A nil slice is written
// as null unless the writer's NilSliceAsEmpty flag is set; nil elements are
// written as null.
func easyjsonD2b7633eEncodeGithubComSimon987Imhashdb3(out *jwriter.Writer, in ImageList) {
out.RawByte('{')
first := true
_ = first
{
// prefix[1:] drops the leading comma because this is the first key.
const prefix string = ",\"images\":"
out.RawString(prefix[1:])
if in.Images == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 {
out.RawString("null")
} else {
out.RawByte('[')
for v8, v9 := range in.Images {
if v8 > 0 {
out.RawByte(',')
}
if v9 == nil {
out.RawString("null")
} else {
(*v9).MarshalEasyJSON(out)
}
}
out.RawByte(']')
}
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface: it runs the generated
// ImageList encoder on a fresh jwriter.Writer and returns the built bytes
// together with the writer's error.
func (v ImageList) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb3(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface: it writes v's JSON
// encoding to the caller-supplied writer w.
func (v ImageList) MarshalEasyJSON(w *jwriter.Writer) {
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb3(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface: it wraps data in a
// jlexer.Lexer, runs the generated ImageList decoder into v and returns the
// lexer's error.
func (v *ImageList) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb3(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface: it decodes into
// v from the caller-supplied lexer l.
func (v *ImageList) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb3(l, v)
}
// easyjsonD2b7633eDecodeGithubComSimon987Imhashdb4 decodes one JSON value
// from in into out (an ImageHasMeta). A JSON null is skipped and leaves out
// untouched. Otherwise an object is expected: "url" is stored in out.Url and
// "meta" is decoded by Meta's own decoder; keys with a null value are skipped
// and unknown keys are discarded.
func easyjsonD2b7633eDecodeGithubComSimon987Imhashdb4(in *jlexer.Lexer, out *ImageHasMeta) {
// in.Consumed() is only called when this decoder started at the beginning
// of the input.
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeString()
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "url":
out.Url = string(in.String())
case "meta":
(out.Meta).UnmarshalEasyJSON(in)
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
// easyjsonD2b7633eEncodeGithubComSimon987Imhashdb4 writes in (an
// ImageHasMeta) to out as a JSON object with the keys "url" and "meta", in
// that order; the nested Meta is written by its own encoder.
func easyjsonD2b7633eEncodeGithubComSimon987Imhashdb4(out *jwriter.Writer, in ImageHasMeta) {
out.RawByte('{')
first := true
_ = first
{
// prefix[1:] drops the leading comma because this is the first key.
const prefix string = ",\"url\":"
out.RawString(prefix[1:])
out.String(string(in.Url))
}
{
const prefix string = ",\"meta\":"
out.RawString(prefix)
(in.Meta).MarshalEasyJSON(out)
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface: it runs the generated
// ImageHasMeta encoder on a fresh jwriter.Writer and returns the built bytes
// together with the writer's error.
func (v ImageHasMeta) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb4(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface: it writes v's JSON
// encoding to the caller-supplied writer w.
func (v ImageHasMeta) MarshalEasyJSON(w *jwriter.Writer) {
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb4(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface: it wraps data in a
// jlexer.Lexer, runs the generated ImageHasMeta decoder into v and returns
// the lexer's error.
func (v *ImageHasMeta) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb4(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface: it decodes into
// v from the caller-supplied lexer l.
func (v *ImageHasMeta) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb4(l, v)
}
// easyjsonD2b7633eDecodeGithubComSimon987Imhashdb5 decodes one JSON value
// from in into out (an Image). A JSON null is skipped and leaves out
// untouched. Otherwise an object is expected with the keys "size", "sha1",
// "md5", "sha256", "crc32" and "meta"; keys with a null value are skipped and
// unknown keys are discarded. The unexported id field is never set here.
func easyjsonD2b7633eDecodeGithubComSimon987Imhashdb5(in *jlexer.Lexer, out *Image) {
// in.Consumed() is only called when this decoder started at the beginning
// of the input.
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeString()
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "size":
out.Size = int(in.Int())
case "sha1":
if in.IsNull() {
in.Skip()
out.Sha1 = nil
} else {
out.Sha1 = in.Bytes()
}
case "md5":
if in.IsNull() {
in.Skip()
out.Md5 = nil
} else {
out.Md5 = in.Bytes()
}
case "sha256":
if in.IsNull() {
in.Skip()
out.Sha256 = nil
} else {
out.Sha256 = in.Bytes()
}
case "crc32":
out.Crc32 = uint32(in.Uint32())
case "meta":
if in.IsNull() {
in.Skip()
out.Meta = nil
} else {
in.Delim('[')
// Reuse an existing slice (truncated to length 0); otherwise allocate
// one with capacity 1, or an empty non-nil slice for an empty array.
if out.Meta == nil {
if !in.IsDelim(']') {
out.Meta = make([]ImageHasMeta, 0, 1)
} else {
out.Meta = []ImageHasMeta{}
}
} else {
out.Meta = (out.Meta)[:0]
}
for !in.IsDelim(']') {
var v13 ImageHasMeta
(v13).UnmarshalEasyJSON(in)
out.Meta = append(out.Meta, v13)
in.WantComma()
}
in.Delim(']')
}
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
// easyjsonD2b7633eEncodeGithubComSimon987Imhashdb5 writes in (an Image) to
// out as a JSON object with the keys "size", "sha1", "md5", "sha256",
// "crc32" and "meta", always in that order. The digests are written as
// base64 strings; a nil Meta slice is written as null unless the writer's
// NilSliceAsEmpty flag is set. The unexported id field is not written.
func easyjsonD2b7633eEncodeGithubComSimon987Imhashdb5(out *jwriter.Writer, in Image) {
out.RawByte('{')
first := true
_ = first
{
// prefix[1:] drops the leading comma because this is the first key.
const prefix string = ",\"size\":"
out.RawString(prefix[1:])
out.Int(int(in.Size))
}
{
const prefix string = ",\"sha1\":"
out.RawString(prefix)
out.Base64Bytes(in.Sha1)
}
{
const prefix string = ",\"md5\":"
out.RawString(prefix)
out.Base64Bytes(in.Md5)
}
{
const prefix string = ",\"sha256\":"
out.RawString(prefix)
out.Base64Bytes(in.Sha256)
}
{
const prefix string = ",\"crc32\":"
out.RawString(prefix)
out.Uint32(uint32(in.Crc32))
}
{
const prefix string = ",\"meta\":"
out.RawString(prefix)
if in.Meta == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 {
out.RawString("null")
} else {
out.RawByte('[')
for v20, v21 := range in.Meta {
if v20 > 0 {
out.RawByte(',')
}
(v21).MarshalEasyJSON(out)
}
out.RawByte(']')
}
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface: it runs the generated Image
// encoder on a fresh jwriter.Writer and returns the built bytes together with
// the writer's error.
func (v Image) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb5(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface: it writes v's JSON
// encoding to the caller-supplied writer w.
func (v Image) MarshalEasyJSON(w *jwriter.Writer) {
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb5(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface: it wraps data in a
// jlexer.Lexer, runs the generated Image decoder into v and returns the
// lexer's error.
func (v *Image) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb5(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface: it decodes into
// v from the caller-supplied lexer l.
func (v *Image) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb5(l, v)
}
// easyjsonD2b7633eDecodeGithubComSimon987Imhashdb6 decodes one JSON value
// from in into out (a HashResp). A JSON null is skipped and leaves out
// untouched. Otherwise an object is expected with the keys "ahash:12",
// "dhash:12", "mhash:12", "phash:12:4" and "whash:18:haar"; keys with a null
// value are skipped and unknown keys are discarded.
func easyjsonD2b7633eDecodeGithubComSimon987Imhashdb6(in *jlexer.Lexer, out *HashResp) {
// in.Consumed() is only called when this decoder started at the beginning
// of the input.
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeString()
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "ahash:12":
if in.IsNull() {
in.Skip()
out.AHash = nil
} else {
out.AHash = in.Bytes()
}
case "dhash:12":
if in.IsNull() {
in.Skip()
out.DHash = nil
} else {
out.DHash = in.Bytes()
}
case "mhash:12":
if in.IsNull() {
in.Skip()
out.MHash = nil
} else {
out.MHash = in.Bytes()
}
case "phash:12:4":
if in.IsNull() {
in.Skip()
out.PHash = nil
} else {
out.PHash = in.Bytes()
}
case "whash:18:haar":
if in.IsNull() {
in.Skip()
out.WHash = nil
} else {
out.WHash = in.Bytes()
}
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
// easyjsonD2b7633eEncodeGithubComSimon987Imhashdb6 writes in (a HashResp) to
// out as a JSON object with the keys "ahash:12", "dhash:12", "mhash:12",
// "phash:12:4" and "whash:18:haar", always in that order. Every hash is
// written as a base64 string.
func easyjsonD2b7633eEncodeGithubComSimon987Imhashdb6(out *jwriter.Writer, in HashResp) {
out.RawByte('{')
first := true
_ = first
{
// prefix[1:] drops the leading comma because this is the first key.
const prefix string = ",\"ahash:12\":"
out.RawString(prefix[1:])
out.Base64Bytes(in.AHash)
}
{
const prefix string = ",\"dhash:12\":"
out.RawString(prefix)
out.Base64Bytes(in.DHash)
}
{
const prefix string = ",\"mhash:12\":"
out.RawString(prefix)
out.Base64Bytes(in.MHash)
}
{
const prefix string = ",\"phash:12:4\":"
out.RawString(prefix)
out.Base64Bytes(in.PHash)
}
{
const prefix string = ",\"whash:18:haar\":"
out.RawString(prefix)
out.Base64Bytes(in.WHash)
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface: it runs the generated
// HashResp encoder on a fresh jwriter.Writer and returns the built bytes
// together with the writer's error.
func (v HashResp) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb6(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface: it writes v's JSON
// encoding to the caller-supplied writer w.
func (v HashResp) MarshalEasyJSON(w *jwriter.Writer) {
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb6(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface: it wraps data in a
// jlexer.Lexer, runs the generated HashResp decoder into v and returns the
// lexer's error.
func (v *HashResp) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb6(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface: it decodes into
// v from the caller-supplied lexer l.
func (v *HashResp) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb6(l, v)
}
// easyjsonD2b7633eDecodeGithubComSimon987Imhashdb7 decodes one JSON value
// from in into out (a HashReq). A JSON null is skipped and leaves out
// untouched. Otherwise an object is expected: the "data" key is stored in
// out.Data, keys with a null value are skipped and unknown keys are
// discarded.
func easyjsonD2b7633eDecodeGithubComSimon987Imhashdb7(in *jlexer.Lexer, out *HashReq) {
// in.Consumed() is only called when this decoder started at the beginning
// of the input.
isTopLevel := in.IsStart()
if in.IsNull() {
if isTopLevel {
in.Consumed()
}
in.Skip()
return
}
in.Delim('{')
for !in.IsDelim('}') {
key := in.UnsafeString()
in.WantColon()
if in.IsNull() {
in.Skip()
in.WantComma()
continue
}
switch key {
case "data":
if in.IsNull() {
in.Skip()
out.Data = nil
} else {
out.Data = in.Bytes()
}
default:
in.SkipRecursive()
}
in.WantComma()
}
in.Delim('}')
if isTopLevel {
in.Consumed()
}
}
// easyjsonD2b7633eEncodeGithubComSimon987Imhashdb7 writes in (a HashReq) to
// out as a JSON object with a single "data" key holding in.Data as a base64
// string.
func easyjsonD2b7633eEncodeGithubComSimon987Imhashdb7(out *jwriter.Writer, in HashReq) {
out.RawByte('{')
first := true
_ = first
{
// prefix[1:] drops the leading comma because this is the first key.
const prefix string = ",\"data\":"
out.RawString(prefix[1:])
out.Base64Bytes(in.Data)
}
out.RawByte('}')
}
// MarshalJSON supports json.Marshaler interface: it runs the generated
// HashReq encoder on a fresh jwriter.Writer and returns the built bytes
// together with the writer's error.
func (v HashReq) MarshalJSON() ([]byte, error) {
w := jwriter.Writer{}
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb7(&w, v)
return w.Buffer.BuildBytes(), w.Error
}
// MarshalEasyJSON supports easyjson.Marshaler interface: it writes v's JSON
// encoding to the caller-supplied writer w.
func (v HashReq) MarshalEasyJSON(w *jwriter.Writer) {
easyjsonD2b7633eEncodeGithubComSimon987Imhashdb7(w, v)
}
// UnmarshalJSON supports json.Unmarshaler interface: it wraps data in a
// jlexer.Lexer, runs the generated HashReq decoder into v and returns the
// lexer's error.
func (v *HashReq) UnmarshalJSON(data []byte) error {
r := jlexer.Lexer{Data: data}
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb7(&r, v)
return r.Error()
}
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface: it decodes into
// v from the caller-supplied lexer l.
func (v *HashReq) UnmarshalEasyJSON(l *jlexer.Lexer) {
easyjsonD2b7633eDecodeGithubComSimon987Imhashdb7(l, v)
}

24
test.py Normal file
View File

@ -0,0 +1,24 @@
# Manual smoke test for the web API: hash a local image through /api/hash,
# then query /api/query with the returned "ahash:12" value and print what
# comes back.
import requests
from base64 import b64encode
import json
# Read the sample image as raw bytes.
# NOTE(review): the path is hard-coded to one developer's machine.
with open("/home/simon/Downloads/a.jpg", "rb") as f:
data = f.read()
# The hash endpoint takes the image as a base64 string under "data".
r = requests.post("http://localhost:8080/api/hash", data=json.dumps({
"data": b64encode(data).decode()
}))
# print(r.content)
# Issue the same lookup with limits 500..548; each distinct limit produces a
# distinct request body.
for i in range (0, 49):
r2 = requests.post("http://localhost:8080/api/query", data=json.dumps({
"hash": r.json()["ahash:12"],
"type": "ahash:12",
"distance": 30,
"limit": 500 + i,
"offset": 0
}))
print(r2.content.decode())

120
web/api.go Normal file
View File

@ -0,0 +1,120 @@
package main
import (
"context"
"errors"
"fmt"
"github.com/gin-gonic/gin"
"github.com/go-redis/redis/v7"
"github.com/mailru/easyjson"
. "github.com/simon987/imhashdb"
"log"
"os"
"runtime/pprof"
"time"
)
// submitQuery enqueues a serialized query for the query worker.
//
// Nothing is enqueued, and false is returned, when value is already a member
// of the work-in-progress set or when a result for it is already stored.
// Otherwise value is added to the input sorted set, scored with the current
// Unix time, and true is returned.
func submitQuery(value string) bool {
	inProgress := Rdb.SIsMember(wipQueue, value).Val()
	// The result lookup only happens when the query is not in progress.
	if inProgress || Rdb.Exists(outQueue+value).Val() == 1 {
		return false
	}

	entry := redis.Z{
		Score:  float64(time.Now().Unix()),
		Member: value,
	}
	Rdb.ZAdd(inQueue, &entry)
	return true
}
// pollQuery waits for the result of the query serialized as reqStr.
//
// It reads the result key every 50 ms until a value is present and returns
// that value. When ctx is cancelled or its deadline passes it returns a
// "timeout" error; the wait between polls also watches ctx, so cancellation
// is honoured immediately rather than after the next sleep.
func pollQuery(ctx context.Context, reqStr string) ([]byte, error) {
	const pollInterval = time.Millisecond * 50

	key := outQueue + reqStr

	ticker := time.NewTicker(pollInterval)
	defer ticker.Stop()

	for {
		if ctx.Err() != nil {
			return nil, errors.New("timeout")
		}

		// The error is deliberately ignored: a nil value (no result stored
		// yet, or a failed read) simply means "poll again".
		if value, _ := Rdb.Get(key).Bytes(); value != nil {
			return value, nil
		}

		select {
		case <-ctx.Done():
			return nil, errors.New("timeout")
		case <-ticker.C:
		}
	}
}
// query handles POST /api/query.
//
// The request body is bound to a QueryReq, re-serialized to obtain a
// canonical string, submitted to the query worker and then polled for up to
// 20 seconds. The worker's stored response is returned verbatim with status
// 200; if polling fails, a QueryResp carrying the error message is returned
// instead (also with status 200, as before).
//
// The polling context is derived from the request context so that polling
// stops as soon as the request itself is cancelled.
func query(c *gin.Context) {
	var req QueryReq
	if err := c.BindJSON(&req); err != nil {
		c.JSON(400, gin.H{"err": "Invalid request"})
		return
	}

	ctx, cancel := context.WithTimeout(c.Request.Context(), time.Second*20)
	defer cancel()

	reqJson, err := easyjson.Marshal(req)
	if err != nil {
		c.JSON(500, gin.H{"err": "Couldn't encode query"})
		return
	}
	value := string(reqJson)

	// The return value only says whether the query was newly enqueued; the
	// result is polled for in either case.
	submitQuery(value)

	b, err := pollQuery(ctx, value)
	if err != nil {
		b, _ = easyjson.Marshal(QueryResp{
			Err: err.Error(),
		})
	}
	c.Data(200, gin.MIMEJSON, b)
}
// hash handles POST /api/hash: it binds the body to a HashReq, computes the
// image hashes of its data and answers with a HashResp.
//
// A body that cannot be bound yields status 400 and a hashing failure yields
// status 500, both with an "err" message.
func hash(c *gin.Context) {
	var body HashReq
	if bindErr := c.BindJSON(&body); bindErr != nil {
		c.JSON(400, gin.H{"err": "Invalid request"})
		return
	}

	hashes, hashErr := ComputeHash(body.Data)
	if hashErr != nil {
		c.JSON(500, gin.H{"err": "Couldn't compute image hash"})
		return
	}

	resp := HashResp{
		AHash: hashes.AHash.Bytes,
		DHash: hashes.DHash.Bytes,
		MHash: hashes.MHash.Bytes,
		PHash: hashes.PHash.Bytes,
		WHash: hashes.WHash.Bytes,
	}
	encoded, _ := easyjson.Marshal(resp)
	c.Data(200, gin.MIMEJSON, encoded)
}
// main starts the web API: it initialises the shared clients, records a CPU
// profile of the first 15 seconds into the file "prof", registers the hash
// and query routes, starts one query worker and serves HTTP until the server
// stops.
//
// Failures to create the profile file, to start profiling or to run the
// server terminate the process with the error logged.
func main() {
	Init()

	f, err := os.Create("prof")
	if err != nil {
		log.Fatal(err)
	}
	if err := pprof.StartCPUProfile(f); err != nil {
		log.Fatal(err)
	}
	// Stop profiling after a fixed window and close the output file.
	go func() {
		time.Sleep(time.Second * 15)
		pprof.StopCPUProfile()
		fmt.Println("!!!!!!!!!!!!!!!")
		f.Close()
	}()

	r := gin.Default()
	r.POST("/api/hash", hash)
	r.POST("/api/query", query)

	//TODO: concurrency
	go queryWorker()

	if err := r.Run(); err != nil {
		log.Fatal(err)
	}
}

58
web/worker.go Normal file
View File

@ -0,0 +1,58 @@
package main
import (
"context"
"github.com/mailru/easyjson"
. "github.com/simon987/imhashdb"
"go.uber.org/zap"
"time"
)
// inQueue is the Redis sorted set of pending queries, scored by submission
// time; the query worker pops its minimum.
const inQueue = "qq:in"
// outQueue is the key prefix under which a query's response is stored; the
// serialized query is appended to it.
const outQueue = "qq:out:"
// wipQueue is the Redis set of queries currently being executed.
const wipQueue = "qq:wip"
// CacheLength is the expiry given to a stored query response.
const CacheLength = time.Second * 30
// queryWorker loops forever executing queued queries.
//
// Each iteration pops the lowest-scored member of the input sorted set
// (blocking up to 30 seconds), decodes it as a QueryReq, runs it and stores
// the outcome under outQueue+member for CacheLength. On success the stored
// value is the raw query result; on failure it is a serialized QueryResp
// carrying the error message.
//
// A member that cannot be decoded is answered with an error response rather
// than being executed as a zero-valued query.
func queryWorker() {
	Logger.Info("Query worker started")

	for {
		value := Rdb.BZPopMin(time.Second*30, inQueue).Val()
		if value == nil {
			continue
		}

		Logger.Info("worker query start")

		member, ok := value.Member.(string)
		if !ok {
			// Without the string form there is no result key to write to.
			Logger.Warn("worker got non-string queue member")
			continue
		}

		var req QueryReq
		var b []byte
		err := easyjson.Unmarshal([]byte(member), &req)
		if err == nil {
			b, err = dbQuery(req, member)
		}

		if err != nil {
			Logger.Warn("worker query error", zap.Error(err))
			b, _ = easyjson.Marshal(QueryResp{
				Err: err.Error(),
			})
		} else {
			Logger.Info("worker query done")
		}

		Rdb.Set(outQueue+member, b, CacheLength)
	}
}
// dbQuery runs req against the image database and returns the raw result.
//
// While the lookup is running, value is kept in the work-in-progress set
// (whose expiry is refreshed to ten minutes); it is removed again when the
// function returns. A failed lookup is logged and its error returned.
func dbQuery(req QueryReq, value string) ([]byte, error) {
	Rdb.SAdd(wipQueue, value)
	Rdb.Expire(wipQueue, time.Minute*10)
	defer func() {
		Rdb.SRem(wipQueue, value)
	}()

	result, queryErr := FindImagesByHash(
		context.Background(),
		req.Hash, req.HashType, req.Distance, req.Limit, req.Offset,
	)
	if queryErr == nil {
		return result, nil
	}

	Logger.Error("Couldn't perform query")
	return nil, queryErr
}