mirror of
https://github.com/simon987/imhashdb.git
synced 2025-04-10 14:16:43 +00:00
Option to store data on disk, dockerfile
This commit is contained in:
parent
8d7bc2d133
commit
a25b5072a4
34
Dockerfile
Normal file
34
Dockerfile
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
# Build image for the imhashdb CLI.
#
# Builds CMake and FFTW from source, installs the Go toolchain, builds and
# installs fastimagehash, then compiles the CLI found in ./cli of the build
# context.
#
# NOTE(review): the base image is untagged, so every build tracks
# ubuntu:latest; consider pinning a release for reproducible builds.
FROM ubuntu

# Build-time only: stops apt from prompting during package installation.
ARG DEBIAN_FRONTEND=noninteractive

# Update and install in a single layer so a cached "update" layer can never be
# paired with a newer "install" (stale package index). apt-get is used instead
# of apt because apt's CLI is not meant for scripts.
RUN apt-get update && \
    apt-get install -y git build-essential libopencv-dev wget libssl-dev && \
    rm -rf /var/lib/apt/lists/*

# CMake from source. Paths are spelled out: a "cmake-*" glob also matches the
# downloaded tarball, which only works when the shell ignores extra cd args.
RUN wget https://github.com/Kitware/CMake/releases/download/v3.16.2/cmake-3.16.2.tar.gz && \
    tar -xzf cmake-3.16.2.tar.gz && cd cmake-3.16.2 && ./bootstrap && make -j 4 && make install

# FFTW as a shared, position-independent library.
# NOTE(review): fetched over plain HTTP without a checksum; consider HTTPS
# and/or verifying the archive.
RUN wget http://fftw.org/fftw-3.3.8.tar.gz && tar -xzf fftw-3.3.8.tar.gz && cd fftw-3.3.8 && ./configure --enable-shared --disable-static --enable-threads --with-combined-threads --enable-portable-binary CFLAGS='-fPIC' && make -j 4 && make install

# Go toolchain, unpacked to /usr/local/go.
RUN wget https://dl.google.com/go/go1.14.2.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.14.2.linux-amd64.tar.gz

WORKDIR /build/

RUN git clone --recursive https://github.com/simon987/fastimagehash

WORKDIR /build/fastimagehash

RUN cmake .
RUN make -j 4 && make install

WORKDIR /build/

COPY . /build/imhashdb

WORKDIR /build/imhashdb/cli

# Go is not on the image's PATH; extend it for this command only.
RUN PATH=$PATH:/usr/local/go/bin go build .

# Lets the runtime linker find the libraries installed under /usr/local/lib.
ENV LD_LIBRARY_PATH=/usr/local/lib/

ENTRYPOINT ["/build/imhashdb/cli/cli"]
|
@ -1,3 +1,7 @@
|
|||||||
# imhashdb
|
# imhashdb
|
||||||
|
|
||||||
wip
|
### Requirements
|
||||||
|
1. PostgreSQL
|
||||||
|
1. Redis
|
||||||
|
1. [pg_hamming](https://github.com/simon987/pg_hamming)
|
||||||
|
1. [fastimagehash](https://github.com/simon987/fastimagehash)
|
||||||
|
@ -89,6 +89,13 @@ func main() {
|
|||||||
EnvVars: []string{"IMHASHDB_QUERY_CONCURRENCY"},
|
EnvVars: []string{"IMHASHDB_QUERY_CONCURRENCY"},
|
||||||
Destination: &Conf.QueryConcurrency,
|
Destination: &Conf.QueryConcurrency,
|
||||||
},
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "store",
|
||||||
|
Value: "",
|
||||||
|
Usage: "If set, store downloaded images there",
|
||||||
|
EnvVars: []string{"IMHASHDB_STORE"},
|
||||||
|
Destination: &Conf.Store,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
3
core.go
3
core.go
@ -12,7 +12,6 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
const RedisPrefix = "q."
|
|
||||||
const UserAgent = "imhashdb/v1.0"
|
const UserAgent = "imhashdb/v1.0"
|
||||||
|
|
||||||
var ImageSuffixes = []string{
|
var ImageSuffixes = []string{
|
||||||
@ -37,6 +36,8 @@ type Config struct {
|
|||||||
HasherConcurrency int
|
HasherConcurrency int
|
||||||
QueryConcurrency int
|
QueryConcurrency int
|
||||||
|
|
||||||
|
Store string
|
||||||
|
|
||||||
ImgurClientId string
|
ImgurClientId string
|
||||||
HasherPattern string
|
HasherPattern string
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,13 @@ import (
|
|||||||
"crypto/md5"
|
"crypto/md5"
|
||||||
"crypto/sha1"
|
"crypto/sha1"
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"hash/crc32"
|
"hash/crc32"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@ -51,6 +56,29 @@ func worker(queue chan []string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func storeData(data []byte, sha1 [20]byte, link string) {
|
||||||
|
|
||||||
|
sha1Str := hex.EncodeToString(sha1[:])
|
||||||
|
|
||||||
|
filename := fmt.Sprintf("%s/%c/%s/",
|
||||||
|
DataPath,
|
||||||
|
sha1Str[0],
|
||||||
|
sha1Str[1:3],
|
||||||
|
)
|
||||||
|
err := os.MkdirAll(filename, 0755)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
filename += sha1Str + filepath.Ext(link)
|
||||||
|
|
||||||
|
Logger.Debug("Storing image data to file", zap.String("path", filename))
|
||||||
|
|
||||||
|
err = ioutil.WriteFile(filename, data, 0666)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func computeAndStore(rawTask []string) {
|
func computeAndStore(rawTask []string) {
|
||||||
var task Task
|
var task Task
|
||||||
err := json.Unmarshal([]byte(rawTask[1]), &task)
|
err := json.Unmarshal([]byte(rawTask[1]), &task)
|
||||||
@ -61,7 +89,7 @@ func computeAndStore(rawTask []string) {
|
|||||||
|
|
||||||
meta := []Meta{{
|
meta := []Meta{{
|
||||||
RetrievedAt: time.Now().Unix(),
|
RetrievedAt: time.Now().Unix(),
|
||||||
Id: rawTask[0][len(RedisPrefix):] + "." + strconv.FormatInt(task.Id, 10),
|
Id: rawTask[0][len(Pattern)-1:] + "." + strconv.FormatInt(task.Id, 10),
|
||||||
Meta: []byte(rawTask[1]),
|
Meta: []byte(rawTask[1]),
|
||||||
}}
|
}}
|
||||||
|
|
||||||
@ -86,11 +114,17 @@ func computeAndStore(rawTask []string) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sha1sum := sha1.Sum(data)
|
||||||
|
|
||||||
|
if StoreData {
|
||||||
|
storeData(data, sha1sum, link)
|
||||||
|
}
|
||||||
|
|
||||||
Store(&Entry{
|
Store(&Entry{
|
||||||
H: h,
|
H: h,
|
||||||
Size: len(data),
|
Size: len(data),
|
||||||
Sha256: sha256.Sum256(data),
|
Sha256: sha256.Sum256(data),
|
||||||
Sha1: sha1.Sum(data),
|
Sha1: sha1sum,
|
||||||
Md5: md5.Sum(data),
|
Md5: md5.Sum(data),
|
||||||
Crc32: crc32.ChecksumIEEE(data),
|
Crc32: crc32.ChecksumIEEE(data),
|
||||||
Meta: meta,
|
Meta: meta,
|
||||||
@ -110,6 +144,10 @@ func trimUrl(link string) string {
|
|||||||
return link
|
return link
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StoreData reports whether downloaded images should also be written to disk;
// computeAndStore checks it before calling storeData.
//
// NOTE(review): this initializer is evaluated during package initialization,
// while Conf.Store is the destination of the CLI "store" flag. Presumably the
// flag is parsed later, which would leave StoreData permanently false —
// confirm, and if so assign it after flag parsing instead.
var StoreData = Conf.Store != ""
|
||||||
|
// DataPath is the root directory under which storeData writes image files.
//
// NOTE(review): copied from Conf.Store during package initialization, so it is
// presumably still empty if the "store" flag is only parsed afterwards —
// confirm against the CLI start-up order.
var DataPath = Conf.Store
|
||||||
|
// Pattern is the queue pattern that Main passes to dispatchFromQueue.
// computeAndStore also uses its length: it drops the first len(Pattern)-1
// characters of rawTask[0] (presumably the queue key — verify) when building
// a Meta Id, so the literal prefix before the trailing "*" must match the
// prefix of those keys.
var Pattern = "imhash.*"
|
||||||
|
|
||||||
func Main() error {
|
func Main() error {
|
||||||
queue := make(chan []string)
|
queue := make(chan []string)
|
||||||
|
|
||||||
@ -117,5 +155,5 @@ func Main() error {
|
|||||||
go worker(queue)
|
go worker(queue)
|
||||||
}
|
}
|
||||||
|
|
||||||
return dispatchFromQueue("q.reddit.*", queue)
|
return dispatchFromQueue(Pattern, queue)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user