Option to store data on disk, dockerfile

This commit is contained in:
simon987 2020-05-30 14:52:26 -04:00
parent 8d7bc2d133
commit a25b5072a4
5 changed files with 89 additions and 5 deletions

34
Dockerfile Normal file
View File

@ -0,0 +1,34 @@
# Build image for the imhashdb CLI.
# Builds CMake, FFTW and fastimagehash from source, installs a Go toolchain,
# then compiles the CLI found in ./cli of the build context.
# NOTE(review): the base image tag is unpinned, so rebuilds may pick up a
# different Ubuntu release -- consider pinning once a known-good tag is confirmed.
FROM ubuntu
ARG DEBIAN_FRONTEND=noninteractive

# Refresh the package index and install in the same layer so a cached, stale
# index is never reused by a later install. apt-get is used because its
# command-line interface is the one intended for scripts.
RUN apt-get update && \
    apt-get install -y git build-essential libopencv-dev wget libssl-dev && \
    rm -rf /var/lib/apt/lists/*

# CMake 3.16.2, built from the release tarball.
RUN wget https://github.com/Kitware/CMake/releases/download/v3.16.2/cmake-3.16.2.tar.gz && \
    tar -xzf cmake-*.tar.gz && cd cmake-* && ./bootstrap && make -j 4 && make install

# FFTW 3.3.8 as a shared, thread-enabled, position-independent library.
RUN wget http://fftw.org/fftw-3.3.8.tar.gz && tar -xzf fftw-3.3.8.tar.gz && cd fftw-3.3.8 && \
    ./configure --enable-shared --disable-static --enable-threads --with-combined-threads --enable-portable-binary CFLAGS='-fPIC' && \
    make -j 4 && make install

# Go 1.14.2 toolchain unpacked under /usr/local/go.
RUN wget https://dl.google.com/go/go1.14.2.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.14.2.linux-amd64.tar.gz

# fastimagehash (with submodules), built and installed with CMake.
WORKDIR /build/
RUN git clone --recursive https://github.com/simon987/fastimagehash
WORKDIR /build/fastimagehash
RUN cmake .
RUN make -j 4 && make install

# Copy the build context and compile the CLI; the Go binaries are only added
# to PATH for this single command.
WORKDIR /build/
COPY . /build/imhashdb
WORKDIR /build/imhashdb/cli
RUN PATH=$PATH:/usr/local/go/bin go build .

# Let the dynamic loader search /usr/local/lib at run time (presumably where
# the libraries built above were installed -- confirm against their install prefix).
ENV LD_LIBRARY_PATH=/usr/local/lib/
ENTRYPOINT ["/build/imhashdb/cli/cli"]

View File

@ -1,3 +1,7 @@
# imhashdb # imhashdb
wip ### Requirements
1. PostgreSQL
1. Redis
1. [pg_hamming](https://github.com/simon987/pg_hamming)
1. [fastimagehash](https://github.com/simon987/fastimagehash)

View File

@ -89,6 +89,13 @@ func main() {
EnvVars: []string{"IMHASHDB_QUERY_CONCURRENCY"}, EnvVars: []string{"IMHASHDB_QUERY_CONCURRENCY"},
Destination: &Conf.QueryConcurrency, Destination: &Conf.QueryConcurrency,
}, },
&cli.StringFlag{
Name: "store",
Value: "",
Usage: "If set, store downloaded images there",
EnvVars: []string{"IMHASHDB_STORE"},
Destination: &Conf.Store,
},
}, },
}, },
{ {

View File

@ -12,7 +12,6 @@ import (
"strings" "strings"
) )
const RedisPrefix = "q."
const UserAgent = "imhashdb/v1.0" const UserAgent = "imhashdb/v1.0"
var ImageSuffixes = []string{ var ImageSuffixes = []string{
@ -37,6 +36,8 @@ type Config struct {
HasherConcurrency int HasherConcurrency int
QueryConcurrency int QueryConcurrency int
Store string
ImgurClientId string ImgurClientId string
HasherPattern string HasherPattern string
} }

View File

@ -4,8 +4,13 @@ import (
"crypto/md5" "crypto/md5"
"crypto/sha1" "crypto/sha1"
"crypto/sha256" "crypto/sha256"
"encoding/hex"
"encoding/json" "encoding/json"
"fmt"
"hash/crc32" "hash/crc32"
"io/ioutil"
"os"
"path/filepath"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -51,6 +56,29 @@ func worker(queue chan []string) {
} }
} }
// storeData writes data to disk under DataPath, sharded by the hex-encoded
// SHA-1 digest:
//
//	<DataPath>/<1st hex char>/<2nd-3rd hex chars>/<full hex digest><ext>
//
// data is the content to write, sha1 is its SHA-1 digest, and link is the
// location the data came from; only the extension of link is used, to pick
// the file suffix.
//
// Missing directories are created with mode 0755 and the file is written
// with mode 0666 (both subject to the process umask). The function panics if
// the directory cannot be created or the file cannot be written.
func storeData(data []byte, sha1 [20]byte, link string) {
	sha1Str := hex.EncodeToString(sha1[:])

	// filepath.Join uses the OS separator and cleans the result, so a
	// trailing separator on DataPath, or an empty DataPath, cannot yield a
	// doubled separator or a path rooted at "/".
	dir := filepath.Join(DataPath, sha1Str[:1], sha1Str[1:3])
	if err := os.MkdirAll(dir, 0755); err != nil {
		panic(fmt.Errorf("creating store directory %q: %w", dir, err))
	}

	// link may carry a query string or fragment after the extension
	// (e.g. ".jpg?width=100"); keep only the extension itself.
	ext := filepath.Ext(link)
	if i := strings.IndexAny(ext, "?#"); i >= 0 {
		ext = ext[:i]
	}

	filename := filepath.Join(dir, sha1Str+ext)
	Logger.Debug("Storing image data to file", zap.String("path", filename))

	if err := ioutil.WriteFile(filename, data, 0666); err != nil {
		panic(fmt.Errorf("writing image data to %q: %w", filename, err))
	}
}
func computeAndStore(rawTask []string) { func computeAndStore(rawTask []string) {
var task Task var task Task
err := json.Unmarshal([]byte(rawTask[1]), &task) err := json.Unmarshal([]byte(rawTask[1]), &task)
@ -61,7 +89,7 @@ func computeAndStore(rawTask []string) {
meta := []Meta{{ meta := []Meta{{
RetrievedAt: time.Now().Unix(), RetrievedAt: time.Now().Unix(),
Id: rawTask[0][len(RedisPrefix):] + "." + strconv.FormatInt(task.Id, 10), Id: rawTask[0][len(Pattern)-1:] + "." + strconv.FormatInt(task.Id, 10),
Meta: []byte(rawTask[1]), Meta: []byte(rawTask[1]),
}} }}
@ -86,11 +114,17 @@ func computeAndStore(rawTask []string) {
return return
} }
sha1sum := sha1.Sum(data)
if StoreData {
storeData(data, sha1sum, link)
}
Store(&Entry{ Store(&Entry{
H: h, H: h,
Size: len(data), Size: len(data),
Sha256: sha256.Sum256(data), Sha256: sha256.Sum256(data),
Sha1: sha1.Sum(data), Sha1: sha1sum,
Md5: md5.Sum(data), Md5: md5.Sum(data),
Crc32: crc32.ChecksumIEEE(data), Crc32: crc32.ChecksumIEEE(data),
Meta: meta, Meta: meta,
@ -110,6 +144,10 @@ func trimUrl(link string) string {
return link return link
} }
// StoreData reports whether image data should also be written to disk; it is
// checked before storeData is called. It is true when Conf.Store is non-empty
// at the time this variable is initialized.
// NOTE(review): package-level variables are initialized before main runs. If
// Conf.Store is only filled in later (e.g. by command-line flag parsing), this
// captures the empty value and stays false -- confirm and, if so, assign it
// after configuration has been loaded.
var StoreData = Conf.Store != ""
// DataPath is the root directory under which storeData writes files. It is a
// copy of Conf.Store taken at package initialization (same caveat as StoreData).
var DataPath = Conf.Store
// Pattern is the queue pattern handed to dispatchFromQueue; its length is also
// used to strip the prefix from the task key when building Meta.Id.
var Pattern = "imhash.*"
func Main() error { func Main() error {
queue := make(chan []string) queue := make(chan []string)
@ -117,5 +155,5 @@ func Main() error {
go worker(queue) go worker(queue)
} }
return dispatchFromQueue("q.reddit.*", queue) return dispatchFromQueue(Pattern, queue)
} }