Option to store data on disk, dockerfile

This commit is contained in:
simon987 2020-05-30 14:52:26 -04:00
parent 8d7bc2d133
commit a25b5072a4
5 changed files with 89 additions and 5 deletions

34
Dockerfile Normal file
View File

@ -0,0 +1,34 @@
# Build image for the imhashdb CLI: compiles CMake, FFTW and fastimagehash
# from source, then builds the Go binary in /build/imhashdb/cli.
FROM ubuntu
ARG DEBIAN_FRONTEND=noninteractive
# Refresh the package index and install in the same layer so a cached
# "update" layer can never be combined with a changed package list; drop the
# index afterwards since no later step installs packages.
RUN apt-get update && \
    apt-get install -y git build-essential libopencv-dev wget libssl-dev && \
    rm -rf /var/lib/apt/lists/*
RUN wget https://github.com/Kitware/CMake/releases/download/v3.16.2/cmake-3.16.2.tar.gz && \
    tar -xzf cmake-*.tar.gz && cd cmake-* && ./bootstrap && make -j 4 && make install
RUN wget http://fftw.org/fftw-3.3.8.tar.gz && tar -xzf fftw-3.3.8.tar.gz && cd fftw-3.3.8 && ./configure --enable-shared --disable-static --enable-threads --with-combined-threads --enable-portable-binary CFLAGS='-fPIC' && make -j 4 && make install
RUN wget https://dl.google.com/go/go1.14.2.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.14.2.linux-amd64.tar.gz
WORKDIR /build/
RUN git clone --recursive https://github.com/simon987/fastimagehash
WORKDIR /build/fastimagehash
RUN cmake .
RUN make -j 4 && make install
WORKDIR /build/
COPY . /build/imhashdb
WORKDIR /build/imhashdb/cli
RUN PATH=$PATH:/usr/local/go/bin go build .
# The libraries built above are installed under /usr/local/lib.
ENV LD_LIBRARY_PATH=/usr/local/lib/
ENTRYPOINT ["/build/imhashdb/cli/cli"]

View File

@ -1,3 +1,7 @@
# imhashdb
wip
### Requirements
1. PostgreSQL
1. Redis
1. [pg_hamming](https://github.com/simon987/pg_hamming)
1. [fastimagehash](https://github.com/simon987/fastimagehash)

View File

@ -89,6 +89,13 @@ func main() {
EnvVars: []string{"IMHASHDB_QUERY_CONCURRENCY"},
Destination: &Conf.QueryConcurrency,
},
&cli.StringFlag{
Name: "store",
Value: "",
Usage: "If set, store downloaded images there",
EnvVars: []string{"IMHASHDB_STORE"},
Destination: &Conf.Store,
},
},
},
{

View File

@ -12,7 +12,6 @@ import (
"strings"
)
const RedisPrefix = "q."
const UserAgent = "imhashdb/v1.0"
var ImageSuffixes = []string{
@ -37,6 +36,8 @@ type Config struct {
HasherConcurrency int
QueryConcurrency int
Store string
ImgurClientId string
HasherPattern string
}

View File

@ -4,8 +4,13 @@ import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"hash/crc32"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"time"
@ -51,6 +56,29 @@ func worker(queue chan []string) {
}
}
// storeData writes the raw image bytes to disk under DataPath, sharded by the
// hex-encoded SHA-1 digest:
//
//	<DataPath>/<1st hex char>/<next 2 hex chars>/<sha1><ext>
//
// data is the file content, sha1 its SHA-1 digest, and link the URL the data
// was downloaded from. The extension is taken from link after removing any
// query string or fragment, so ".../a.jpg?width=100" is stored as
// "<sha1>.jpg" rather than "<sha1>.jpg?width=100".
//
// It panics if the directory cannot be created or the file cannot be written.
func storeData(data []byte, sha1 [20]byte, link string) {
	sha1Str := hex.EncodeToString(sha1[:])

	dir := filepath.Join(DataPath, sha1Str[:1], sha1Str[1:3])
	if err := os.MkdirAll(dir, 0755); err != nil {
		panic(fmt.Errorf("creating store directory %q: %w", dir, err))
	}

	// link is a URL, not a file path: cut it at the first '?' or '#' so that
	// query parameters and fragments never end up in the file name.
	urlPath := link
	if i := strings.IndexAny(urlPath, "?#"); i >= 0 {
		urlPath = urlPath[:i]
	}
	filename := filepath.Join(dir, sha1Str+filepath.Ext(urlPath))

	Logger.Debug("Storing image data to file", zap.String("path", filename))

	if err := ioutil.WriteFile(filename, data, 0666); err != nil {
		panic(fmt.Errorf("writing image file %q: %w", filename, err))
	}
}
func computeAndStore(rawTask []string) {
var task Task
err := json.Unmarshal([]byte(rawTask[1]), &task)
@ -61,7 +89,7 @@ func computeAndStore(rawTask []string) {
meta := []Meta{{
RetrievedAt: time.Now().Unix(),
Id: rawTask[0][len(RedisPrefix):] + "." + strconv.FormatInt(task.Id, 10),
Id: rawTask[0][len(Pattern)-1:] + "." + strconv.FormatInt(task.Id, 10),
Meta: []byte(rawTask[1]),
}}
@ -86,11 +114,17 @@ func computeAndStore(rawTask []string) {
return
}
sha1sum := sha1.Sum(data)
if StoreData {
storeData(data, sha1sum, link)
}
Store(&Entry{
H: h,
Size: len(data),
Sha256: sha256.Sum256(data),
Sha1: sha1.Sum(data),
Sha1: sha1sum,
Md5: md5.Sum(data),
Crc32: crc32.ChecksumIEEE(data),
Meta: meta,
@ -110,6 +144,10 @@ func trimUrl(link string) string {
return link
}
// StoreData gates the call to storeData in computeAndStore, and DataPath is
// the root directory storeData writes under. Both are derived from Conf.Store.
// NOTE(review): package-level initializers run before main(), so if Conf.Store
// is only filled in by CLI flag/env parsing (as the "store" flag's Destination
// suggests), both values are computed from the empty default and storing is
// never enabled — confirm, and consider assigning them inside Main() instead.
var StoreData = Conf.Store != ""
var DataPath = Conf.Store
// Pattern is the queue pattern passed to dispatchFromQueue by Main; its length
// is also used in computeAndStore to strip the prefix from the task key.
var Pattern = "imhash.*"
func Main() error {
queue := make(chan []string)
@ -117,5 +155,5 @@ func Main() error {
go worker(queue)
}
return dispatchFromQueue("q.reddit.*", queue)
return dispatchFromQueue(Pattern, queue)
}