mirror of
https://github.com/simon987/imhashdb.git
synced 2025-04-04 02:22:58 +00:00
Option to store data on disk, dockerfile
This commit is contained in:
parent
8d7bc2d133
commit
a25b5072a4
34
Dockerfile
Normal file
34
Dockerfile
Normal file
@ -0,0 +1,34 @@
|
||||
# Build image for the imhashdb CLI. Builds CMake, FFTW and fastimagehash from
# source, unpacks a Go toolchain into /usr/local, then compiles the CLI in
# /build/imhashdb/cli.
FROM ubuntu

# Build-time only: keeps apt from prompting during package installation.
ARG DEBIAN_FRONTEND=noninteractive

# Refresh the index and install in ONE layer. With separate RUN lines Docker
# can reuse a stale cached "update" layer when the install line changes, which
# leads to 404s on packages that have since been superseded.
RUN apt-get update && \
    apt-get install -y git build-essential libopencv-dev wget libssl-dev

RUN wget https://github.com/Kitware/CMake/releases/download/v3.16.2/cmake-3.16.2.tar.gz && \
    tar -xzf cmake-*.tar.gz && cd cmake-* && ./bootstrap && make -j 4 && make install

# NOTE(review): this tarball is fetched over plain HTTP and is not checksum
# verified; consider an HTTPS mirror and/or a sha256 check.
RUN wget http://fftw.org/fftw-3.3.8.tar.gz && tar -xzf fftw-3.3.8.tar.gz && cd fftw-3.3.8 && ./configure --enable-shared --disable-static --enable-threads --with-combined-threads --enable-portable-binary CFLAGS='-fPIC' && make -j 4 && make install

RUN wget https://dl.google.com/go/go1.14.2.linux-amd64.tar.gz && tar -C /usr/local -xzf go1.14.2.linux-amd64.tar.gz

WORKDIR /build/

RUN git clone --recursive https://github.com/simon987/fastimagehash

WORKDIR /build/fastimagehash

RUN cmake .
RUN make -j 4 && make install

WORKDIR /build/

COPY . /build/imhashdb

WORKDIR /build/imhashdb/cli

# The Go toolchain lives in /usr/local/go and is only put on PATH for this step.
RUN PATH=$PATH:/usr/local/go/bin go build .

# The libraries built above are installed under /usr/local/lib.
ENV LD_LIBRARY_PATH=/usr/local/lib/

ENTRYPOINT ["/build/imhashdb/cli/cli"]
|
@ -1,3 +1,7 @@
|
||||
# imhashdb
|
||||
|
||||
wip
|
||||
### Requirements
|
||||
1. PostgreSQL
|
||||
1. Redis
|
||||
1. [pg_hamming](https://github.com/simon987/pg_hamming)
|
||||
1. [fastimagehash](https://github.com/simon987/fastimagehash)
|
||||
|
@ -89,6 +89,13 @@ func main() {
|
||||
EnvVars: []string{"IMHASHDB_QUERY_CONCURRENCY"},
|
||||
Destination: &Conf.QueryConcurrency,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "store",
|
||||
Value: "",
|
||||
Usage: "If set, store downloaded images there",
|
||||
EnvVars: []string{"IMHASHDB_STORE"},
|
||||
Destination: &Conf.Store,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
|
3
core.go
3
core.go
@ -12,7 +12,6 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
const RedisPrefix = "q."
|
||||
const UserAgent = "imhashdb/v1.0"
|
||||
|
||||
var ImageSuffixes = []string{
|
||||
@ -37,6 +36,8 @@ type Config struct {
|
||||
HasherConcurrency int
|
||||
QueryConcurrency int
|
||||
|
||||
Store string
|
||||
|
||||
ImgurClientId string
|
||||
HasherPattern string
|
||||
}
|
||||
|
@ -4,8 +4,13 @@ import (
|
||||
"crypto/md5"
|
||||
"crypto/sha1"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@ -51,6 +56,29 @@ func worker(queue chan []string) {
|
||||
}
|
||||
}
|
||||
|
||||
func storeData(data []byte, sha1 [20]byte, link string) {
|
||||
|
||||
sha1Str := hex.EncodeToString(sha1[:])
|
||||
|
||||
filename := fmt.Sprintf("%s/%c/%s/",
|
||||
DataPath,
|
||||
sha1Str[0],
|
||||
sha1Str[1:3],
|
||||
)
|
||||
err := os.MkdirAll(filename, 0755)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
filename += sha1Str + filepath.Ext(link)
|
||||
|
||||
Logger.Debug("Storing image data to file", zap.String("path", filename))
|
||||
|
||||
err = ioutil.WriteFile(filename, data, 0666)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func computeAndStore(rawTask []string) {
|
||||
var task Task
|
||||
err := json.Unmarshal([]byte(rawTask[1]), &task)
|
||||
@ -61,7 +89,7 @@ func computeAndStore(rawTask []string) {
|
||||
|
||||
meta := []Meta{{
|
||||
RetrievedAt: time.Now().Unix(),
|
||||
Id: rawTask[0][len(RedisPrefix):] + "." + strconv.FormatInt(task.Id, 10),
|
||||
Id: rawTask[0][len(Pattern)-1:] + "." + strconv.FormatInt(task.Id, 10),
|
||||
Meta: []byte(rawTask[1]),
|
||||
}}
|
||||
|
||||
@ -86,11 +114,17 @@ func computeAndStore(rawTask []string) {
|
||||
return
|
||||
}
|
||||
|
||||
sha1sum := sha1.Sum(data)
|
||||
|
||||
if StoreData {
|
||||
storeData(data, sha1sum, link)
|
||||
}
|
||||
|
||||
Store(&Entry{
|
||||
H: h,
|
||||
Size: len(data),
|
||||
Sha256: sha256.Sum256(data),
|
||||
Sha1: sha1.Sum(data),
|
||||
Sha1: sha1sum,
|
||||
Md5: md5.Sum(data),
|
||||
Crc32: crc32.ChecksumIEEE(data),
|
||||
Meta: meta,
|
||||
@ -110,6 +144,10 @@ func trimUrl(link string) string {
|
||||
return link
|
||||
}
|
||||
|
||||
// StoreData reports whether downloaded image data should also be written to
// disk; it is true when Conf.Store is non-empty at the time this initialiser
// runs.
// NOTE(review): package-level initialisers run before main(), i.e. before the
// "store" CLI flag / IMHASHDB_STORE is written into Conf.Store, so unless this
// variable is reassigned elsewhere it will always be false — confirm.
var StoreData = Conf.Store != ""
|
||||
// DataPath is the root directory under which image data is stored, copied
// from Conf.Store.
// NOTE(review): same initialisation-order concern as StoreData — confirm.
var DataPath = Conf.Store
|
||||
// Pattern is the queue key pattern passed to dispatchFromQueue; its length is
// also used to strip the key prefix when building a Meta Id.
var Pattern = "imhash.*"
|
||||
|
||||
func Main() error {
|
||||
queue := make(chan []string)
|
||||
|
||||
@ -117,5 +155,5 @@ func Main() error {
|
||||
go worker(queue)
|
||||
}
|
||||
|
||||
return dispatchFromQueue("q.reddit.*", queue)
|
||||
return dispatchFromQueue(Pattern, queue)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user