diff --git a/README.md b/README.md index 52450ae..67ddb39 100644 --- a/README.md +++ b/README.md @@ -13,21 +13,42 @@ replacement for C/C++.

-*[\*benchmarks](bench/)* - ### Example usage -```C++ +```C #include "fastimagehash.h" int main() { - // TODO + unsigned char result[HASH_SIZE]; + + phash_file("image.jpeg", result, HASH_SIZE, HIGHFREQ_FACTOR); } ``` +For slight additional performance gains, `libfastimagehash` can +compute all hashes at once instead of decoding the same +image at each step. +

+ +

+ +*[\*See all benchmarks](bench/)* + + ### Build from source -// TODO +```bash +# Download dependencies +apt install libopencv-dev libfftw3-dev cmake + +# Checkout source +git clone --recursive https://github.com/simon987/fastimagehash + +# Build +cmake . +make +``` + **Built with** * [opencv](https://github.com/opencv) for image decoding & resizing diff --git a/bench/README.md b/bench/README.md index 52e78ad..7d4842c 100644 --- a/bench/README.md +++ b/bench/README.md @@ -29,3 +29,7 @@ fastimagehash v0.1 **ahash** ![ahash_s](results/ahash_small.png) ![ahash_l](results/ahash_large.png) + +**multi_hash** +![multi_s](results/multi_small.png) +![multi_l](results/multi_large.png) diff --git a/bench/benchmark.py b/bench/benchmark.py index d1ceb76..b937dce 100644 --- a/bench/benchmark.py +++ b/bench/benchmark.py @@ -36,3 +36,16 @@ print_result("ahash", timeit.timeit( stmt="average_hash(Image.open('%s'), hash_size=%d)" % (IMAGE, SIZE), number=COUNT )) + +print_result("multi", timeit.timeit( + setup="from imagehash import average_hash,phash,whash,dhash \n" + "from PIL import Image", + stmt="im = Image.open('%s');" + "size = %d;" + "average_hash(im.copy(), hash_size=size);" + "phash(im.copy(), hash_size=size);" + "whash(im.copy(), hash_size=size, remove_max_haar_ll=False);" + "dhash(im.copy(), hash_size=size);" + % (IMAGE, SIZE), + number=COUNT +)) diff --git a/bench/results/multi_large.png b/bench/results/multi_large.png new file mode 100644 index 0000000..3d86b55 Binary files /dev/null and b/bench/results/multi_large.png differ diff --git a/bench/results/multi_small.png b/bench/results/multi_small.png new file mode 100644 index 0000000..acf68a9 Binary files /dev/null and b/bench/results/multi_small.png differ diff --git a/bench/run.py b/bench/run.py index 6abda3b..1fb061b 100644 --- a/bench/run.py +++ b/bench/run.py @@ -34,4 +34,6 @@ for f in files: method = "ahash" if "whash" in m: method = "whash" + if "multi" in m: + method = "multi" print("%s_%s,%s" % (f, method, t)) diff --git a/benchmark.cpp b/benchmark.cpp index 6b95add..b5f7def 100644 --- a/benchmark.cpp +++ b/benchmark.cpp @@ -67,10 +67,28 @@ static void BM_ahash(benchmark::State &state) { free(buf); } +static void BM_multi(benchmark::State &state) { + + size_t size; + void *buf = load_test_file(&size); + + multi_hash_t *m = multi_hash_create(state.range()); + + for (auto _ : state) { + multi_hash_file(filepath, m, state.range(), 4, 0); + } + + multi_hash_destroy(m); + + free(buf); +} + + BENCHMARK(BM_phash)->ArgName("size")->Arg(8); BENCHMARK(BM_whash)->ArgName("size")->Arg(8); BENCHMARK(BM_dhash)->ArgName("size")->Arg(8); BENCHMARK(BM_ahash)->ArgName("size")->Arg(8); +BENCHMARK(BM_multi)->ArgName("size")->Arg(8); int main(int argc, char **argv) { diff --git a/fastimagehash.cpp b/fastimagehash.cpp index 08e1344..3cb2750 100644 --- a/fastimagehash.cpp +++ b/fastimagehash.cpp @@ -119,7 +119,7 @@ int ahash_mem(void *buf, uchar *out, size_t buf_len, int hash_size) { uchar *pixel = im.ptr(0); int endPixel = im.cols * im.rows; - for (int i = 0; i <= endPixel; i++) { + for (int i = 0; i < endPixel; i++) { set_bit_at(out, i, pixel[i] > avg); } return 0; @@ -213,7 +213,7 @@ int whash_mem(void *buf, uchar *out, size_t buf_len, int hash_size, int img_scal uchar *pixel = im.ptr(0); const int endPixel = im.cols * im.rows; - for (int i = 0; i <= endPixel; i++) { + for (int i = 0; i < endPixel; i++) { data[i] = (double) pixel[i] / 255; } @@ -265,7 +265,7 @@ int phash_mem(void *buf, uchar *out, size_t buf_len, int hash_size, int highfreq uchar *pixel = im.ptr(0); int endPixel = im.cols * im.rows; - for (int i = 0; i <= endPixel; i++) { + for (int i = 0; i < endPixel; i++) { pixels[i] = (double) pixel[i] / 255; } @@ -302,3 +302,169 @@ int phash_mem(void *buf, uchar *out, size_t buf_len, int hash_size, int highfreq return 0; } +multi_hash_t *multi_hash_create(int hash_size) { + auto multi_hash = (multi_hash_t *) malloc(sizeof(multi_hash_t)); + auto data = (uchar *) malloc((hash_size + 1) * 4); + + multi_hash->ahash = data; + multi_hash->phash = data + (hash_size + 1); + multi_hash->dhash = data + (hash_size + 1) * 2; + multi_hash->whash = data + (hash_size + 1) * 3; + + return multi_hash; +} + +void multi_hash_destroy(multi_hash_t *h) { + free(h->ahash); + free(h); +} + +int multi_hash_file(const char *filepath, multi_hash_t *out, int hash_size, + int ph_highfreq_factor, int wh_img_scale) { + + size_t size; + void *buf = load_file_in_mem(filepath, &size); + + if (buf == nullptr) { + return FASTIMAGEHASH_ERR; + } + + int ret = multi_hash_mem(buf, out, size, hash_size, ph_highfreq_factor, wh_img_scale); + free(buf); + return ret; +} + +int multi_hash_mem(void *buf, multi_hash_t *out, size_t buf_len, + int hash_size, int ph_highfreq_factor, int wh_img_scale) { + + Mat im; + try { + im = imdecode(Mat(1, buf_len, CV_8UC1, buf), IMREAD_GRAYSCALE); + } catch (Exception &e) { + return FASTIMAGEHASH_ERR; + } + + Mat ahash_im; + Mat dhash_im; + Mat phash_im; + Mat whash_im; + + int ph_img_scale = hash_size * ph_highfreq_factor; + + if ((hash_size & (hash_size - 1)) != 0) { + throw std::invalid_argument("hash_size must be a power of two"); + } + + if (wh_img_scale != 0) { + if ((wh_img_scale & (wh_img_scale - 1)) != 0) { + throw std::invalid_argument("wh_img_scale must be a power of two"); + } + } else { + int image_natural_scale = (int) pow(2, (int) log2(MIN(im.rows, im.cols))); + wh_img_scale = MAX(image_natural_scale, hash_size); + } + + int ll_max_level = (int) log2(wh_img_scale); + int level = (int) log2(hash_size); + + if (ll_max_level < level) { + throw std::invalid_argument("hash_size in a wrong range"); + } + + int dwt_level = ll_max_level - level; + + try { + im = imdecode(Mat(1, buf_len, CV_8UC1, buf), IMREAD_GRAYSCALE); + + resize(im, ahash_im, Size(hash_size, hash_size), 0, 0, INTER_AREA); + resize(im, dhash_im, Size(hash_size + 1, hash_size), 0, 0, INTER_AREA); + resize(im, whash_im, Size(wh_img_scale, wh_img_scale), 0, 0, INTER_AREA); + resize(im, phash_im, Size(ph_img_scale, ph_img_scale), 0, 0, INTER_AREA); + + } catch (Exception &e) { + return FASTIMAGEHASH_ERR; + } + + double *pixels = new double[MAX(ph_img_scale, wh_img_scale) * MAX(ph_img_scale, wh_img_scale)]; + + // ahash + double avg = mean(ahash_im).val[0]; + + uchar *pixel = ahash_im.ptr(0); + int endPixel = ahash_im.cols * ahash_im.rows; + for (int i = 0; i < endPixel; i++) { + set_bit_at(out->ahash, i, pixel[i] > avg); + } + + //dhash + int offset = 0; + for (int i = 0; i < dhash_im.rows; ++i) { + pixel = dhash_im.ptr(i); + + for (int j = 1; j < dhash_im.cols; ++j) { + set_bit_at(out->dhash, offset++, pixel[j] > pixel[j - 1]); + } + } + + //phash + pixel = phash_im.ptr(0); + endPixel = phash_im.cols * phash_im.rows; + for (int i = 0; i < endPixel; i++) { + pixels[i] = (double) pixel[i] / 255; + } + + double dct_out[ph_img_scale * ph_img_scale]; + fftw_plan plan = fftw_plan_r2r_2d( + ph_img_scale, ph_img_scale, + pixels, dct_out, + FFTW_REDFT10, FFTW_REDFT10, // DCT-II + FFTW_ESTIMATE + ); + fftw_execute(plan); + fftw_destroy_plan(plan); + + double dct_lowfreq[hash_size * hash_size]; + double sorted[hash_size * hash_size]; + + int ptr_low = 0; + int ptr = 0; + for (int i = 0; i < hash_size; ++i) { + for (int j = 0; j < hash_size; ++j) { + dct_lowfreq[ptr_low] = dct_out[ptr]; + sorted[ptr_low] = dct_out[ptr]; + ptr_low += 1; + ptr += 1; + } + ptr += (ph_img_scale - hash_size); + } + + double med = median(sorted, hash_size * hash_size); + + for (int i = 0; i < hash_size * hash_size; ++i) { + set_bit_at(out->phash, i, dct_lowfreq[i] > med); + } + + //whash + pixel = whash_im.ptr(0); + endPixel = whash_im.cols * whash_im.rows; + for (int i = 0; i < endPixel; i++) { + pixels[i] = (double) pixel[i] / 255; + } + + //TODO: haar option + wave_object w = wave_init("haar"); + wt2_object wt = wt2_init(w, "dwt", wh_img_scale, wh_img_scale, dwt_level); + + double *coeffs = dwt2(wt, pixels); + + memcpy(sorted, coeffs, sizeof(double) * (hash_size * hash_size)); + + med = median(sorted, hash_size * hash_size); + + for (int i = 0; i < hash_size * hash_size; ++i) { + set_bit_at(out->whash, i, coeffs[i] > med); + } + + delete[] pixels; + return 0; +} diff --git a/fastimagehash.h b/fastimagehash.h index b520921..0e767d6 100644 --- a/fastimagehash.h +++ b/fastimagehash.h @@ -8,10 +8,25 @@ typedef unsigned char uchar; +typedef struct multi_hash { + uchar *ahash; + uchar *phash; + uchar *dhash; + uchar *whash; +} multi_hash_t; + #ifdef __cplusplus extern "C" { #endif +multi_hash_t *multi_hash_create(int hash_size); + +void multi_hash_destroy(multi_hash_t *h); + +int multi_hash_file(const char *filepath, multi_hash_t *out, int hash_size, int ph_highfreq_factor, int wh_img_scale); + +int multi_hash_mem(void *buf, multi_hash_t *out, size_t buf_len, int hash_size, int ph_highfreq_factor, int wh_img_scale); + void hash_to_hex_string_reversed(const uchar *h, char *out, int hash_size); void hash_to_hex_string(const uchar *h, char *out, int hash_size); diff --git a/imhash.c b/imhash.c index aebc0e2..f748442 100644 --- a/imhash.c +++ b/imhash.c @@ -54,6 +54,18 @@ int main(int argc, char *argv[]) { printf("%s\tw:%s\n", argv[i], hashstr); } } + + multi_hash_t *m = multi_hash_create(8); + multi_hash_file(argv[i], m, 8, 4, 0); + + hash_to_hex_string_reversed(m->phash, hashstr, 8); + printf("%s\tmp:%s\n", argv[i], hashstr); + hash_to_hex_string_reversed(m->ahash, hashstr, 8); + printf("%s\tma:%s\n", argv[i], hashstr); + hash_to_hex_string_reversed(m->dhash, hashstr, 8); + printf("%s\tmd:%s\n", argv[i], hashstr); + hash_to_hex_string_reversed(m->whash, hashstr, 8); + printf("%s\tmw:%s\n", argv[i], hashstr); } } } \ No newline at end of file