commit 0bfac5fdc08f1b8718bf756f13df23e765e0755e
Author: simon987
Date:   Wed Dec 25 08:55:07 2019 -0500

    Initial commit (squashed)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9f59a01
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,19 @@
+.idea/
+*.iml
+*.png
+*.tar.gz
+*.cbp
+*.a
+*.so
+bm
+bench/*.csv
+bench/*.ods
+perf.*
+CMakeCache.txt
+CMakeFiles/
+cmake-build-debug
+cmake_install.cmake
+Makefile
+
+# wavelib stuff
+Bin/
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..ed0822f
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "thirdparty/wavelib"]
+    path = thirdparty/wavelib
+    url = https://github.com/simon987/wavelib
+[submodule "thirdparty/benchmark"]
+    path = thirdparty/benchmark
+    url = https://github.com/google/benchmark
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..36f8efa
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,75 @@
+cmake_minimum_required(VERSION 3.15)
+project(fastimagehash)
+
+
+# BUILD_UT is switched off before adding wavelib (presumably its unit-test option).
+set(BUILD_UT OFF)
+add_subdirectory(thirdparty/wavelib)
+
+# google/benchmark: build the library only, without its tests or install rules.
+set(BENCHMARK_ENABLE_GTEST_TESTS OFF)
+set(BENCHMARK_ENABLE_TESTING OFF)
+set(BENCHMARK_ENABLE_INSTALL OFF)
+# Default to an optimized build, but respect an explicit -DCMAKE_BUILD_TYPE=...
+if (NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release)
+endif ()
+add_subdirectory(thirdparty/benchmark)
+
+set(CMAKE_CXX_STANDARD 14)
+
+# Extra find modules live in the "cmake" directory of this repository; the
+# entry is given as an absolute path so the lookup is unambiguous.
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+
+find_package(OpenCV REQUIRED)
+find_package(FFTW REQUIRED)
+
+add_library(
+        fastimagehash
+        SHARED
+        fastimagehash.cpp fastimagehash.h
+)
+
+target_include_directories(
+        fastimagehash
+        PUBLIC
+        ${CMAKE_SOURCE_DIR}/thirdparty/wavelib/header/
+        ${OpenCV_INCLUDE_DIRS}
+        ${FFTW_INCLUDE_DIRS}
+)
+
+target_link_libraries(
+        fastimagehash
+        ${OpenCV_LIBS}
+        ${FFTW_LIBRARIES}
+        wavelib
+        pthread
+)
+
+target_compile_options(
+        fastimagehash
+        PRIVATE
+        -Ofast
+        -march=native
+        -fno-stack-protector
+        -fomit-frame-pointer
+        -freciprocal-math
+)
+
+# Benchmark driver; the binary is written to bench/, where run.py starts ./bm.
+add_executable(bm benchmark.cpp)
+target_link_libraries(
+        bm
+        fastimagehash
+        benchmark
+)
+set_target_properties(
+        bm
+        PROPERTIES
+        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/bench/"
+)
+
+add_dependencies(fastimagehash wavelib)
+add_dependencies(bm fastimagehash)
+add_dependencies(bm benchmark)
diff --git a/bench/1000px b/bench/1000px
new file mode 100644
index 0000000..796a36d
Binary files /dev/null and b/bench/1000px differ
diff --git a/bench/100px b/bench/100px
new file mode 100644
index 0000000..50adbcd
Binary files /dev/null and b/bench/100px differ
diff --git a/bench/2000px b/bench/2000px
new file mode 100644
index 0000000..55c3ae1
Binary files /dev/null and b/bench/2000px differ
diff --git a/bench/200px b/bench/200px
new file mode 100644
index 0000000..1f450aa
Binary files /dev/null and b/bench/200px differ
diff --git a/bench/3000px b/bench/3000px
new file mode 100644
index 0000000..613904f
Binary files /dev/null and b/bench/3000px differ
diff --git a/bench/300px b/bench/300px
new file mode 100644
index 0000000..c05776f
Binary files /dev/null and b/bench/300px differ
diff --git a/bench/4000px b/bench/4000px
new file mode 100644
index 0000000..58a3429
Binary files /dev/null and b/bench/4000px differ
diff --git a/bench/500px b/bench/500px
new file mode 100644
index 0000000..4fc3e04
Binary files /dev/null and b/bench/500px differ
diff --git a/bench/6000px b/bench/6000px
new file mode 100644
index 0000000..655a649
Binary files /dev/null and b/bench/6000px differ
diff --git a/bench/8000px b/bench/8000px
new file mode 100644
index 0000000..fa6e5f4
Binary files /dev/null and b/bench/8000px differ
diff --git a/bench/benchmark.py b/bench/benchmark.py
new file mode 100644
index 0000000..d1ceb76
--- /dev/null
+++ b/bench/benchmark.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+import timeit
+import sys
+
+IMAGE = sys.argv[1]
+COUNT = 20
+SIZE = 8
+
+def print_result(method, time):
+    print("%s_%s,%d" % (IMAGE, method, time / COUNT * 1000000000))
+
+print_result("phash", timeit.timeit(
+    setup="from imagehash import phash \n"
+          "from PIL import Image",
+    stmt="phash(Image.open('%s'), hash_size=%d)" % (IMAGE, SIZE),
+    number=COUNT
+))
+
+print_result("whash", timeit.timeit(
+    setup="from imagehash import whash \n"
+          "from PIL import Image",
+    stmt="whash(Image.open('%s'), hash_size=%d, remove_max_haar_ll=False)" % (IMAGE, SIZE),
+    number=COUNT
+))
+
+print_result("dhash", timeit.timeit(
+    setup="from imagehash import dhash \n"
+          "from PIL import Image",
+    stmt="dhash(Image.open('%s'), hash_size=%d)" % (IMAGE, SIZE),
+    number=COUNT
+))
+
+print_result("ahash", timeit.timeit(
+    setup="from imagehash import average_hash \n"
+          "from PIL import Image",
+    stmt="average_hash(Image.open('%s'), hash_size=%d)" % (IMAGE, SIZE),
+    number=COUNT
+))
diff --git a/bench/results/bench.csv b/bench/results/bench.csv
new file mode 100644
index 0000000..78115c1
--- /dev/null
+++ b/bench/results/bench.csv
@@ -0,0 +1,81 @@
+100px_phash,1501752
+100px_whash,1191232
+100px_dhash,627767
+100px_ahash,559005
+200px_phash,1890701
+200px_whash,1705581
+200px_dhash,764158
+200px_ahash,773574
+300px_phash,3349884
+300px_whash,3585630
+300px_dhash,2165022
+300px_ahash,2138236
+500px_phash,8181995
+500px_whash,16893975
+500px_dhash,6583327
+500px_ahash,6129121
+1000px_phash,33332575
+1000px_whash,32719635
+1000px_dhash,22770614
+1000px_ahash,17089768
+2000px_phash,66180354
+2000px_whash,101643309
+2000px_dhash,55663749
+2000px_ahash,57924298
+3000px_phash,125681339
+3000px_whash,228129635
+3000px_dhash,138621595
+3000px_ahash,97757975
+4000px_phash,275259074
+4000px_whash,501619747
+4000px_dhash,207196780
+4000px_ahash,302886693
+6000px_phash,509748444
+6000px_whash,749504047
+6000px_dhash,510110532
+6000px_ahash,651134350
+8000px_phash,804032051
+8000px_whash,1561515488
+8000px_dhash,935896765
+8000px_ahash,797512191
+
+100px_phash,156781
+100px_whash,173199
+100px_dhash,90662.8
+100px_ahash,101213
+200px_phash,700678
+200px_whash,859678
+200px_dhash,452687
+200px_ahash,387735
+300px_phash,1.25202e+06
+300px_whash,1.8245e+06
+300px_dhash,1.25765e+06 +300px_ahash,973228 +500px_phash,2.38869e+06 +500px_whash,1.1267e+07 +500px_dhash,2.17194e+06 +500px_ahash,2.2334e+06 +1000px_phash,5.74643e+06 +1000px_whash,1.16069e+07 +1000px_dhash,5.07346e+06 +1000px_ahash,2.5366e+06 +2000px_phash,2.65802e+07 +2000px_whash,6.72166e+07 +2000px_dhash,1.84348e+07 +2000px_ahash,8.71788e+06 +3000px_phash,4.03586e+07 +3000px_whash,8.8327e+07 +3000px_dhash,3.53093e+07 +3000px_ahash,1.79755e+07 +4000px_phash,8.40194e+07 +4000px_whash,4.82593e+08 +4000px_dhash,9.43476e+07 +4000px_ahash,6.51547e+07 +6000px_phash,2.28308e+08 +6000px_whash,4.19073e+08 +6000px_dhash,2.12647e+08 +6000px_ahash,2.03283e+08 +8000px_phash,2.38032e+08 +8000px_whash,1.19931e+09 +8000px_dhash,3.05067e+08 +8000px_ahash,2.73826e+08 diff --git a/bench/results/bench2.csv b/bench/results/bench2.csv new file mode 100644 index 0000000..3312fa6 --- /dev/null +++ b/bench/results/bench2.csv @@ -0,0 +1,81 @@ +100px_phash,1546245 +100px_whash,1142629 +100px_dhash,422119 +100px_ahash,459319 +200px_phash,2289901 +200px_whash,1982601 +200px_dhash,877761 +200px_ahash,893851 +300px_phash,5754956 +300px_whash,4130921 +300px_dhash,2337779 +300px_ahash,3306587 +500px_phash,16423604 +500px_whash,25278172 +500px_dhash,13543857 +500px_ahash,12730359 +1000px_phash,38916426 +1000px_whash,43298265 +1000px_dhash,22826313 +1000px_ahash,23075597 +2000px_phash,108740058 +2000px_whash,130074288 +2000px_dhash,86906159 +2000px_ahash,82882133 +3000px_phash,157641043 +3000px_whash,194007627 +3000px_dhash,115356997 +3000px_ahash,167002746 +4000px_phash,320478682 +4000px_whash,611399093 +4000px_dhash,258676239 +4000px_ahash,278319008 +6000px_phash,663131052 +6000px_whash,968925343 +6000px_dhash,603457884 +6000px_ahash,590223791 +8000px_phash,968315239 +8000px_whash,2128619381 +8000px_dhash,810395862 +8000px_ahash,825277357 + +100px_phash,172159 +100px_whash,289971 +100px_dhash,114212 +100px_ahash,98125.5 +200px_phash,367338 +200px_whash,656732 +200px_dhash,249119 
+200px_ahash,343462 +300px_phash,1.28685e+06 +300px_whash,1.39146e+06 +300px_dhash,1.14464e+06 +300px_ahash,900021 +500px_phash,2.22673e+06 +500px_whash,7.74973e+06 +500px_dhash,2.12625e+06 +500px_ahash,2.89128e+06 +1000px_phash,7.79976e+06 +1000px_whash,1.70092e+07 +1000px_dhash,7.78873e+06 +1000px_ahash,2.82737e+06 +2000px_phash,2.49954e+07 +2000px_whash,1.24924e+08 +2000px_dhash,4.11592e+07 +2000px_ahash,2.0187e+07 +3000px_phash,3.89911e+07 +3000px_whash,9.88339e+07 +3000px_dhash,3.39018e+07 +3000px_ahash,2.53518e+07 +4000px_phash,8.36112e+07 +4000px_whash,2.41845e+08 +4000px_dhash,4.09667e+07 +4000px_ahash,4.06429e+07 +6000px_phash,1.79905e+08 +6000px_whash,3.62966e+08 +6000px_dhash,1.73798e+08 +6000px_ahash,1.19691e+08 +8000px_phash,2.35173e+08 +8000px_whash,1.18053e+09 +8000px_dhash,2.61731e+08 +8000px_ahash,2.33739e+08 diff --git a/bench/results/bench3.csv b/bench/results/bench3.csv new file mode 100644 index 0000000..4d8259d --- /dev/null +++ b/bench/results/bench3.csv @@ -0,0 +1,81 @@ +100px_phash,2382869 +100px_whash,1521012 +100px_dhash,580412 +100px_ahash,603286 +200px_phash,1926447 +200px_whash,1768162 +200px_dhash,774964 +200px_ahash,771083 +300px_phash,3790657 +300px_whash,3626704 +300px_dhash,2067537 +300px_ahash,2069114 +500px_phash,10906352 +500px_whash,22738215 +500px_dhash,10272449 +500px_ahash,9595167 +1000px_phash,23141506 +1000px_whash,25170109 +1000px_dhash,14824129 +1000px_ahash,15091460 +2000px_phash,57189668 +2000px_whash,108344237 +2000px_dhash,62995725 +2000px_ahash,62746881 +3000px_phash,138799963 +3000px_whash,217013121 +3000px_dhash,106064767 +3000px_ahash,161430438 +4000px_phash,242516033 +4000px_whash,513381749 +4000px_dhash,244478546 +4000px_ahash,334389817 +6000px_phash,541641403 +6000px_whash,825888963 +6000px_dhash,458842570 +6000px_ahash,487875492 +8000px_phash,882104246 +8000px_whash,1805513081 +8000px_dhash,763400049 +8000px_ahash,717820524 + +100px_phash,216477 +100px_whash,242169 +100px_dhash,105712 +100px_ahash,115431 
+200px_phash,473393
+200px_whash,599663
+200px_dhash,184172
+200px_ahash,206734
+300px_phash,886085
+300px_whash,1.17681e+06
+300px_dhash,758226
+300px_ahash,825329
+500px_phash,1.92917e+06
+500px_whash,7.30992e+06
+500px_dhash,2.67147e+06
+500px_ahash,2.45408e+06
+1000px_phash,5.96402e+06
+1000px_whash,9.16642e+06
+1000px_dhash,5.30646e+06
+1000px_ahash,2.40083e+06
+2000px_phash,1.54843e+07
+2000px_whash,4.93767e+07
+2000px_dhash,1.98073e+07
+2000px_ahash,1.07622e+07
+3000px_phash,3.11459e+07
+3000px_whash,6.19604e+07
+3000px_dhash,3.0431e+07
+3000px_ahash,1.78016e+07
+4000px_phash,7.47504e+07
+4000px_whash,2.19254e+08
+4000px_dhash,5.78558e+07
+4000px_ahash,4.9799e+07
+6000px_phash,1.67584e+08
+6000px_whash,3.67142e+08
+6000px_dhash,1.94565e+08
+6000px_ahash,1.40676e+08
+8000px_phash,2.0177e+08
+8000px_whash,1.36282e+09
+8000px_dhash,2.27323e+08
+8000px_ahash,2.25973e+08
diff --git a/bench/run.py b/bench/run.py
new file mode 100644
index 0000000..6abda3b
--- /dev/null
+++ b/bench/run.py
@@ -0,0 +1,37 @@
+from subprocess import check_output, DEVNULL
+import csv
+
+files = (
+    "100px",
+    "200px",
+    "300px",
+    "500px",
+    "1000px",
+    "2000px",
+    "3000px",
+    "4000px",
+    "6000px",
+    "8000px",
+)
+
+for f in files:
+    out = check_output(["python", "benchmark.py", f])
+    print(out.decode(), end="")
+
+print()
+
+for f in files:
+    out = check_output(["./bm", f, "--benchmark_format=csv"], stderr=DEVNULL)
+
+    for line in out.decode().splitlines(keepends=False):
+        if line.startswith("\"BM_"):
+            m, _, t, *_ = line.split(",")
+            if "phash" in m:
+                method = "phash"
+            if "dhash" in m:
+                method = "dhash"
+            if "ahash" in m:
+                method = "ahash"
+            if "whash" in m:
+                method = "whash"
+            print("%s_%s,%s" % (f, method, t))
diff --git a/benchmark.cpp b/benchmark.cpp
new file mode 100644
index 0000000..cad9a2b
--- /dev/null
+++ b/benchmark.cpp
@@ -0,0 +1,120 @@
+#include <benchmark/benchmark.h>
+#include "fastimagehash.h"
+
+#include <cstdio>
+#include <cstdlib>
+
+// Path of the image to hash; main() sets it from argv[1] before benchmarks run.
+char *filepath;
+
+/**
+ * Reads the whole file at `filepath` into a malloc'd buffer.
+ *
+ * Prints a message and exits if the file cannot be opened or read, so the
+ * benchmarks never run on a null or partially filled buffer.
+ *
+ * @param buf_len receives the number of bytes read
+ * @return buffer owned by the caller; release it with free()
+ */
+void *load_test_file(size_t *buf_len) {
+    FILE *file = fopen(filepath, "rb");
+    if (file == nullptr) {
+        perror(filepath);
+        exit(EXIT_FAILURE);
+    }
+
+    // Measure the file, then rewind the same handle instead of reopening it.
+    long len = -1;
+    if (fseek(file, 0, SEEK_END) == 0) {
+        len = ftell(file);
+    }
+    if (len < 0) {
+        perror(filepath);
+        exit(EXIT_FAILURE);
+    }
+    rewind(file);
+
+    const size_t size = static_cast<size_t>(len);
+    void *buf = malloc(size > 0 ? size : 1);
+    if (buf == nullptr || fread(buf, 1, size, file) != size) {
+        fprintf(stderr, "%s: could not read file\n", filepath);
+        exit(EXIT_FAILURE);
+    }
+    fclose(file);
+
+    *buf_len = size;
+    return buf;
+}
+
+// Each benchmark loads the image once and times only the hash call on the
+// in-memory buffer; state.range() is the hash size given via Arg() below.
+static void BM_phash(benchmark::State &state) {
+
+    size_t size;
+    void *buf = load_test_file(&size);
+
+    for (auto _ : state) {
+        phash(buf, size, state.range(), 4);
+    }
+
+    free(buf);
+}
+
+static void BM_whash(benchmark::State &state) {
+
+    size_t size;
+    void *buf = load_test_file(&size);
+
+    for (auto _ : state) {
+        whash(buf, size, state.range(), 0);
+    }
+
+    free(buf);
+}
+
+static void BM_dhash(benchmark::State &state) {
+
+    size_t size;
+    void *buf = load_test_file(&size);
+
+    for (auto _ : state) {
+        dhash(buf, size, state.range());
+    }
+
+    free(buf);
+}
+
+static void BM_ahash(benchmark::State &state) {
+
+    size_t size;
+    void *buf = load_test_file(&size);
+
+    for (auto _ : state) {
+        ahash(buf, size, state.range());
+    }
+
+    free(buf);
+}
+
+BENCHMARK(BM_phash)->ArgName("size")->Arg(8);
+BENCHMARK(BM_whash)->ArgName("size")->Arg(8);
+BENCHMARK(BM_dhash)->ArgName("size")->Arg(8);
+BENCHMARK(BM_ahash)->ArgName("size")->Arg(8);
+
+
+int main(int argc, char **argv) {
+    if (argc < 2) {
+        fprintf(stderr, "usage: bm <image> [benchmark options]\n");
+        return EXIT_FAILURE;
+    }
+
+    // Hide the image path from google/benchmark: argv[0] is copied into slot 1
+    // and the argument vector is handed over starting from that slot.
+    filepath = argv[1];
+    argv[1] = argv[0];
+
+    argc -= 1;
+
+    ::benchmark::Initialize(&argc, argv + 1);
+    ::benchmark::RunSpecifiedBenchmarks();
+}
diff --git a/cmake b/cmake
new file mode 160000
index 0000000..05b6961
--- /dev/null
+++ b/cmake
@@ -0,0 +1 @@
+Subproject commit 05b696123f379245483f7b7a1ff4abeb6f490667
diff --git a/fastimagehash.cpp b/fastimagehash.cpp
new file mode 100644
index 0000000..96a70cd
--- /dev/null
+++ b/fastimagehash.cpp
@@ -0,0 +1,260 @@
+#include "fastimagehash.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+#include <stdexcept>
+#include <vector>
+
+#include <fftw3.h>
+#include <opencv2/opencv.hpp>
+#include <wavelib.h>
+
+using namespace cv;
+
+/**
+ * Median of `len` doubles. Sorts `arr` in place, so pass a scratch copy.
+ *
+ * @return the middle element for odd `len`, the mean of the two middle
+ *         elements for even `len`, and 0 for an empty array
+ */
+static inline double median(double *arr, size_t len) {
+    if (len == 0) {
+        return 0;
+    }
+    std::sort(arr, arr + len);
+
+    const size_t mid = len / 2;
+    return (len % 2 != 0) ? arr[mid] : (arr[mid - 1] + arr[mid]) / 2;
+}
+
+/**
+ * Debug helper: prints a hash to stdout as hex, one digit per group of four
+ * bits with the first bit of each group as the most significant one. Bits
+ * left over after the last full group are not printed.
+ */
+void printBitSet(std::vector<bool> *bs) {
+    const size_t len = bs->size();
+
+    // std::hex is sticky, so put the caller's formatting back afterwards.
+    const std::ios_base::fmtflags saved = std::cout.flags();
+    std::cout << std::hex;
+    for (size_t i = 3; i < len; i += 4) {
+        std::cout << (((*bs)[i - 3] << 3) | ((*bs)[i - 2] << 2) | ((*bs)[i - 1] << 1) | ((*bs)[i]));
+    }
+    std::cout << std::endl;
+    std::cout.flags(saved);
+}
+
+// Decodes an encoded image held in memory as a single-channel 8-bit Mat.
+static Mat decode_gray(void *buf, size_t buf_len) {
+    return imdecode(Mat(1, static_cast<int>(buf_len), CV_8UC1, buf), IMREAD_GRAYSCALE);
+}
+
+// Largest power of two that is <= value (1 when value < 2).
+static int floor_pow2(int value) {
+    int p = 1;
+    while (p <= value / 2) {
+        p *= 2;
+    }
+    return p;
+}
+
+// Base-2 logarithm of a positive power of two.
+static int log2_pow2(int value) {
+    int level = 0;
+    while (value > 1) {
+        value /= 2;
+        ++level;
+    }
+    return level;
+}
+
+/**
+ * Average hash: shrinks the image to hash_size x hash_size and sets one bit per
+ * pixel, true where the pixel is brighter than the mean.
+ *
+ * The bits are built and then discarded; this function has no output yet.
+ */
+void ahash(void *buf, size_t buf_len, int hash_size) {
+
+    Mat im = decode_gray(buf, buf_len);
+    resize(im, im, Size(hash_size, hash_size), 0, 0, INTER_AREA);
+
+    const double avg = mean(im).val[0];
+
+    const uchar *pixel = im.ptr(0);
+    const int endPixel = im.cols * im.rows;
+
+    std::vector<bool> hash;
+    hash.reserve(endPixel);
+    // Strictly below endPixel: index endPixel is one past the last pixel.
+    for (int i = 0; i < endPixel; i++) {
+        hash.push_back(pixel[i] > avg);
+    }
+
+//    printBitSet(&hash);
+}
+
+/**
+ * Difference hash: shrinks the image to (hash_size + 1) x hash_size and sets
+ * one bit per horizontally adjacent pixel pair, true where the right pixel is
+ * brighter than the left one.
+ *
+ * The bits are built and then discarded; this function has no output yet.
+ */
+void dhash(void *buf, size_t buf_len, int hash_size) {
+
+    Mat im = decode_gray(buf, buf_len);
+    resize(im, im, Size(hash_size + 1, hash_size), 0, 0, INTER_AREA);
+
+    std::vector<bool> hash;
+
+    for (int i = 0; i < im.rows; ++i) {
+        const uchar *pixel = im.ptr(i);
+
+        for (int j = 1; j < im.cols; ++j) {
+            hash.push_back(pixel[j] > pixel[j - 1]);
+        }
+    }
+
+//    printBitSet(&hash);
+}
+
+/**
+ * Wavelet hash: resizes the image to img_scale x img_scale, runs a Haar DWT
+ * and sets one bit per coefficient in the first hash_size * hash_size outputs,
+ * true where the coefficient is above their median.
+ *
+ * @param hash_size power of two
+ * @param img_scale power of two, or 0 to use the largest power of two that
+ *                  fits in the smaller image dimension (at least hash_size)
+ * @throws std::invalid_argument if an argument is not a power of two or
+ *         hash_size is larger than img_scale
+ *
+ * The bits are built and then discarded; this function has no output yet.
+ */
+void whash(void *buf, size_t buf_len, int hash_size, int img_scale) {
+
+    Mat im = decode_gray(buf, buf_len);
+
+    if (hash_size <= 0 || (hash_size & (hash_size - 1)) != 0) {
+        throw std::invalid_argument("hash_size must be a power of two");
+    }
+
+    if (img_scale != 0) {
+        if (img_scale < 0 || (img_scale & (img_scale - 1)) != 0) {
+            throw std::invalid_argument("img_scale must be a power of two");
+        }
+    } else {
+        img_scale = std::max(floor_pow2(std::min(im.rows, im.cols)), hash_size);
+    }
+
+    const int ll_max_level = log2_pow2(img_scale);
+    const int level = log2_pow2(hash_size);
+
+    if (ll_max_level < level) {
+        throw std::invalid_argument("hash_size in a wrong range");
+    }
+
+    const int dwt_level = ll_max_level - level;
+
+    resize(im, im, Size(img_scale, img_scale), 0, 0, INTER_AREA);
+
+    const size_t pixel_count = static_cast<size_t>(im.cols) * im.rows;
+    std::vector<double> data(pixel_count);
+
+    const uchar *pixel = im.ptr(0);
+    // Strictly below pixel_count: the buffers hold exactly that many entries.
+    for (size_t i = 0; i < pixel_count; i++) {
+        data[i] = static_cast<double>(pixel[i]) / 255;
+    }
+
+    wave_object w = wave_init("haar");
+    wt2_object wt = wt2_init(w, "dwt", img_scale, img_scale, dwt_level);
+
+    double *coeffs = dwt2(wt, data.data());
+
+    // The median is taken over exactly the hash_size * hash_size coefficients
+    // that become bits, so it stays correct for any hash size.
+    const size_t hash_bits = static_cast<size_t>(hash_size) * hash_size;
+    std::vector<double> sorted(coeffs, coeffs + hash_bits);
+    const double med = median(sorted.data(), hash_bits);
+
+    std::vector<bool> hash;
+    hash.reserve(hash_bits);
+    for (size_t i = 0; i < hash_bits; ++i) {
+        hash.push_back(coeffs[i] > med);
+    }
+
+    // wavelib hands ownership of all three to the caller.
+    free(coeffs);
+    wt2_free(wt);
+    wave_free(w);
+
+//    printBitSet(&hash);
+}
+
+/**
+ * Perceptual hash: resizes the image to a square of hash_size * highfreq_factor
+ * pixels, runs a 2D DCT-II and sets one bit per coefficient in the top-left
+ * hash_size x hash_size corner, true where it is above that corner's median.
+ *
+ * @throws std::invalid_argument if hash_size or highfreq_factor is not positive
+ *
+ * The bits are built and then discarded; this function has no output yet.
+ */
+void phash(void *buf, size_t buf_len, int hash_size, int highfreq_factor) {
+
+    if (hash_size <= 0 || highfreq_factor <= 0) {
+        throw std::invalid_argument("hash_size and highfreq_factor must be positive");
+    }
+
+    const int img_size = hash_size * highfreq_factor;
+
+    Mat im = decode_gray(buf, buf_len);
+    resize(im, im, Size(img_size, img_size), 0, 0, INTER_AREA);
+
+    // Heap storage: both buffers grow with img_size squared, which is too
+    // much to place on the stack for large sizes.
+    const size_t pixel_count = static_cast<size_t>(img_size) * img_size;
+    std::vector<double> pixels(pixel_count);
+    std::vector<double> out(pixel_count);
+
+    const uchar *pixel = im.ptr(0);
+    // Strictly below pixel_count: the buffers hold exactly that many entries.
+    for (size_t i = 0; i < pixel_count; i++) {
+        pixels[i] = static_cast<double>(pixel[i]) / 255;
+    }
+
+    fftw_plan plan = fftw_plan_r2r_2d(
+            img_size, img_size,
+            pixels.data(), out.data(),
+            FFTW_REDFT10, FFTW_REDFT10, // DCT-II
+            FFTW_ESTIMATE
+    );
+    fftw_execute(plan);
+    fftw_destroy_plan(plan);
+
+    // Copy the first hash_size values of each of the first hash_size rows.
+    const size_t hash_bits = static_cast<size_t>(hash_size) * hash_size;
+    std::vector<double> dct_lowfreq;
+    dct_lowfreq.reserve(hash_bits);
+    for (int row = 0; row < hash_size; ++row) {
+        const double *src = out.data() + static_cast<size_t>(row) * img_size;
+        dct_lowfreq.insert(dct_lowfreq.end(), src, src + hash_size);
+    }
+
+    // median() sorts in place, so give it a copy and keep the original order.
+    std::vector<double> sorted(dct_lowfreq);
+    const double med = median(sorted.data(), hash_bits);
+
+    std::vector<bool> hash;
+    hash.reserve(hash_bits);
+    for (size_t i = 0; i < hash_bits; ++i) {
+        hash.push_back(dct_lowfreq[i] > med);
+    }
+
+//    printBitSet(&hash);
+}
+
diff --git a/fastimagehash.h b/fastimagehash.h
new file mode 100644
index 0000000..610c5e5
--- /dev/null
+++ b/fastimagehash.h
@@ -0,0 +1,24 @@
+#ifndef FASTIMAGEHASH_FASTIMAGEHASH_H
+#define FASTIMAGEHASH_FASTIMAGEHASH_H
+
+#include <cstddef>
+
+// All functions take an encoded image held in memory (buf, buf_len), decode it
+// as grayscale and compute a hash of hash_size * hash_size bits. The computed
+// bits are not returned yet.
+
+// Average hash: one bit per pixel of the shrunken image, set where the pixel is
+// brighter than the mean.
+void ahash(void *buf, size_t buf_len, int hash_size);
+
+// Difference hash: one bit per horizontally adjacent pixel pair.
+void dhash(void *buf, size_t buf_len, int hash_size);
+
+// Wavelet hash. hash_size must be a power of two; img_scale must be a power of
+// two, or 0 to derive it from the image. Throws std::invalid_argument otherwise.
+void whash(void* buf, size_t buf_len, int hash_size, int img_scale);
+
+// Perceptual (DCT) hash computed on a hash_size * highfreq_factor square image.
+void phash(void* buf, size_t buf_len, int hash_size, int highfreq_factor);
+
+#endif
diff --git a/thirdparty/benchmark b/thirdparty/benchmark
new file mode 160000
index 0000000..0811f1d
--- /dev/null
+++ b/thirdparty/benchmark
@@ -0,0 +1 @@
+Subproject commit 0811f1d782455b3c80285bebf934a7045d845ed3
diff --git a/thirdparty/get_static_libs.sh b/thirdparty/get_static_libs.sh
new file mode 100755
index 0000000..f073256
--- /dev/null
+++ b/thirdparty/get_static_libs.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+wget http://fftw.org/fftw-3.3.8.tar.gz
+tar -xzf fftw-3.3.8.tar.gz
+cd fftw-3.3.8
+./configure CFLAGS=-fPIC
diff --git a/thirdparty/wavelib b/thirdparty/wavelib
new file mode 160000
index 0000000..7956dc7
--- /dev/null
+++ b/thirdparty/wavelib
@@ -0,0 +1 @@
+Subproject commit 7956dc7bfbaa219a568eea93a3288c4ee5389a77