Add 8-byte hash support

This commit is contained in:
simon987 2020-04-10 18:55:20 -04:00
parent a7c41e4959
commit 82291d600d
3 changed files with 61 additions and 9 deletions

View File

@ -6,5 +6,5 @@ set(CMAKE_C_STANDARD 99)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
find_package("PostgreSQL REQUIRED") find_package(PostgreSQL REQUIRED)
add_library(hamming SHARED hamming.c) add_library(hamming SHARED hamming.c)

View File

@ -3,7 +3,7 @@
PG_MODULE_MAGIC; PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(hash_is_within_distance); PG_FUNCTION_INFO_V1(hash_is_within_distance18);
/** /**
* Check if the hamming distance of the two raw byte arrays * Check if the hamming distance of the two raw byte arrays
@ -18,7 +18,7 @@ PG_FUNCTION_INFO_V1(hash_is_within_distance);
* *
* @return the hamming distance between the two arrays * @return the hamming distance between the two arrays
*/ */
Datum hash_is_within_distance(PG_FUNCTION_ARGS) { Datum hash_is_within_distance18(PG_FUNCTION_ARGS) {
char *h1 = VARDATA(PG_GETARG_BYTEA_P(0)); char *h1 = VARDATA(PG_GETARG_BYTEA_P(0));
char *h2 = VARDATA(PG_GETARG_BYTEA_P(1)); char *h2 = VARDATA(PG_GETARG_BYTEA_P(1));
@ -45,7 +45,30 @@ Datum hash_is_within_distance(PG_FUNCTION_ARGS) {
PG_RETURN_BOOL(distance <= max_distance); PG_RETURN_BOOL(distance <= max_distance);
} }
PG_FUNCTION_INFO_V1(hash_distance); PG_FUNCTION_INFO_V1(hash_is_within_distance8);
Datum hash_is_within_distance8(PG_FUNCTION_ARGS) {
char *h1 = VARDATA(PG_GETARG_BYTEA_P(0));
char *h2 = VARDATA(PG_GETARG_BYTEA_P(1));
int32 max_distance = PG_GETARG_INT32(2);
if (__builtin_popcountll(*((uint64 *) h1) ^ *((uint64 *) h2)) > max_distance) {
PG_RETURN_BOOL(false);
}
PG_RETURN_BOOL(true);
}
PG_FUNCTION_INFO_V1(hash_distance8);
Datum hash_distance8(PG_FUNCTION_ARGS) {
char *h1 = VARDATA(PG_GETARG_BYTEA_P(0));
char *h2 = VARDATA(PG_GETARG_BYTEA_P(1));
int distance = __builtin_popcountll(*((uint64 *) h1) ^ *((uint64 *) h2));
PG_RETURN_INT32(distance);
}
PG_FUNCTION_INFO_V1(hash_distance18);
/** /**
* Hamming distance of two raw byte arrays * Hamming distance of two raw byte arrays
@ -59,7 +82,7 @@ PG_FUNCTION_INFO_V1(hash_distance);
* *
* @return the hamming distance between the two arrays * @return the hamming distance between the two arrays
*/ */
Datum hash_distance(PG_FUNCTION_ARGS) { Datum hash_distance18(PG_FUNCTION_ARGS) {
char *h1 = VARDATA(PG_GETARG_BYTEA_P(0)); char *h1 = VARDATA(PG_GETARG_BYTEA_P(0));
char *h2 = VARDATA(PG_GETARG_BYTEA_P(1)); char *h2 = VARDATA(PG_GETARG_BYTEA_P(1));
@ -80,7 +103,7 @@ Datum hash_distance(PG_FUNCTION_ARGS) {
} }
PG_FUNCTION_INFO_V1(hash_is_within_distance_any); PG_FUNCTION_INFO_V1(hash_is_within_distance18_any);
/** /**
* Check if the first argument matches any (within distance 'max_distance') * Check if the first argument matches any (within distance 'max_distance')
@ -96,7 +119,7 @@ PG_FUNCTION_INFO_V1(hash_is_within_distance_any);
* *
* @return true if at least 1 hash matches * @return true if at least 1 hash matches
*/ */
Datum hash_is_within_distance_any(PG_FUNCTION_ARGS) { Datum hash_is_within_distance18_any(PG_FUNCTION_ARGS) {
char *h = VARDATA(PG_GETARG_BYTEA_P(0)); char *h = VARDATA(PG_GETARG_BYTEA_P(0));
bytea *h_bytea = PG_GETARG_BYTEA_P(1); bytea *h_bytea = PG_GETARG_BYTEA_P(1);
@ -134,7 +157,7 @@ Datum hash_is_within_distance_any(PG_FUNCTION_ARGS) {
PG_RETURN_BOOL(false); PG_RETURN_BOOL(false);
} }
PG_FUNCTION_INFO_V1(hash_equ_any); PG_FUNCTION_INFO_V1(hash_equ18_any);
/** /**
* Check if the first argument exactly matches any hashes among an array of hashes * Check if the first argument exactly matches any hashes among an array of hashes
@ -148,7 +171,7 @@ PG_FUNCTION_INFO_V1(hash_equ_any);
LANGUAGE C STRICT; LANGUAGE C STRICT;
* @return true if at least 1 hash is equal * @return true if at least 1 hash is equal
*/ */
Datum hash_equ_any(PG_FUNCTION_ARGS) { Datum hash_equ18_any(PG_FUNCTION_ARGS) {
char *h = VARDATA(PG_GETARG_BYTEA_P(0)); char *h = VARDATA(PG_GETARG_BYTEA_P(0));
bytea *h_bytea = PG_GETARG_BYTEA_P(1); bytea *h_bytea = PG_GETARG_BYTEA_P(1);

29
install.sh Normal file
View File

@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Register the libhamming C functions in a PostgreSQL database.
#
# Configuration (edit the defaults below or override from the environment):
#   DB_USER   role used to connect (needs permission to create C functions)
#   DATABASE  target database name
#   LIB_PATH  absolute path to the compiled libhamming.so
#
# Example:
#   DB_USER=postgres DATABASE=mydb LIB_PATH=/usr/lib/libhamming.so ./install.sh

# Abort on the first failing command, unset variable or broken pipeline.
set -euo pipefail

# DB_USER rather than USER, so the shell's own login-name variable is untouched.
DB_USER="${DB_USER:-dbuser}"
DATABASE="${DATABASE:-dbname}"
LIB_PATH="${LIB_PATH:-/path/to/libhamming.so}"

# The library path is handed to psql as a variable and interpolated with
# :'lib_path', which emits a correctly quoted SQL string literal. The heredoc
# delimiter is quoted so the shell leaves the SQL text untouched.
# ON_ERROR_STOP makes psql exit non-zero as soon as one statement fails.
psql -v ON_ERROR_STOP=1 -v lib_path="$LIB_PATH" -U "$DB_USER" "$DATABASE" <<'EOF'
CREATE OR REPLACE FUNCTION hash_is_within_distance18(bytea, bytea, integer) RETURNS boolean
AS :'lib_path', 'hash_is_within_distance18'
LANGUAGE C STRICT;
CREATE OR REPLACE FUNCTION hash_is_within_distance8(bytea, bytea, integer) RETURNS boolean
AS :'lib_path', 'hash_is_within_distance8'
LANGUAGE C STRICT;
CREATE OR REPLACE FUNCTION hash_distance18(bytea, bytea) RETURNS integer
AS :'lib_path', 'hash_distance18'
LANGUAGE C STRICT;
CREATE OR REPLACE FUNCTION hash_distance8(bytea, bytea) RETURNS integer
AS :'lib_path', 'hash_distance8'
LANGUAGE C STRICT;
CREATE OR REPLACE FUNCTION hash_is_within_distance18_any(bytea, bytea, integer) RETURNS bool
AS :'lib_path', 'hash_is_within_distance18_any'
LANGUAGE C STRICT;
CREATE OR REPLACE FUNCTION hash_equ18_any(bytea, bytea) RETURNS bool
AS :'lib_path', 'hash_equ18_any'
LANGUAGE C STRICT;
EOF