mirror of
https://github.com/simon987/pg_hamming.git
synced 2025-04-04 07:22:58 +00:00
Initial commit
This commit is contained in:
commit
9ef184fd7c
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
cmake-build-debug/
|
||||
CMakeFiles
|
||||
cmake_install.cmake
|
||||
Makefile
|
||||
CMakeCache.txt
|
||||
*.so
|
||||
*.cbp
|
||||
.idea/
|
10
CMakeLists.txt
Normal file
10
CMakeLists.txt
Normal file
@ -0,0 +1,10 @@
|
||||
cmake_minimum_required(VERSION 3.14)
|
||||
include_directories("/usr/include/postgresql/server/")
|
||||
project(hamming C)
|
||||
|
||||
set(CMAKE_C_STANDARD 99)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
|
||||
|
||||
find_package("PostgreSQL REQUIRED")
|
||||
add_library(hamming SHARED hamming.c)
|
13
README.md
Normal file
13
README.md
Normal file
@ -0,0 +1,13 @@
|
||||
# PostgreSQL hamming distance
|
||||
|
||||
Hamming distance for fixed-length `bytea` datatype.
|
||||
|
||||
|
||||
### Compiling from source (Cmake)
|
||||
|
||||
```bash
|
||||
cmake .
|
||||
make
|
||||
```
|
||||
|
||||
See [hamming.c](hamming.c) for more information
|
78
hamming.c
Normal file
78
hamming.c
Normal file
@ -0,0 +1,78 @@
|
||||
#include "postgresql/server/postgres.h"
|
||||
#include "postgresql/server/fmgr.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
int const HASH_SIZE = 12;
|
||||
int const HASH_BITS = HASH_SIZE * HASH_SIZE;
|
||||
int const BYTEA_LEN = HASH_BITS / 8;
|
||||
|
||||
PG_FUNCTION_INFO_V1(hash_is_within_distance);
|
||||
|
||||
/**
|
||||
* Check if the hamming distance of the two raw byte arrays
|
||||
* is within the specified distance
|
||||
*
|
||||
* It is assumed that: the two arrays are exactly
|
||||
* BYTEA_LEN bytes long
|
||||
*
|
||||
* Import with
|
||||
CREATE OR REPLACE FUNCTION hash_is_within_distance(bytea, bytea, integer) RETURNS boolean
|
||||
AS '/path/to/libhamming.so', 'hash_is_within_distance'
|
||||
LANGUAGE C STRICT;'
|
||||
*
|
||||
* @return the hamming distance between the two arrays
|
||||
*/
|
||||
Datum hash_is_within_distance(PG_FUNCTION_ARGS) {
|
||||
|
||||
bytea *hash1 = PG_GETARG_BYTEA_P(0);
|
||||
bytea *hash2 = PG_GETARG_BYTEA_P(1);
|
||||
int32 max_distance = PG_GETARG_INT32(2);
|
||||
|
||||
int distance = 0;
|
||||
|
||||
char *h1 = hash1->vl_len_;
|
||||
char *h2 = hash2->vl_len_;
|
||||
|
||||
for (int i = BYTEA_LEN; i >= 0; i--) {
|
||||
distance += __builtin_popcount(h1[i] ^ h2[i]);
|
||||
|
||||
if (distance > max_distance) {
|
||||
PG_RETURN_BOOL(false);
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_BOOL(true);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(hash_distance);
|
||||
|
||||
/**
|
||||
* Hamming distance of two raw byte arrays
|
||||
*
|
||||
* It is assumed that: the two arrays are exactly
|
||||
* BYTEA_LEN bytes long
|
||||
*
|
||||
* Import with
|
||||
CREATE OR REPLACE FUNCTION hash_distance(bytea, bytea) RETURNS integer
|
||||
AS '/path/to/libhamming.so', 'hash_distance'
|
||||
LANGUAGE C STRICT;'
|
||||
*
|
||||
* @return the hamming distance between the two arrays
|
||||
*/
|
||||
Datum hash_distance(PG_FUNCTION_ARGS) {
|
||||
|
||||
bytea *hash1 = PG_GETARG_BYTEA_P(0);
|
||||
bytea *hash2 = PG_GETARG_BYTEA_P(1);
|
||||
|
||||
int distance = 0;
|
||||
|
||||
char *h1 = hash1->vl_len_;
|
||||
char *h2 = hash2->vl_len_;
|
||||
|
||||
for (int i = BYTEA_LEN; i >= 0; i--) {
|
||||
distance += __builtin_popcount(h1[i] ^ h2[i]);
|
||||
}
|
||||
|
||||
PG_RETURN_INT32(distance);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user