Initial commit

This commit is contained in:
simon 2019-07-20 09:52:31 -04:00
commit 9ef184fd7c
4 changed files with 109 additions and 0 deletions

8
.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
cmake-build-debug/
CMakeFiles
cmake_install.cmake
Makefile
CMakeCache.txt
*.so
*.cbp
.idea/

10
CMakeLists.txt Normal file
View File

@ -0,0 +1,10 @@
# Build script for the `hamming` shared library (a PostgreSQL C extension).
cmake_minimum_required(VERSION 3.14)
project(hamming LANGUAGES C)

set(CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD_REQUIRED ON)

# Locate the PostgreSQL development headers. The package name and the
# REQUIRED keyword must be separate, unquoted arguments; quoting them together
# makes CMake search for a package literally called "PostgreSQL REQUIRED".
find_package(PostgreSQL REQUIRED)

# Tune the generated code for the build machine's CPU. Turn this off when the
# resulting library has to run on a different machine than the one building it.
option(HAMMING_NATIVE_ARCH "Compile with -march=native" ON)

add_library(hamming SHARED hamming.c)

# Only the headers are needed: the server resolves the backend symbols when it
# loads the module, so the library is not linked against libpq.
target_include_directories(hamming PRIVATE ${PostgreSQL_INCLUDE_DIRS})

if(HAMMING_NATIVE_ARCH)
  # The project is C-only, so the flag has to be attached as a C compile
  # option; CMAKE_CXX_FLAGS is never consulted for this target.
  target_compile_options(hamming PRIVATE -march=native)
endif()

13
README.md Normal file
View File

@ -0,0 +1,13 @@
# PostgreSQL hamming distance
Hamming distance for fixed-length `bytea` datatype.
### Compiling from source (CMake)
```bash
cmake .
make
```
See [hamming.c](hamming.c) for the SQL statements needed to import the functions.

78
hamming.c Normal file
View File

@ -0,0 +1,78 @@
#include "postgresql/server/postgres.h"
#include "postgresql/server/fmgr.h"
/* Module magic block: PostgreSQL requires this macro exactly once in every dynamically loaded C module. */
PG_MODULE_MAGIC;
/*
 * Hash dimensions.
 *
 * Declared as enumerators rather than `int const` objects so that each one is
 * a genuine integer constant expression in C (usable in another constant's
 * initialiser or as an array size) and so that no global data symbols are
 * exported from the shared library.
 */
enum {
    HASH_SIZE = 12,                     /* HASH_BITS is HASH_SIZE squared */
    HASH_BITS = HASH_SIZE * HASH_SIZE,  /* number of bits in one hash */
    BYTEA_LEN = HASH_BITS / 8           /* number of bytes in one hash */
};
PG_FUNCTION_INFO_V1(hash_is_within_distance);
/**
* Check if the hamming distance of the two raw byte arrays
* is within the specified distance
*
* It is assumed that: the two arrays are exactly
* BYTEA_LEN bytes long
*
* Import with
CREATE OR REPLACE FUNCTION hash_is_within_distance(bytea, bytea, integer) RETURNS boolean
AS '/path/to/libhamming.so', 'hash_is_within_distance'
LANGUAGE C STRICT;'
*
* @return the hamming distance between the two arrays
*/
Datum hash_is_within_distance(PG_FUNCTION_ARGS) {
bytea *hash1 = PG_GETARG_BYTEA_P(0);
bytea *hash2 = PG_GETARG_BYTEA_P(1);
int32 max_distance = PG_GETARG_INT32(2);
int distance = 0;
char *h1 = hash1->vl_len_;
char *h2 = hash2->vl_len_;
for (int i = BYTEA_LEN; i >= 0; i--) {
distance += __builtin_popcount(h1[i] ^ h2[i]);
if (distance > max_distance) {
PG_RETURN_BOOL(false);
}
}
PG_RETURN_BOOL(true);
}
PG_FUNCTION_INFO_V1(hash_distance);
/**
* Hamming distance of two raw byte arrays
*
* It is assumed that: the two arrays are exactly
* BYTEA_LEN bytes long
*
* Import with
CREATE OR REPLACE FUNCTION hash_distance(bytea, bytea) RETURNS integer
AS '/path/to/libhamming.so', 'hash_distance'
LANGUAGE C STRICT;'
*
* @return the hamming distance between the two arrays
*/
Datum hash_distance(PG_FUNCTION_ARGS) {
bytea *hash1 = PG_GETARG_BYTEA_P(0);
bytea *hash2 = PG_GETARG_BYTEA_P(1);
int distance = 0;
char *h1 = hash1->vl_len_;
char *h2 = hash2->vl_len_;
for (int i = BYTEA_LEN; i >= 0; i--) {
distance += __builtin_popcount(h1[i] ^ h2[i]);
}
PG_RETURN_INT32(distance);
}