mirror of
				https://github.com/simon987/pg_hamming.git
				synced 2025-10-25 03:56:51 +00:00 
			
		
		
		
	add hash_is_within_distance_any
This commit is contained in:
		
							parent
							
								
									3cf7882cb6
								
							
						
					
					
						commit
						53841ab679
					
				| @ -10,4 +10,7 @@ cmake . | ||||
| make | ||||
| ``` | ||||
| 
 | ||||
| The functions in this library are very domain-specific and can realistically | ||||
| only be used within the scope of [irarchives](https://github.com/simon987/irarchives). | ||||
| 
 | ||||
| See [hamming.c](hamming.c) for more information. | ||||
|  | ||||
							
								
								
									
										54
									
								
								hamming.c
									
									
									
									
									
								
							
							
						
						
									
										54
									
								
								hamming.c
									
									
									
									
									
								
							| @ -78,3 +78,57 @@ Datum hash_distance(PG_FUNCTION_ARGS) { | ||||
| 
 | ||||
|     PG_RETURN_INT32(distance); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| PG_FUNCTION_INFO_V1(hash_is_within_distance_any); | ||||
| 
 | ||||
| /**
 | ||||
|  * Check if the first argument matches any (within distance 'max_distance') | ||||
|     hashes among an array of hashes | ||||
|  * | ||||
|  * It is assumed that: the first array is exactly 18 bytes long, the | ||||
|     second array is a multiple of 18 bytes | ||||
|  * | ||||
|  * Import with | ||||
|     CREATE OR REPLACE FUNCTION hash_is_within_distance_any(bytea, bytea, integer) RETURNS bool | ||||
|      AS '/path/to/libhamming.so', 'hash_is_within_distance_any' | ||||
|      LANGUAGE C STRICT;' | ||||
|  * | ||||
|  * @return the hamming distance between the two arrays | ||||
|  */ | ||||
| Datum hash_is_within_distance_any(PG_FUNCTION_ARGS) { | ||||
| 
 | ||||
|     char *h = VARDATA(PG_GETARG_BYTEA_P(0)); | ||||
|     bytea *h_bytea = PG_GETARG_BYTEA_P(1); | ||||
|     char *h_arr = VARDATA(h_bytea); | ||||
|     int32 max_distance = PG_GETARG_INT32(2); | ||||
| 
 | ||||
|     int distance; | ||||
| 
 | ||||
|     for (int i = VARSIZE(h_bytea) - 18; i >= 0; i -= 18) { | ||||
|         h_arr += 18; | ||||
|         distance = 0; | ||||
| 
 | ||||
|         distance += __builtin_popcountll( | ||||
|                 *((uint64 *) h) ^ *((uint64 *) h_arr) | ||||
|         ); | ||||
|         if (distance > max_distance) { | ||||
|             continue; | ||||
|         } | ||||
|         distance += __builtin_popcountll( | ||||
|                 *((uint64 *) h + 1) ^ *((uint64 *) h_arr + 1) | ||||
|         ); | ||||
|         if (distance > max_distance) { | ||||
|             continue; | ||||
|         } | ||||
|         distance += __builtin_popcount( | ||||
|                 *((uint16 *) h + 8) ^ *((uint16 *) h_arr + 8) | ||||
|         ); | ||||
| 
 | ||||
|         if (distance <= max_distance) { | ||||
|             PG_RETURN_BOOL(true); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     PG_RETURN_BOOL(false); | ||||
| } | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user