Added jaccard_distance function

This commit is contained in:
Andrew Kane
2024-03-25 22:35:53 -07:00
parent e7a7936bb2
commit 791fc2436f
7 changed files with 76 additions and 0 deletions

View File

@@ -58,3 +58,32 @@ hamming_distance(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8((double) distance);
}
/*
 * Get the Jaccard distance between two bit strings
 *
 * Treating each bit string as the set of positions whose bit is 1, the
 * result is 1 - |A AND B| / |A OR B|, where |A OR B| is obtained by
 * inclusion-exclusion as |A| + |B| - |A AND B|.
 *
 * Returns 1 whenever the inputs share no set bits; this includes the case
 * where both inputs are entirely zero, which also keeps the division below
 * from ever seeing a zero denominator.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(jaccard_distance);
Datum
jaccard_distance(PG_FUNCTION_ARGS)
{
	VarBit	   *a = PG_GETARG_VARBIT_P(0);
	VarBit	   *b = PG_GETARG_VARBIT_P(1);
	unsigned char *ax = VARBITS(a);
	unsigned char *bx = VARBITS(b);
	uint32		nbytes;
	uint64		aa;
	uint64		bb;
	uint64		ab = 0;

	/* Presumably reports an error when the lengths differ - see CheckBitLengths */
	CheckBitLengths(VARBITLEN(a), VARBITLEN(b));

	/* Loop-invariant, so read it once instead of on every iteration */
	nbytes = VARBITBYTES(a);

	/* TODO Improve performance */
	/* Size of the intersection: set bits common to both inputs */
	for (uint32 i = 0; i < nbytes; i++)
		ab += pg_number_of_ones[ax[i] & bx[i]];

	/* Nothing in common: skip the popcounts entirely */
	if (ab == 0)
		PG_RETURN_FLOAT8(1);

	/* Only needed once we know the intersection is non-empty */
	aa = pg_popcount((char *) ax, nbytes);
	bb = pg_popcount((char *) bx, VARBITBYTES(b));

	/* ab > 0 implies aa + bb - ab >= ab > 0, so the denominator is non-zero */
	PG_RETURN_FLOAT8(1 - ((double) ab / (double) (aa + bb - ab)));
}