Moved bit code to separate files

2026-07-22 03:57:34 +08:00 · 2024-03-25 17:10:01 -07:00
parent d9ca850faf
commit 8b819dfdc2
7 changed files with 92 additions and 78 deletions
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ EXTVERSION = 0.6.2

 MODULE_big = vector
 DATA = $(wildcard sql/*--*.sql)
-OBJS = src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
+OBJS = src/bitvector.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
 HEADERS = src/vector.h

 TESTS = $(wildcard test/sql/*.sql)
--- a/Makefile.win
+++ b/Makefile.win
@@ -1,7 +1,7 @@
 EXTENSION = vector
 EXTVERSION = 0.6.2

-OBJS = src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
+OBJS = src\bitvector.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
 HEADERS = src\vector.h

 REGRESS = btree cast copy functions input ivfflat_cosine ivfflat_ip ivfflat_l2 ivfflat_options ivfflat_unlogged
--- a/src/bitvector.c
+++ b/src/bitvector.c
@@ -0,0 +1,60 @@
+#include "postgres.h"
+
+#include "bitvector.h"
+#include "port/pg_bitutils.h"
+#include "utils/varbit.h"
+
+#if PG_VERSION_NUM >= 160000
+#include "varatt.h"
+#endif
+
+/*
+ * Allocate and initialize a new bit vector
+ */
+VarBit *
+InitBitVector(int dim)
+{
+	VarBit	   *result;
+	int			size;
+
+	size = VARBITTOTALLEN(dim);
+	result = (VarBit *) palloc0(size);
+	SET_VARSIZE(result, size);
+	VARBITLEN(result) = dim;
+
+	return result;
+}
+
+/*
+ * Ensure same number of bits
+ */
+static inline void
+CheckBitLengths(uint32 aLen, uint32 bLen)
+{
+	if (aLen != bLen)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("different bit lengths %u and %u", aLen, bLen)));
+}
+
+/*
+ * Get the Hamming distance between two bit strings
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(hamming_distance);
+Datum
+hamming_distance(PG_FUNCTION_ARGS)
+{
+	VarBit	   *a = PG_GETARG_VARBIT_P(0);
+	VarBit	   *b = PG_GETARG_VARBIT_P(1);
+	unsigned char *ax = VARBITS(a);
+	unsigned char *bx = VARBITS(b);
+	uint64		distance = 0;
+
+	CheckBitLengths(VARBITLEN(a), VARBITLEN(b));
+
+	/* TODO Improve performance */
+	for (uint32 i = 0; i < VARBITBYTES(a); i++)
+		distance += pg_number_of_ones[ax[i] ^ bx[i]];
+
+	PG_RETURN_FLOAT8((double) distance);
+}
--- a/src/bitvector.h
+++ b/src/bitvector.h
@@ -0,0 +1,8 @@
+#ifndef BITVECTOR_H
+#define BITVECTOR_H
+
+#include "utils/varbit.h"
+
+VarBit	   *InitBitVector(int dim);
+
+#endif
--- a/src/hnswscan.c
+++ b/src/hnswscan.c
@@ -1,13 +1,13 @@
 #include "postgres.h"

 #include "access/relscan.h"
+#include "bitvector.h"
 #include "catalog/pg_type_d.h"
 #include "hnsw.h"
 #include "pgstat.h"
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
 #include "utils/memutils.h"
-#include "utils/varbit.h"

 /*
  * Algorithm 5 from paper
--- a/src/vector.c
+++ b/src/vector.c
@@ -2,6 +2,7 @@

 #include <math.h>

+#include "bitvector.h"
 #include "catalog/pg_type.h"
 #include "common/shortest_dec.h"
 #include "fmgr.h"
@@ -10,13 +11,11 @@
 #include "lib/stringinfo.h"
 #include "libpq/pqformat.h"
 #include "port.h"				/* for strtof() */
-#include "port/pg_bitutils.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/float.h"
 #include "utils/lsyscache.h"
 #include "utils/numeric.h"
-#include "utils/varbit.h"
 #include "vector.h"

 #if PG_VERSION_NUM >= 160000
@@ -862,6 +861,26 @@ vector_mul(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(result);
 }

+/*
+ * Quantize a vector
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(quantize_binary);
+Datum
+quantize_binary(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	float	   *ax = a->x;
+	VarBit	   *result = InitBitVector(a->dim);
+	unsigned char *rx = VARBITS(result);
+
+	/* TODO Improve */
+	for (int i = 0; i < a->dim; i++)
+		rx[i / 8] |= (ax[i] > 0) << (7 - (i % 8));
+
+	PG_RETURN_VARBIT_P(result);
+}
+
+
 /*
  * Internal helper to compare vectors
  */
@@ -1162,73 +1181,3 @@ vector_avg(PG_FUNCTION_ARGS)

 	PG_RETURN_POINTER(result);
 }
-
-/*
- * Allocate and initialize a new bit vector
- */
-VarBit *
-InitBitVector(int dim)
-{
-	VarBit	   *result;
-	int			size;
-
-	size = VARBITTOTALLEN(dim);
-	result = (VarBit *) palloc0(size);
-	SET_VARSIZE(result, size);
-	VARBITLEN(result) = dim;
-
-	return result;
-}
-
-/*
- * Quantize a vector
- */
-PGDLLEXPORT PG_FUNCTION_INFO_V1(quantize_binary);
-Datum
-quantize_binary(PG_FUNCTION_ARGS)
-{
-	Vector	   *a = PG_GETARG_VECTOR_P(0);
-	float	   *ax = a->x;
-	VarBit	   *result = InitBitVector(a->dim);
-	unsigned char *rx = VARBITS(result);
-
-	/* TODO Improve */
-	for (int i = 0; i < a->dim; i++)
-		rx[i / 8] |= (ax[i] > 0) << (7 - (i % 8));
-
-	PG_RETURN_VARBIT_P(result);
-}
-
-/*
- * Ensure same number of bits
- */
-static inline void
-CheckBitLengths(uint32 aLen, uint32 bLen)
-{
-	if (aLen != bLen)
-		ereport(ERROR,
-				(errcode(ERRCODE_DATA_EXCEPTION),
-				 errmsg("different bit lengths %u and %u", aLen, bLen)));
-}
-
-/*
- * Get the Hamming distance between two bit strings
- */
-PGDLLEXPORT PG_FUNCTION_INFO_V1(hamming_distance);
-Datum
-hamming_distance(PG_FUNCTION_ARGS)
-{
-	VarBit	   *a = PG_GETARG_VARBIT_P(0);
-	VarBit	   *b = PG_GETARG_VARBIT_P(1);
-	unsigned char *ax = VARBITS(a);
-	unsigned char *bx = VARBITS(b);
-	uint64		distance = 0;
-
-	CheckBitLengths(VARBITLEN(a), VARBITLEN(b));
-
-	/* TODO Improve performance */
-	for (uint32 i = 0; i < VARBITBYTES(a); i++)
-		distance += pg_number_of_ones[ax[i] ^ bx[i]];
-
-	PG_RETURN_FLOAT8((double) distance);
-}
--- a/src/vector.h
+++ b/src/vector.h
@@ -1,8 +1,6 @@
 #ifndef VECTOR_H
 #define VECTOR_H

-#include "utils/varbit.h"
-
 #define VECTOR_MAX_DIM 16000

 #define VECTOR_SIZE(_dim)		(offsetof(Vector, x) + sizeof(float)*(_dim))
@@ -19,7 +17,6 @@ typedef struct Vector
 }			Vector;

 Vector	   *InitVector(int dim);
-VarBit	   *InitBitVector(int dim);
 void		PrintVector(char *msg, Vector * vector);
 int			vector_cmp_internal(Vector * a, Vector * b);