mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 05:51:21 +08:00
Moved bit code to separate files
This commit is contained in:
2
Makefile
2
Makefile
@@ -3,7 +3,7 @@ EXTVERSION = 0.6.2
|
||||
|
||||
MODULE_big = vector
|
||||
DATA = $(wildcard sql/*--*.sql)
|
||||
OBJS = src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
|
||||
OBJS = src/bitvector.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
|
||||
HEADERS = src/vector.h
|
||||
|
||||
TESTS = $(wildcard test/sql/*.sql)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
EXTENSION = vector
|
||||
EXTVERSION = 0.6.2
|
||||
|
||||
OBJS = src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
|
||||
OBJS = src\bitvector.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
|
||||
HEADERS = src\vector.h
|
||||
|
||||
REGRESS = btree cast copy functions input ivfflat_cosine ivfflat_ip ivfflat_l2 ivfflat_options ivfflat_unlogged
|
||||
|
||||
60
src/bitvector.c
Normal file
60
src/bitvector.c
Normal file
@@ -0,0 +1,60 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "bitvector.h"
|
||||
#include "port/pg_bitutils.h"
|
||||
#include "utils/varbit.h"
|
||||
|
||||
#if PG_VERSION_NUM >= 160000
|
||||
#include "varatt.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Allocate and initialize a new bit vector
|
||||
*/
|
||||
VarBit *
|
||||
InitBitVector(int dim)
|
||||
{
|
||||
VarBit *result;
|
||||
int size;
|
||||
|
||||
size = VARBITTOTALLEN(dim);
|
||||
result = (VarBit *) palloc0(size);
|
||||
SET_VARSIZE(result, size);
|
||||
VARBITLEN(result) = dim;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure same number of bits
|
||||
*/
|
||||
static inline void
|
||||
CheckBitLengths(uint32 aLen, uint32 bLen)
|
||||
{
|
||||
if (aLen != bLen)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("different bit lengths %u and %u", aLen, bLen)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the Hamming distance between two bit strings
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(hamming_distance);
|
||||
Datum
|
||||
hamming_distance(PG_FUNCTION_ARGS)
|
||||
{
|
||||
VarBit *a = PG_GETARG_VARBIT_P(0);
|
||||
VarBit *b = PG_GETARG_VARBIT_P(1);
|
||||
unsigned char *ax = VARBITS(a);
|
||||
unsigned char *bx = VARBITS(b);
|
||||
uint64 distance = 0;
|
||||
|
||||
CheckBitLengths(VARBITLEN(a), VARBITLEN(b));
|
||||
|
||||
/* TODO Improve performance */
|
||||
for (uint32 i = 0; i < VARBITBYTES(a); i++)
|
||||
distance += pg_number_of_ones[ax[i] ^ bx[i]];
|
||||
|
||||
PG_RETURN_FLOAT8((double) distance);
|
||||
}
|
||||
8
src/bitvector.h
Normal file
8
src/bitvector.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#ifndef BITVECTOR_H
|
||||
#define BITVECTOR_H
|
||||
|
||||
#include "utils/varbit.h"
|
||||
|
||||
/* Allocate and initialize a new bit vector holding "dim" bits */
VarBit *InitBitVector(int dim);
|
||||
|
||||
#endif
|
||||
@@ -1,13 +1,13 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/relscan.h"
|
||||
#include "bitvector.h"
|
||||
#include "catalog/pg_type_d.h"
|
||||
#include "hnsw.h"
|
||||
#include "pgstat.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/varbit.h"
|
||||
|
||||
/*
|
||||
* Algorithm 5 from paper
|
||||
|
||||
93
src/vector.c
93
src/vector.c
@@ -2,6 +2,7 @@
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "bitvector.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "common/shortest_dec.h"
|
||||
#include "fmgr.h"
|
||||
@@ -10,13 +11,11 @@
|
||||
#include "lib/stringinfo.h"
|
||||
#include "libpq/pqformat.h"
|
||||
#include "port.h" /* for strtof() */
|
||||
#include "port/pg_bitutils.h"
|
||||
#include "utils/array.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/float.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/numeric.h"
|
||||
#include "utils/varbit.h"
|
||||
#include "vector.h"
|
||||
|
||||
#if PG_VERSION_NUM >= 160000
|
||||
@@ -862,6 +861,26 @@ vector_mul(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Quantize a vector
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(quantize_binary);
|
||||
Datum
|
||||
quantize_binary(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Vector *a = PG_GETARG_VECTOR_P(0);
|
||||
float *ax = a->x;
|
||||
VarBit *result = InitBitVector(a->dim);
|
||||
unsigned char *rx = VARBITS(result);
|
||||
|
||||
/* TODO Improve */
|
||||
for (int i = 0; i < a->dim; i++)
|
||||
rx[i / 8] |= (ax[i] > 0) << (7 - (i % 8));
|
||||
|
||||
PG_RETURN_VARBIT_P(result);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Internal helper to compare vectors
|
||||
*/
|
||||
@@ -1162,73 +1181,3 @@ vector_avg(PG_FUNCTION_ARGS)
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate and initialize a new bit vector
|
||||
*/
|
||||
VarBit *
|
||||
InitBitVector(int dim)
|
||||
{
|
||||
VarBit *result;
|
||||
int size;
|
||||
|
||||
size = VARBITTOTALLEN(dim);
|
||||
result = (VarBit *) palloc0(size);
|
||||
SET_VARSIZE(result, size);
|
||||
VARBITLEN(result) = dim;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Quantize a vector
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(quantize_binary);
|
||||
Datum
|
||||
quantize_binary(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Vector *a = PG_GETARG_VECTOR_P(0);
|
||||
float *ax = a->x;
|
||||
VarBit *result = InitBitVector(a->dim);
|
||||
unsigned char *rx = VARBITS(result);
|
||||
|
||||
/* TODO Improve */
|
||||
for (int i = 0; i < a->dim; i++)
|
||||
rx[i / 8] |= (ax[i] > 0) << (7 - (i % 8));
|
||||
|
||||
PG_RETURN_VARBIT_P(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure same number of bits
|
||||
*/
|
||||
static inline void
|
||||
CheckBitLengths(uint32 aLen, uint32 bLen)
|
||||
{
|
||||
if (aLen != bLen)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("different bit lengths %u and %u", aLen, bLen)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the Hamming distance between two bit strings
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(hamming_distance);
|
||||
Datum
|
||||
hamming_distance(PG_FUNCTION_ARGS)
|
||||
{
|
||||
VarBit *a = PG_GETARG_VARBIT_P(0);
|
||||
VarBit *b = PG_GETARG_VARBIT_P(1);
|
||||
unsigned char *ax = VARBITS(a);
|
||||
unsigned char *bx = VARBITS(b);
|
||||
uint64 distance = 0;
|
||||
|
||||
CheckBitLengths(VARBITLEN(a), VARBITLEN(b));
|
||||
|
||||
/* TODO Improve performance */
|
||||
for (uint32 i = 0; i < VARBITBYTES(a); i++)
|
||||
distance += pg_number_of_ones[ax[i] ^ bx[i]];
|
||||
|
||||
PG_RETURN_FLOAT8((double) distance);
|
||||
}
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
#ifndef VECTOR_H
|
||||
#define VECTOR_H
|
||||
|
||||
#include "utils/varbit.h"
|
||||
|
||||
#define VECTOR_MAX_DIM 16000
|
||||
|
||||
#define VECTOR_SIZE(_dim) (offsetof(Vector, x) + sizeof(float)*(_dim))
|
||||
@@ -19,7 +17,6 @@ typedef struct Vector
|
||||
} Vector;
|
||||
|
||||
Vector *InitVector(int dim);
|
||||
VarBit *InitBitVector(int dim);
|
||||
void PrintVector(char *msg, Vector * vector);
|
||||
int vector_cmp_internal(Vector * a, Vector * b);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user