mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-05 04:00:55 +08:00
Added support for bit to IVFFlat
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
#include "access/tableam.h"
|
||||
#include "access/parallel.h"
|
||||
#include "access/xact.h"
|
||||
#include "bitvector.h"
|
||||
#include "catalog/index.h"
|
||||
#include "catalog/pg_operator_d.h"
|
||||
#include "catalog/pg_type_d.h"
|
||||
@@ -324,6 +325,8 @@ GetMaxDimensions(IvfflatType type)
|
||||
|
||||
if (type == IVFFLAT_TYPE_HALFVEC)
|
||||
maxDimensions *= 2;
|
||||
else if (type == IVFFLAT_TYPE_BIT)
|
||||
maxDimensions *= 32;
|
||||
|
||||
return maxDimensions;
|
||||
}
|
||||
@@ -338,6 +341,8 @@ GetItemSize(IvfflatType type, int dimensions)
|
||||
return VECTOR_SIZE(dimensions);
|
||||
else if (type == IVFFLAT_TYPE_HALFVEC)
|
||||
return HALFVEC_SIZE(dimensions);
|
||||
else if (type == IVFFLAT_TYPE_BIT)
|
||||
return VARBITTOTALLEN(dimensions);
|
||||
else
|
||||
elog(ERROR, "Unsupported type");
|
||||
}
|
||||
|
||||
@@ -46,7 +46,8 @@
|
||||
/*
 * Type tags the IVFFlat code branches on (vector, halfvec, bit).
 *
 * IVFFLAT_TYPE_BIT is appended last so the existing enumerators keep
 * their values.
 */
typedef enum IvfflatType
{
	IVFFLAT_TYPE_VECTOR,
	IVFFLAT_TYPE_HALFVEC,
	IVFFLAT_TYPE_BIT
} IvfflatType;
|
||||
|
||||
/* Build phases */
|
||||
|
||||
@@ -3,10 +3,12 @@
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "bitvector.h"
|
||||
#include "halfutils.h"
|
||||
#include "halfvec.h"
|
||||
#include "ivfflat.h"
|
||||
#include "miscadmin.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/datum.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "vector.h"
|
||||
@@ -134,6 +136,15 @@ CompareHalfVectors(const void *a, const void *b)
|
||||
return halfvec_cmp_internal((HalfVector *) a, (HalfVector *) b);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare bit vectors
|
||||
*/
|
||||
static int
|
||||
CompareBitVectors(const void *a, const void *b)
|
||||
{
|
||||
return DirectFunctionCall2(bitcmp, VarBitPGetDatum((VarBit *) a), VarBitPGetDatum((VarBit *) b));
|
||||
}
|
||||
|
||||
/*
|
||||
* Quick approach if we have little data
|
||||
*/
|
||||
@@ -151,6 +162,8 @@ QuickCenters(Relation index, VectorArray samples, VectorArray centers, IvfflatTy
|
||||
qsort(samples->items, samples->length, samples->itemsize, CompareVectors);
|
||||
else if (type == IVFFLAT_TYPE_HALFVEC)
|
||||
qsort(samples->items, samples->length, samples->itemsize, CompareHalfVectors);
|
||||
else if (type == IVFFLAT_TYPE_BIT)
|
||||
qsort(samples->items, samples->length, samples->itemsize, CompareBitVectors);
|
||||
else
|
||||
elog(ERROR, "Unsupported type");
|
||||
|
||||
@@ -191,6 +204,16 @@ QuickCenters(Relation index, VectorArray samples, VectorArray centers, IvfflatTy
|
||||
for (int j = 0; j < dimensions; j++)
|
||||
vec->x[j] = Float4ToHalfUnchecked((float) RandomDouble());
|
||||
}
|
||||
else if (type == IVFFLAT_TYPE_BIT)
{
	VarBit	   *vec = DatumGetVarBitP(center);

	SET_VARSIZE(vec, VARBITTOTALLEN(dimensions));
	VARBITLEN(vec) = dimensions;

	/*
	 * Fill the center with random bits. Bit j is stored in byte j / 8,
	 * most significant bit first -- the same (k / 8, 7 - (k % 8)) addressing
	 * ComputeNewCenters uses. Indexing by j / dimensions would always hit
	 * byte 0 and leave every later byte untouched.
	 *
	 * NOTE(review): |= assumes the center's bytes start out zeroed --
	 * confirm the backing array is zero-initialized.
	 */
	for (int j = 0; j < dimensions; j++)
		VARBITS(vec)[j / 8] |= (RandomDouble() > 0.5 ? 1 : 0) << (7 - (j % 8));
}
|
||||
else
|
||||
elog(ERROR, "Unsupported type");
|
||||
|
||||
@@ -263,6 +286,17 @@ ComputeNewCenters(VectorArray samples, VectorArray aggCenters, VectorArray newCe
|
||||
aggCenter->x[k] += HalfToFloat4(vec->x[k]);
|
||||
}
|
||||
}
|
||||
else if (type == IVFFLAT_TYPE_BIT)
|
||||
{
|
||||
for (int j = 0; j < numSamples; j++)
|
||||
{
|
||||
Vector *aggCenter = (Vector *) VectorArrayGet(aggCenters, closestCenters[j]);
|
||||
VarBit *vec = (VarBit *) VectorArrayGet(samples, j);
|
||||
|
||||
for (int k = 0; k < dimensions; k++)
|
||||
aggCenter->x[k] += (float) (((VARBITS(vec)[k / 8]) >> (7 - (k % 8))) & 0x01);
|
||||
}
|
||||
}
|
||||
else
|
||||
elog(ERROR, "Unsupported type");
|
||||
|
||||
@@ -308,6 +342,21 @@ ComputeNewCenters(VectorArray samples, VectorArray aggCenters, VectorArray newCe
|
||||
newCenter->x[k] = Float4ToHalfUnchecked(aggCenter->x[k]);
|
||||
}
|
||||
}
|
||||
else if (type == IVFFLAT_TYPE_BIT)
|
||||
{
|
||||
for (int j = 0; j < numCenters; j++)
|
||||
{
|
||||
Vector *aggCenter = (Vector *) VectorArrayGet(aggCenters, j);
|
||||
VarBit *newCenter = (VarBit *) VectorArrayGet(newCenters, j);
|
||||
unsigned char *nx = VARBITS(newCenter);
|
||||
|
||||
for (uint32 k = 0; k < VARBITBYTES(newCenter); k++)
|
||||
nx[k] = 0;
|
||||
|
||||
for (int k = 0; k < dimensions; k++)
|
||||
nx[k / 8] |= (aggCenter->x[k] > 0.5) << (7 - (k % 8));
|
||||
}
|
||||
}
|
||||
|
||||
/* Normalize if needed */
|
||||
if (normprocinfo != NULL)
|
||||
@@ -425,6 +474,18 @@ ElkanKmeans(Relation index, VectorArray samples, VectorArray centers, IvfflatTyp
|
||||
vec->dim = dimensions;
|
||||
}
|
||||
}
|
||||
else if (type == IVFFLAT_TYPE_BIT)
|
||||
{
|
||||
newCenters = VectorArrayInit(numCenters, dimensions, centers->itemsize);
|
||||
|
||||
for (int j = 0; j < numCenters; j++)
|
||||
{
|
||||
VarBit *vec = (VarBit *) VectorArrayGet(newCenters, j);
|
||||
|
||||
SET_VARSIZE(vec, VARBITTOTALLEN(dimensions));
|
||||
VARBITLEN(vec) = dimensions;
|
||||
}
|
||||
}
|
||||
else
|
||||
elog(ERROR, "Unsupported type");
|
||||
|
||||
@@ -642,7 +703,7 @@ CheckCenters(Relation index, VectorArray centers, IvfflatType type)
|
||||
elog(ERROR, "Infinite value detected. Please report a bug.");
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (type != IVFFLAT_TYPE_BIT)
|
||||
elog(ERROR, "Unsupported type");
|
||||
}
|
||||
|
||||
@@ -652,6 +713,8 @@ CheckCenters(Relation index, VectorArray centers, IvfflatType type)
|
||||
qsort(centers->items, centers->length, centers->itemsize, CompareVectors);
|
||||
else if (type == IVFFLAT_TYPE_HALFVEC)
|
||||
qsort(centers->items, centers->length, centers->itemsize, CompareHalfVectors);
|
||||
else if (type == IVFFLAT_TYPE_BIT)
|
||||
qsort(centers->items, centers->length, centers->itemsize, CompareBitVectors);
|
||||
else
|
||||
elog(ERROR, "Unsupported type");
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <float.h>
|
||||
|
||||
#include "access/relscan.h"
|
||||
#include "bitvector.h"
|
||||
#include "catalog/pg_operator_d.h"
|
||||
#include "catalog/pg_type_d.h"
|
||||
#include "halfvec.h"
|
||||
@@ -195,6 +196,8 @@ GetScanValue(IndexScanDesc scan)
|
||||
value = PointerGetDatum(InitVector(so->dimensions));
|
||||
else if (type == IVFFLAT_TYPE_HALFVEC)
|
||||
value = PointerGetDatum(InitHalfVector(so->dimensions));
|
||||
else if (type == IVFFLAT_TYPE_BIT)
|
||||
value = PointerGetDatum(InitBitVector(so->dimensions));
|
||||
else
|
||||
elog(ERROR, "Unsupported type");
|
||||
}
|
||||
|
||||
@@ -73,6 +73,9 @@ IvfflatGetType(Relation index)
|
||||
Form_pg_type type;
|
||||
IvfflatType result;
|
||||
|
||||
if (typid == BITOID)
|
||||
return IVFFLAT_TYPE_BIT;
|
||||
|
||||
tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
|
||||
if (!HeapTupleIsValid(tuple))
|
||||
elog(ERROR, "cache lookup failed for type %u", typid);
|
||||
|
||||
Reference in New Issue
Block a user