mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 05:51:21 +08:00
338 lines
8.6 KiB
C
338 lines
8.6 KiB
C
#ifndef IVFFLAT_H
|
|
#define IVFFLAT_H
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "access/genam.h"
|
|
#include "access/generic_xlog.h"
|
|
#include "access/parallel.h"
|
|
#include "lib/pairingheap.h"
|
|
#include "nodes/execnodes.h"
|
|
#include "port.h" /* for random() */
|
|
#include "utils/sampling.h"
|
|
#include "utils/tuplesort.h"
|
|
#include "vector.h"
|
|
|
|
#if PG_VERSION_NUM >= 150000
|
|
#include "common/pg_prng.h"
|
|
#endif
|
|
|
|
#ifdef IVFFLAT_BENCH
|
|
#include "portability/instr_time.h"
|
|
#endif
|
|
|
|
#define IVFFLAT_MAX_DIM 2000
|
|
|
|
/* Support functions */
|
|
#define IVFFLAT_DISTANCE_PROC 1
|
|
#define IVFFLAT_NORM_PROC 2
|
|
#define IVFFLAT_KMEANS_DISTANCE_PROC 3
|
|
#define IVFFLAT_KMEANS_NORM_PROC 4
|
|
#define IVFFLAT_TYPE_INFO_PROC 5
|
|
|
|
#define IVFFLAT_VERSION 1
|
|
#define IVFFLAT_MAGIC_NUMBER 0x14FF1A7
|
|
#define IVFFLAT_PAGE_ID 0xFF84
|
|
|
|
/* Preserved page numbers */
|
|
#define IVFFLAT_METAPAGE_BLKNO 0
|
|
#define IVFFLAT_HEAD_BLKNO 1 /* first list page */
|
|
|
|
/* IVFFlat parameters */
|
|
#define IVFFLAT_DEFAULT_LISTS 100
|
|
#define IVFFLAT_MIN_LISTS 1
|
|
#define IVFFLAT_MAX_LISTS 32768
|
|
#define IVFFLAT_DEFAULT_PROBES 1
|
|
|
|
/* Build phases */
|
|
/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */
|
|
#define PROGRESS_IVFFLAT_PHASE_KMEANS 2
|
|
#define PROGRESS_IVFFLAT_PHASE_ASSIGN 3
|
|
#define PROGRESS_IVFFLAT_PHASE_LOAD 4
|
|
|
|
#define IVFFLAT_LIST_SIZE(size) (offsetof(IvfflatListData, center) + size)
|
|
|
|
#define IvfflatPageGetOpaque(page) ((IvfflatPageOpaque) PageGetSpecialPointer(page))
|
|
#define IvfflatPageGetMeta(page) ((IvfflatMetaPageData *) PageGetContents(page))
|
|
|
|
#ifdef IVFFLAT_BENCH
|
|
#define IvfflatBench(name, code) \
|
|
do { \
|
|
instr_time start; \
|
|
instr_time duration; \
|
|
INSTR_TIME_SET_CURRENT(start); \
|
|
(code); \
|
|
INSTR_TIME_SET_CURRENT(duration); \
|
|
INSTR_TIME_SUBTRACT(duration, start); \
|
|
elog(INFO, "%s: %.3f ms", name, INSTR_TIME_GET_MILLISEC(duration)); \
|
|
} while (0)
|
|
#else
|
|
#define IvfflatBench(name, code) (code)
|
|
#endif
|
|
|
|
#if PG_VERSION_NUM >= 150000
|
|
#define RandomDouble() pg_prng_double(&pg_global_prng_state)
|
|
#define RandomInt() pg_prng_uint32(&pg_global_prng_state)
|
|
#else
|
|
#define RandomDouble() (((double) random()) / MAX_RANDOM_VALUE)
|
|
#define RandomInt() random()
|
|
#endif
|
|
|
|
/* Variables */
|
|
extern int ivfflat_probes;
|
|
extern int ivfflat_iterative_search;
|
|
extern int ivfflat_iterative_search_max_probes;
|
|
|
|
typedef enum IvfflatIterativeSearchType
|
|
{
|
|
IVFFLAT_ITERATIVE_SEARCH_OFF,
|
|
IVFFLAT_ITERATIVE_SEARCH_RELAXED
|
|
} IvfflatIterativeSearchType;
|
|
|
|
typedef struct VectorArrayData
|
|
{
|
|
int length;
|
|
int maxlen;
|
|
int dim;
|
|
Size itemsize;
|
|
char *items;
|
|
} VectorArrayData;
|
|
|
|
typedef VectorArrayData * VectorArray;
|
|
|
|
typedef struct ListInfo
|
|
{
|
|
BlockNumber blkno;
|
|
OffsetNumber offno;
|
|
} ListInfo;
|
|
|
|
/* IVFFlat index options */
|
|
typedef struct IvfflatOptions
|
|
{
|
|
int32 vl_len_; /* varlena header (do not touch directly!) */
|
|
int lists; /* number of lists */
|
|
} IvfflatOptions;
|
|
|
|
typedef struct IvfflatSpool
|
|
{
|
|
Tuplesortstate *sortstate;
|
|
Relation heap;
|
|
Relation index;
|
|
} IvfflatSpool;
|
|
|
|
typedef struct IvfflatShared
|
|
{
|
|
/* Immutable state */
|
|
Oid heaprelid;
|
|
Oid indexrelid;
|
|
bool isconcurrent;
|
|
int scantuplesortstates;
|
|
|
|
/* Worker progress */
|
|
ConditionVariable workersdonecv;
|
|
|
|
/* Mutex for mutable state */
|
|
slock_t mutex;
|
|
|
|
/* Mutable state */
|
|
int nparticipantsdone;
|
|
double reltuples;
|
|
double indtuples;
|
|
|
|
#ifdef IVFFLAT_KMEANS_DEBUG
|
|
double inertia;
|
|
#endif
|
|
} IvfflatShared;
|
|
|
|
#define ParallelTableScanFromIvfflatShared(shared) \
|
|
(ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(IvfflatShared)))
|
|
|
|
typedef struct IvfflatLeader
|
|
{
|
|
ParallelContext *pcxt;
|
|
int nparticipanttuplesorts;
|
|
IvfflatShared *ivfshared;
|
|
Sharedsort *sharedsort;
|
|
Snapshot snapshot;
|
|
char *ivfcenters;
|
|
} IvfflatLeader;
|
|
|
|
typedef struct IvfflatTypeInfo
|
|
{
|
|
int maxDimensions;
|
|
Datum (*normalize) (PG_FUNCTION_ARGS);
|
|
Size (*itemSize) (int dimensions);
|
|
void (*updateCenter) (Pointer v, int dimensions, float *x);
|
|
void (*sumCenter) (Pointer v, float *x);
|
|
} IvfflatTypeInfo;
|
|
|
|
typedef struct IvfflatBuildState
|
|
{
|
|
/* Info */
|
|
Relation heap;
|
|
Relation index;
|
|
IndexInfo *indexInfo;
|
|
const IvfflatTypeInfo *typeInfo;
|
|
TupleDesc tupdesc;
|
|
|
|
/* Settings */
|
|
int dimensions;
|
|
int lists;
|
|
|
|
/* Statistics */
|
|
double indtuples;
|
|
double reltuples;
|
|
|
|
/* Support functions */
|
|
FmgrInfo *procinfo;
|
|
FmgrInfo *normprocinfo;
|
|
FmgrInfo *kmeansnormprocinfo;
|
|
Oid collation;
|
|
|
|
/* Variables */
|
|
VectorArray samples;
|
|
VectorArray centers;
|
|
ListInfo *listInfo;
|
|
|
|
#ifdef IVFFLAT_KMEANS_DEBUG
|
|
double inertia;
|
|
double *listSums;
|
|
int *listCounts;
|
|
#endif
|
|
|
|
/* Sampling */
|
|
BlockSamplerData bs;
|
|
ReservoirStateData rstate;
|
|
int rowstoskip;
|
|
|
|
/* Sorting */
|
|
Tuplesortstate *sortstate;
|
|
TupleDesc sortdesc;
|
|
TupleTableSlot *slot;
|
|
|
|
/* Memory */
|
|
MemoryContext tmpCtx;
|
|
|
|
/* Parallel builds */
|
|
IvfflatLeader *ivfleader;
|
|
} IvfflatBuildState;
|
|
|
|
typedef struct IvfflatMetaPageData
|
|
{
|
|
uint32 magicNumber;
|
|
uint32 version;
|
|
uint16 dimensions;
|
|
uint16 lists;
|
|
} IvfflatMetaPageData;
|
|
|
|
typedef IvfflatMetaPageData * IvfflatMetaPage;
|
|
|
|
typedef struct IvfflatPageOpaqueData
|
|
{
|
|
BlockNumber nextblkno;
|
|
uint16 unused;
|
|
uint16 page_id; /* for identification of IVFFlat indexes */
|
|
} IvfflatPageOpaqueData;
|
|
|
|
typedef IvfflatPageOpaqueData * IvfflatPageOpaque;
|
|
|
|
typedef struct IvfflatListData
|
|
{
|
|
BlockNumber startPage;
|
|
BlockNumber insertPage;
|
|
Vector center;
|
|
} IvfflatListData;
|
|
|
|
typedef IvfflatListData * IvfflatList;
|
|
|
|
typedef struct IvfflatScanList
|
|
{
|
|
pairingheap_node ph_node;
|
|
BlockNumber startPage;
|
|
double distance;
|
|
} IvfflatScanList;
|
|
|
|
typedef struct IvfflatScanOpaqueData
|
|
{
|
|
const IvfflatTypeInfo *typeInfo;
|
|
int probes;
|
|
int maxProbes;
|
|
int dimensions;
|
|
bool first;
|
|
Datum value;
|
|
|
|
/* Sorting */
|
|
Tuplesortstate *sortstate;
|
|
TupleDesc tupdesc;
|
|
TupleTableSlot *vslot;
|
|
TupleTableSlot *mslot;
|
|
BufferAccessStrategy bas;
|
|
|
|
/* Support functions */
|
|
FmgrInfo *procinfo;
|
|
FmgrInfo *normprocinfo;
|
|
Oid collation;
|
|
Datum (*distfunc) (FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2);
|
|
|
|
/* Lists */
|
|
pairingheap *listQueue;
|
|
BlockNumber *listPages;
|
|
int listIndex;
|
|
IvfflatScanList lists[FLEXIBLE_ARRAY_MEMBER]; /* must come last */
|
|
} IvfflatScanOpaqueData;
|
|
|
|
typedef IvfflatScanOpaqueData * IvfflatScanOpaque;
|
|
|
|
#define VECTOR_ARRAY_SIZE(_length, _size) (sizeof(VectorArrayData) + (_length) * MAXALIGN(_size))
|
|
|
|
/* Use functions instead of macros to avoid double evaluation */
|
|
|
|
static inline Pointer
|
|
VectorArrayGet(VectorArray arr, int offset)
|
|
{
|
|
return ((char *) arr->items) + (offset * arr->itemsize);
|
|
}
|
|
|
|
static inline void
|
|
VectorArraySet(VectorArray arr, int offset, Pointer val)
|
|
{
|
|
memcpy(VectorArrayGet(arr, offset), val, VARSIZE_ANY(val));
|
|
}
|
|
|
|
/* Methods */
|
|
VectorArray VectorArrayInit(int maxlen, int dimensions, Size itemsize);
|
|
void VectorArrayFree(VectorArray arr);
|
|
void IvfflatKmeans(Relation index, VectorArray samples, VectorArray centers, const IvfflatTypeInfo * typeInfo);
|
|
FmgrInfo *IvfflatOptionalProcInfo(Relation index, uint16 procnum);
|
|
Datum IvfflatNormValue(const IvfflatTypeInfo * typeInfo, Oid collation, Datum value);
|
|
bool IvfflatCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value);
|
|
int IvfflatGetLists(Relation index);
|
|
void IvfflatGetMetaPageInfo(Relation index, int *lists, int *dimensions);
|
|
void IvfflatUpdateList(Relation index, ListInfo listInfo, BlockNumber insertPage, BlockNumber originalInsertPage, BlockNumber startPage, ForkNumber forkNum);
|
|
void IvfflatCommitBuffer(Buffer buf, GenericXLogState *state);
|
|
void IvfflatAppendPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state, ForkNumber forkNum);
|
|
Buffer IvfflatNewBuffer(Relation index, ForkNumber forkNum);
|
|
void IvfflatInitPage(Buffer buf, Page page);
|
|
void IvfflatInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state);
|
|
void IvfflatInit(void);
|
|
const IvfflatTypeInfo *IvfflatGetTypeInfo(Relation index);
|
|
PGDLLEXPORT void IvfflatParallelBuildMain(dsm_segment *seg, shm_toc *toc);
|
|
|
|
/* Index access methods */
|
|
IndexBuildResult *ivfflatbuild(Relation heap, Relation index, IndexInfo *indexInfo);
|
|
void ivfflatbuildempty(Relation index);
|
|
bool ivfflatinsert(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heap, IndexUniqueCheck checkUnique
|
|
#if PG_VERSION_NUM >= 140000
|
|
,bool indexUnchanged
|
|
#endif
|
|
,IndexInfo *indexInfo
|
|
);
|
|
IndexBulkDeleteResult *ivfflatbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state);
|
|
IndexBulkDeleteResult *ivfflatvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats);
|
|
IndexScanDesc ivfflatbeginscan(Relation index, int nkeys, int norderbys);
|
|
void ivfflatrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys);
|
|
bool ivfflatgettuple(IndexScanDesc scan, ScanDirection dir);
|
|
void ivfflatendscan(IndexScanDesc scan);
|
|
|
|
#endif
|