/*
 * Mirror of https://github.com/pgvector/pgvector.git
 * (synced 2026-07-01 10:11:20 +08:00; 309 lines, 9.0 KiB, C)
 */
#ifndef HNSW_H
|
|
#define HNSW_H
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "access/generic_xlog.h"
|
|
#include "access/reloptions.h"
|
|
#include "nodes/execnodes.h"
|
|
#include "port.h" /* for random() */
|
|
#include "utils/sampling.h"
|
|
#include "vector.h"
|
|
|
|
/* Refuse to compile against PostgreSQL releases older than 11 */
#if PG_VERSION_NUM < 110000
#error "Requires PostgreSQL 11+"
#endif
|
|
|
|
/* Dimension limit; presumably the largest vector dimension an HNSW index accepts (TODO confirm against callers) */
#define HNSW_MAX_DIM 2000
|
|
|
|
/*
 * Support functions
 *
 * NOTE(review): presumably the procnum values handed to
 * HnswOptionalProcInfo(); confirm against the operator class definitions.
 */
#define HNSW_DISTANCE_PROC 1
#define HNSW_NORM_PROC 2
|
|
|
|
/*
 * Identification values for an HNSW index. HnswMetaPageData carries
 * version and magicNumber fields and HnswPageOpaqueData carries page_id;
 * presumably these constants are what gets stored there (confirm in the
 * page-initialization code).
 */
#define HNSW_VERSION 1
#define HNSW_MAGIC_NUMBER 0xA953A953
#define HNSW_PAGE_ID 0xFF90
|
|
|
|
/* Preserved page numbers */
#define HNSW_METAPAGE_BLKNO 0
#define HNSW_HEAD_BLKNO 1		/* first element page */
|
|
|
|
/* Must correspond to page numbers since page lock is used */
#define HNSW_UPDATE_LOCK 0
#define HNSW_SCAN_LOCK 1
|
|
|
|
/*
 * HNSW parameters
 *
 * Each parameter has a default plus an inclusive-looking [MIN, MAX] pair;
 * how the bounds are enforced is not visible in this header.
 */

/* m: number of connections (see HnswOptions.m) */
#define HNSW_DEFAULT_M 16
#define HNSW_MIN_M 2
#define HNSW_MAX_M 100

/* ef_construction: size of dynamic candidate list (see HnswOptions.efConstruction) */
#define HNSW_DEFAULT_EF_CONSTRUCTION 64
#define HNSW_MIN_EF_CONSTRUCTION 4
#define HNSW_MAX_EF_CONSTRUCTION 1000

/* ef_search: presumably the range for hnsw_ef_search (TODO confirm) */
#define HNSW_DEFAULT_EF_SEARCH 40
#define HNSW_MIN_EF_SEARCH 1
#define HNSW_MAX_EF_SEARCH 1000
|
|
|
|
/* Tuple types (compared against the tuple's type field by HnswIsElementTuple / HnswIsNeighborTuple) */
#define HNSW_ELEMENT_TUPLE_TYPE 1
#define HNSW_NEIGHBOR_TUPLE_TYPE 2
|
|
|
|
/*
 * Make graph robust against non-HOT updates
 *
 * This is the length of the heaptids array in HnswElementTupleData.
 */
#define HNSW_HEAPTIDS 10
|
|
|
|
/*
 * Entry-point update modes.
 *
 * NOTE(review): presumably values for the updateEntry argument of
 * HnswUpdateMetaPage(); the exact semantics live in its implementation.
 */
#define HNSW_UPDATE_ENTRY_GREATER 1
#define HNSW_UPDATE_ENTRY_ALWAYS 2
|
|
|
|
/* Build phases */
/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */
#define PROGRESS_HNSW_PHASE_LOAD 2
|
|
|
|
/*
 * MAXALIGN'd size of an element tuple: the fixed part up to the vec member
 * plus VECTOR_SIZE(_dim) for the vector itself.
 */
#define HNSW_ELEMENT_TUPLE_SIZE(_dim) MAXALIGN(offsetof(HnswElementTupleData, vec) + VECTOR_SIZE(_dim))

/*
 * MAXALIGN'd size of a neighbor tuple with room for (level + 2) * m index
 * TIDs. That count matches summing HnswGetLayerM() over layers 0..level
 * (2 * m for layer 0, m for every other layer).
 */
#define HNSW_NEIGHBOR_TUPLE_SIZE(level, m) MAXALIGN(offsetof(HnswNeighborTupleData, indextids) + ((level) + 2) * (m) * sizeof(ItemPointerData))
|
|
|
|
/* View a page's special space as the HNSW opaque data */
#define HnswPageGetOpaque(page) ((HnswPageOpaque) PageGetSpecialPointer(page))

/* View a page's contents as the metapage structure */
#define HnswPageGetMeta(page) ((HnswMetaPageData *) PageGetContents(page))
|
|
|
|
/*
 * Random double helper. PostgreSQL 15+ draws from the global pg_prng state;
 * older releases scale random() by MAX_RANDOM_VALUE.
 */
#if PG_VERSION_NUM >= 150000
#define RandomDouble() pg_prng_double(&pg_global_prng_state)
#else
#define RandomDouble() (((double) random()) / MAX_RANDOM_VALUE)
#endif
|
|
|
|
/*
 * Compatibility shims for PostgreSQL releases before 13: express
 * list_delete_last() via list_truncate() and list_sort() via list_qsort().
 */
#if PG_VERSION_NUM < 130000
#define list_delete_last(list) list_truncate(list, list_length(list) - 1)
#define list_sort(list, cmp) list_qsort(list, cmp)
#endif
|
|
|
|
/* Tuple kind tests; tup must point to something with a type member */
#define HnswIsElementTuple(tup) ((tup)->type == HNSW_ELEMENT_TUPLE_TYPE)
#define HnswIsNeighborTuple(tup) ((tup)->type == HNSW_NEIGHBOR_TUPLE_TYPE)
|
|
|
|
/*
 * 2 * M connections for ground layer
 *
 * Both arguments are parenthesized in the expansion so that expression
 * arguments (e.g. "lc & 1") are compared as a whole.
 */
#define HnswGetLayerM(m, layer) ((layer) == 0 ? (m) * 2 : (m))

/* Optimal ML from paper */
#define HnswGetMl(m) (1 / log(m))

/*
 * Ensure fits on page and in uint8
 *
 * Takes the page space left after the page header, the HNSW opaque area,
 * the neighbor tuple's fixed part and one line pointer, divides it by the
 * TID size and by m, subtracts 2 (the inverse of the "(level + 2) * m"
 * sizing in HNSW_NEIGHBOR_TUPLE_SIZE), and caps the result at 255.
 * m is parenthesized so an expression argument divides as a unit.
 */
#define HnswGetMaxLevel(m) Min(((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - offsetof(HnswNeighborTupleData, indextids) - sizeof(ItemIdData)) / (sizeof(ItemPointerData)) / (m)) - 2, 255)
|
|
|
|
/* Variables */
|
|
/* Declared here, defined in another translation unit. Presumably bounded by HNSW_MIN_EF_SEARCH / HNSW_MAX_EF_SEARCH (TODO confirm). */
extern int hnsw_ef_search;
|
|
|
|
/* Forward declaration: HnswElementData holds a pointer to this type before the struct body is defined */
typedef struct HnswNeighborArray HnswNeighborArray;
|
|
|
|
typedef struct HnswElementData
|
|
{
|
|
List *heaptids;
|
|
uint8 level;
|
|
uint8 deleted;
|
|
HnswNeighborArray *neighbors;
|
|
BlockNumber blkno;
|
|
OffsetNumber offno;
|
|
OffsetNumber neighborOffno;
|
|
BlockNumber neighborPage;
|
|
Vector *vec;
|
|
} HnswElementData;
|
|
|
|
typedef HnswElementData * HnswElement;
|
|
|
|
typedef struct HnswCandidate
|
|
{
|
|
HnswElement element;
|
|
float distance;
|
|
} HnswCandidate;
|
|
|
|
typedef struct HnswNeighborArray
|
|
{
|
|
int length;
|
|
HnswCandidate *items;
|
|
} HnswNeighborArray;
|
|
|
|
typedef struct HnswPairingHeapNode
|
|
{
|
|
pairingheap_node ph_node;
|
|
HnswCandidate *inner;
|
|
} HnswPairingHeapNode;
|
|
|
|
/* HNSW index options */
|
|
typedef struct HnswOptions
|
|
{
|
|
int32 vl_len_; /* varlena header (do not touch directly!) */
|
|
int m; /* number of connections */
|
|
int efConstruction; /* size of dynamic candidate list */
|
|
} HnswOptions;
|
|
|
|
typedef struct HnswBuildState
|
|
{
|
|
/* Info */
|
|
Relation heap;
|
|
Relation index;
|
|
IndexInfo *indexInfo;
|
|
ForkNumber forkNum;
|
|
|
|
/* Settings */
|
|
int dimensions;
|
|
int m;
|
|
int efConstruction;
|
|
|
|
/* Statistics */
|
|
double indtuples;
|
|
double reltuples;
|
|
|
|
/* Support functions */
|
|
FmgrInfo *procinfo;
|
|
FmgrInfo *normprocinfo;
|
|
Oid collation;
|
|
|
|
/* Variables */
|
|
List *elements;
|
|
HnswElement entryPoint;
|
|
double ml;
|
|
int maxLevel;
|
|
double maxInMemoryElements;
|
|
bool flushed;
|
|
Vector *normvec;
|
|
|
|
/* Memory */
|
|
MemoryContext tmpCtx;
|
|
} HnswBuildState;
|
|
|
|
typedef struct HnswMetaPageData
|
|
{
|
|
uint32 magicNumber;
|
|
uint32 version;
|
|
uint32 dimensions;
|
|
uint16 m;
|
|
uint16 efConstruction;
|
|
BlockNumber entryBlkno;
|
|
OffsetNumber entryOffno;
|
|
int16 entryLevel;
|
|
BlockNumber insertPage;
|
|
} HnswMetaPageData;
|
|
|
|
typedef HnswMetaPageData * HnswMetaPage;
|
|
|
|
typedef struct HnswPageOpaqueData
|
|
{
|
|
BlockNumber nextblkno;
|
|
uint16 unused;
|
|
uint16 page_id; /* for identification of HNSW indexes */
|
|
} HnswPageOpaqueData;
|
|
|
|
typedef HnswPageOpaqueData * HnswPageOpaque;
|
|
|
|
typedef struct HnswElementTupleData
|
|
{
|
|
uint8 type;
|
|
uint8 level;
|
|
uint8 deleted;
|
|
uint8 unused;
|
|
ItemPointerData heaptids[HNSW_HEAPTIDS];
|
|
ItemPointerData neighbortid;
|
|
uint16 unused2;
|
|
Vector vec;
|
|
} HnswElementTupleData;
|
|
|
|
typedef HnswElementTupleData * HnswElementTuple;
|
|
|
|
typedef struct HnswNeighborTupleData
|
|
{
|
|
uint8 type;
|
|
uint8 unused;
|
|
uint16 count;
|
|
ItemPointerData indextids[FLEXIBLE_ARRAY_MEMBER];
|
|
} HnswNeighborTupleData;
|
|
|
|
typedef HnswNeighborTupleData * HnswNeighborTuple;
|
|
|
|
typedef struct HnswScanOpaqueData
|
|
{
|
|
bool first;
|
|
Buffer buf;
|
|
ItemPointerData heaptid;
|
|
OffsetNumber offno;
|
|
int removedCount;
|
|
List *w;
|
|
MemoryContext tmpCtx;
|
|
|
|
/* Support functions */
|
|
FmgrInfo *procinfo;
|
|
FmgrInfo *normprocinfo;
|
|
Oid collation;
|
|
} HnswScanOpaqueData;
|
|
|
|
typedef HnswScanOpaqueData * HnswScanOpaque;
|
|
|
|
typedef struct HnswVacuumState
|
|
{
|
|
/* Info */
|
|
Relation index;
|
|
IndexBulkDeleteResult *stats;
|
|
IndexBulkDeleteCallback callback;
|
|
void *callback_state;
|
|
|
|
/* Settings */
|
|
int m;
|
|
int efConstruction;
|
|
|
|
/* Support functions */
|
|
FmgrInfo *procinfo;
|
|
Oid collation;
|
|
|
|
/* Variables */
|
|
HTAB *deleted;
|
|
BufferAccessStrategy bas;
|
|
HnswNeighborTuple ntup;
|
|
HnswElementData highestPoint;
|
|
|
|
/* Memory */
|
|
MemoryContext tmpCtx;
|
|
} HnswVacuumState;
|
|
|
|
/* Methods */
|
|
int HnswGetM(Relation index);
|
|
int HnswGetEfConstruction(Relation index);
|
|
FmgrInfo *HnswOptionalProcInfo(Relation rel, uint16 procnum);
|
|
bool HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, Vector * result);
|
|
void HnswCommitBuffer(Buffer buf, GenericXLogState *state);
|
|
Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
|
|
void HnswInitPage(Buffer buf, Page page);
|
|
void HnswInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state);
|
|
void HnswInit(void);
|
|
List *HnswSearchLayer(Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, bool inserting, HnswElement skipElement);
|
|
HnswElement HnswGetEntryPoint(Relation index);
|
|
HnswElement HnswInitElement(ItemPointer tid, int m, double ml, int maxLevel);
|
|
void HnswFreeElement(HnswElement element);
|
|
HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno);
|
|
void HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing);
|
|
HnswElement HnswFindDuplicate(HnswElement e);
|
|
HnswCandidate *HnswEntryCandidate(HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadVec);
|
|
void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum);
|
|
void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m);
|
|
void HnswAddHeapTid(HnswElement element, ItemPointer heaptid);
|
|
void HnswInitNeighbors(HnswElement element, int m);
|
|
bool HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heapRel);
|
|
void HnswUpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting);
|
|
void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec);
|
|
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec);
|
|
void HnswSetElementTuple(HnswElementTuple etup, HnswElement element);
|
|
void HnswUpdateConnection(HnswElement element, HnswCandidate * hc, int m, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation);
|
|
void HnswLoadNeighbors(HnswElement element, Relation index);
|
|
|
|
/* Index access methods */
|
|
IndexBuildResult *hnswbuild(Relation heap, Relation index, IndexInfo *indexInfo);
|
|
void hnswbuildempty(Relation index);
|
|
bool hnswinsert(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heap, IndexUniqueCheck checkUnique
|
|
#if PG_VERSION_NUM >= 140000
|
|
,bool indexUnchanged
|
|
#endif
|
|
,IndexInfo *indexInfo
|
|
);
|
|
IndexBulkDeleteResult *hnswbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state);
|
|
IndexBulkDeleteResult *hnswvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats);
|
|
IndexScanDesc hnswbeginscan(Relation index, int nkeys, int norderbys);
|
|
void hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys);
|
|
bool hnswgettuple(IndexScanDesc scan, ScanDirection dir);
|
|
void hnswendscan(IndexScanDesc scan);
|
|
|
|
#endif
|