mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-30 09:41:15 +08:00
Switched to slist for elements to reduce allocations and remove limit
This commit is contained in:
@@ -6,6 +6,7 @@
|
|||||||
#include "access/generic_xlog.h"
|
#include "access/generic_xlog.h"
|
||||||
#include "access/parallel.h"
|
#include "access/parallel.h"
|
||||||
#include "access/reloptions.h"
|
#include "access/reloptions.h"
|
||||||
|
#include "lib/ilist.h"
|
||||||
#include "nodes/execnodes.h"
|
#include "nodes/execnodes.h"
|
||||||
#include "port.h" /* for random() */
|
#include "port.h" /* for random() */
|
||||||
#include "utils/sampling.h"
|
#include "utils/sampling.h"
|
||||||
@@ -103,6 +104,7 @@ typedef struct HnswNeighborArray HnswNeighborArray;
|
|||||||
|
|
||||||
typedef struct HnswElementData
|
typedef struct HnswElementData
|
||||||
{
|
{
|
||||||
|
slist_node next;
|
||||||
ItemPointerData heaptids[HNSW_HEAPTIDS];
|
ItemPointerData heaptids[HNSW_HEAPTIDS];
|
||||||
uint8 heaptidsLength;
|
uint8 heaptidsLength;
|
||||||
uint8 level;
|
uint8 level;
|
||||||
@@ -212,7 +214,7 @@ typedef struct HnswBuildState
|
|||||||
Oid collation;
|
Oid collation;
|
||||||
|
|
||||||
/* Variables */
|
/* Variables */
|
||||||
List *elements;
|
slist_head elements;
|
||||||
HnswElement entryPoint;
|
HnswElement entryPoint;
|
||||||
double ml;
|
double ml;
|
||||||
int maxLevel;
|
int maxLevel;
|
||||||
|
|||||||
@@ -56,8 +56,6 @@
|
|||||||
#define PARALLEL_KEY_HNSW_SHARED UINT64CONST(0xA000000000000001)
|
#define PARALLEL_KEY_HNSW_SHARED UINT64CONST(0xA000000000000001)
|
||||||
#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000002)
|
#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000002)
|
||||||
|
|
||||||
#define LIST_MAX_LENGTH ((1 << 26) - 1)
|
|
||||||
|
|
||||||
#if PG_VERSION_NUM < 130000
|
#if PG_VERSION_NUM < 130000
|
||||||
#define GENERATIONCHUNK_RAWSIZE (SIZEOF_SIZE_T + SIZEOF_VOID_P * 2)
|
#define GENERATIONCHUNK_RAWSIZE (SIZEOF_SIZE_T + SIZEOF_VOID_P * 2)
|
||||||
#endif
|
#endif
|
||||||
@@ -139,7 +137,7 @@ CreateElementPages(HnswBuildState * buildstate)
|
|||||||
BlockNumber insertPage;
|
BlockNumber insertPage;
|
||||||
Buffer buf;
|
Buffer buf;
|
||||||
Page page;
|
Page page;
|
||||||
ListCell *lc;
|
slist_iter iter;
|
||||||
|
|
||||||
/* Calculate sizes */
|
/* Calculate sizes */
|
||||||
etupAllocSize = BLCKSZ;
|
etupAllocSize = BLCKSZ;
|
||||||
@@ -154,9 +152,9 @@ CreateElementPages(HnswBuildState * buildstate)
|
|||||||
page = BufferGetPage(buf);
|
page = BufferGetPage(buf);
|
||||||
HnswInitPage(buf, page);
|
HnswInitPage(buf, page);
|
||||||
|
|
||||||
foreach(lc, buildstate->elements)
|
slist_foreach(iter, &buildstate->elements)
|
||||||
{
|
{
|
||||||
HnswElement element = lfirst(lc);
|
HnswElement element = slist_container(HnswElementData, next, iter.cur);
|
||||||
Size etupSize;
|
Size etupSize;
|
||||||
Size ntupSize;
|
Size ntupSize;
|
||||||
Size combinedSize;
|
Size combinedSize;
|
||||||
@@ -229,15 +227,15 @@ CreateNeighborPages(HnswBuildState * buildstate)
|
|||||||
Relation index = buildstate->index;
|
Relation index = buildstate->index;
|
||||||
ForkNumber forkNum = buildstate->forkNum;
|
ForkNumber forkNum = buildstate->forkNum;
|
||||||
int m = buildstate->m;
|
int m = buildstate->m;
|
||||||
ListCell *lc;
|
slist_iter iter;
|
||||||
HnswNeighborTuple ntup;
|
HnswNeighborTuple ntup;
|
||||||
|
|
||||||
/* Allocate once */
|
/* Allocate once */
|
||||||
ntup = palloc0(BLCKSZ);
|
ntup = palloc0(BLCKSZ);
|
||||||
|
|
||||||
foreach(lc, buildstate->elements)
|
slist_foreach(iter, &buildstate->elements)
|
||||||
{
|
{
|
||||||
HnswElement e = lfirst(lc);
|
HnswElement e = slist_container(HnswElementData, next, iter.cur);
|
||||||
Buffer buf;
|
Buffer buf;
|
||||||
Page page;
|
Page page;
|
||||||
Size ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(e->level, m);
|
Size ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(e->level, m);
|
||||||
@@ -372,11 +370,8 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
|
|||||||
if (dup == NULL && (entryPoint == NULL || element->level > entryPoint->level))
|
if (dup == NULL && (entryPoint == NULL || element->level > entryPoint->level))
|
||||||
buildstate->entryPoint = element;
|
buildstate->entryPoint = element;
|
||||||
|
|
||||||
/* Add to graph memory context */
|
|
||||||
oldCtx = MemoryContextSwitchTo(buildstate->graphCtx);
|
|
||||||
|
|
||||||
if (dup == NULL)
|
if (dup == NULL)
|
||||||
buildstate->elements = lappend(buildstate->elements, element);
|
slist_push_head(&buildstate->elements, &element->next);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* No need to free element since memory unlikely to be reallocated */
|
/* No need to free element since memory unlikely to be reallocated */
|
||||||
@@ -384,8 +379,6 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
|
|||||||
HnswAddHeapTid(dup, heaptid);
|
HnswAddHeapTid(dup, heaptid);
|
||||||
}
|
}
|
||||||
|
|
||||||
MemoryContextSwitchTo(oldCtx);
|
|
||||||
|
|
||||||
/* Update memory usage */
|
/* Update memory usage */
|
||||||
#if PG_VERSION_NUM >= 130000
|
#if PG_VERSION_NUM >= 130000
|
||||||
buildstate->memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false);
|
buildstate->memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false);
|
||||||
@@ -416,13 +409,12 @@ BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
/* Flush pages if needed */
|
/* Flush pages if needed */
|
||||||
if (!buildstate->flushed && (buildstate->memoryUsed >= buildstate->memoryTotal || list_length(buildstate->elements) == LIST_MAX_LENGTH))
|
if (!buildstate->flushed && buildstate->memoryUsed >= buildstate->memoryTotal)
|
||||||
{
|
{
|
||||||
if (buildstate->memoryUsed >= buildstate->memoryTotal)
|
ereport(NOTICE,
|
||||||
ereport(NOTICE,
|
(errmsg("hnsw graph no longer fits into maintenance_work_mem after " INT64_FORMAT " tuples", (int64) buildstate->indtuples),
|
||||||
(errmsg("hnsw graph no longer fits into maintenance_work_mem after " INT64_FORMAT " tuples", (int64) buildstate->indtuples),
|
errdetail("Building will take significantly more time."),
|
||||||
errdetail("Building will take significantly more time."),
|
errhint("Increase maintenance_work_mem to speed up builds.")));
|
||||||
errhint("Increase maintenance_work_mem to speed up builds.")));
|
|
||||||
|
|
||||||
FlushPages(buildstate);
|
FlushPages(buildstate);
|
||||||
}
|
}
|
||||||
@@ -488,7 +480,7 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index
|
|||||||
buildstate->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC);
|
buildstate->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC);
|
||||||
buildstate->collation = index->rd_indcollation[0];
|
buildstate->collation = index->rd_indcollation[0];
|
||||||
|
|
||||||
buildstate->elements = NIL;
|
slist_init(&buildstate->elements);
|
||||||
buildstate->entryPoint = NULL;
|
buildstate->entryPoint = NULL;
|
||||||
buildstate->ml = HnswGetMl(buildstate->m);
|
buildstate->ml = HnswGetMl(buildstate->m);
|
||||||
buildstate->maxLevel = HnswGetMaxLevel(buildstate->m);
|
buildstate->maxLevel = HnswGetMaxLevel(buildstate->m);
|
||||||
|
|||||||
Reference in New Issue
Block a user