From e8c3bf0cef4f7a985dcd2c6945fdbafa857c7c4b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 22 Dec 2023 13:35:43 -0500 Subject: [PATCH] Improved memory tracking for HNSW index builds - #384 --- src/hnsw.h | 3 ++- src/hnswbuild.c | 44 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index c1d5499..1babe02 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -215,7 +215,8 @@ typedef struct HnswBuildState HnswElement entryPoint; double ml; int maxLevel; - long memoryLeft; + long memoryUsed; + long memoryTotal; bool flushed; Vector *normvec; diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 2317517..5bc7c7e 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -58,6 +58,10 @@ #define LIST_MAX_LENGTH ((1 << 26) - 1) +#if PG_VERSION_NUM < 130000 +#define GENERATIONCHUNK_RAWSIZE (SIZEOF_SIZE_T + SIZEOF_VOID_P * 2) +#endif + /* * Create the metapage */ @@ -260,10 +264,10 @@ CreateNeighborPages(HnswBuildState * buildstate) } /* - * Flush pages + * Show memory usage */ static void -FlushPages(HnswBuildState * buildstate) +ShowMemoryUsage(HnswBuildState * buildstate) { #ifdef HNSW_MEMORY #if PG_VERSION_NUM >= 130000 @@ -272,8 +276,18 @@ FlushPages(HnswBuildState * buildstate) MemoryContextMemAllocated(CurrentMemoryContext, true) / (1024 * 1024)); #else MemoryContextStats(CurrentMemoryContext); + elog(INFO, "estimated memory: %zu MB", buildstate->memoryUsed / (1024 * 1024)); #endif #endif +} + +/* + * Flush pages + */ +static void +FlushPages(HnswBuildState * buildstate) +{ + ShowMemoryUsage(buildstate); CreateMetaPage(buildstate); CreateElementPages(buildstate); @@ -339,6 +353,7 @@ InsertTuple(Relation index, Datum *values, HnswElement element, HnswBuildState * return *dup == NULL; } +#if PG_VERSION_NUM < 130000 /* * Get the memory used by an element */ @@ -353,10 +368,10 @@ HnswElementMemory(HnswElement e, int m) elementSize += sizeof(ItemPointerData) + SIZEOF_VOID_P; elementSize += VARSIZE_ANY(DatumGetPointer(e->value)); /* Each allocation has chunk header */ - elementSize += (e->level + 7) * (SIZEOF_SIZE_T + SIZEOF_VOID_P); - /* TODO Account for additional memory */ + elementSize += (e->level + 7) * GENERATIONCHUNK_RAWSIZE; return elementSize; } +#endif /* * Callback for table_index_build_scan @@ -379,11 +394,11 @@ BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, if (isnull[0]) return; - if (buildstate->flushed || buildstate->memoryLeft <= 0 || list_length(buildstate->elements) == LIST_MAX_LENGTH) + if (buildstate->flushed || buildstate->memoryUsed >= buildstate->memoryTotal || list_length(buildstate->elements) == LIST_MAX_LENGTH) { if (!buildstate->flushed) { - if (buildstate->memoryLeft <= 0) + if (buildstate->memoryUsed >= buildstate->memoryTotal) ereport(NOTICE, (errmsg("hnsw graph no longer fits into maintenance_work_mem after " INT64_FORMAT " tuples", (int64) buildstate->indtuples), errdetail("Building will take significantly more time."), @@ -436,14 +451,22 @@ BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, if (dup != NULL) { HnswAddHeapTid(dup, tid); - buildstate->memoryLeft -= sizeof(ItemPointerData) + sizeof(uintptr_t) + sizeof(uint64); +#if PG_VERSION_NUM >= 130000 + buildstate->memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false); +#else + buildstate->memoryUsed += sizeof(ItemPointerData) + SIZEOF_VOID_P + GENERATIONCHUNK_RAWSIZE; +#endif } /* Add to buildstate or free */ if (inserted) { buildstate->elements = lappend(buildstate->elements, element); - buildstate->memoryLeft -= HnswElementMemory(element, buildstate->m); +#if PG_VERSION_NUM >= 130000 + buildstate->memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false); +#else + buildstate->memoryUsed += HnswElementMemory(element, buildstate->m); +#endif } MemoryContextSwitchTo(oldCtx); @@ -486,7 +509,8 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index buildstate->entryPoint = NULL; buildstate->ml = HnswGetMl(buildstate->m); buildstate->maxLevel = HnswGetMaxLevel(buildstate->m); - buildstate->memoryLeft = maintenance_work_mem * 1024L; + buildstate->memoryUsed = 0; + buildstate->memoryTotal = maintenance_work_mem * 1024L; buildstate->flushed = false; /* Reuse for each tuple */ @@ -569,7 +593,7 @@ HnswParallelScanAndInsert(HnswSpool * hnswspool, HnswShared * hnswshared, bool p indexInfo->ii_Concurrent = hnswshared->isconcurrent; InitBuildState(&buildstate, hnswspool->heap, hnswspool->index, indexInfo, MAIN_FORKNUM); /* TODO Support in-memory builds */ - buildstate.memoryLeft = 0; + buildstate.memoryTotal = 0; buildstate.flushed = true; buildstate.hnswshared = hnswshared; #if PG_VERSION_NUM >= 120000