diff --git a/CHANGELOG.md b/CHANGELOG.md index 44bca3f..e62cd9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.8.5 (unreleased) + +- Reduced memory usage for small tables for IVFFlat index builds + ## 0.8.4 (2026-06-30) - Fixed `hnsw graph not repaired` error with HNSW vacuuming diff --git a/src/ivfbuild.c b/src/ivfbuild.c index e35c352..7515c43 100644 --- a/src/ivfbuild.c +++ b/src/ivfbuild.c @@ -447,6 +447,13 @@ ComputeCenters(IvfflatBuildState * buildstate) /* Skip samples for unlogged table */ if (buildstate->heap == NULL) numSamples = 1; + else + { + int64 maxTuples = (int64) RelationGetNumberOfBlocks(buildstate->heap) * MaxHeapTuplesPerPage; + + /* Save memory since the heap will not have more than max tuples */ + numSamples = Max(Min(numSamples, maxTuples), 1); + } /* Sample rows */ buildstate->memoryUsed += VECTOR_ARRAY_SIZE(numSamples, buildstate->itemsize); diff --git a/test/expected/ivfflat_vector.out b/test/expected/ivfflat_vector.out index aad8226..23e1c01 100644 --- a/test/expected/ivfflat_vector.out +++ b/test/expected/ivfflat_vector.out @@ -173,3 +173,27 @@ ERROR: 0 is outside the valid range for parameter "ivfflat.max_probes" (1 .. 32 SET ivfflat.max_probes = 32769; ERROR: 32769 is outside the valid range for parameter "ivfflat.max_probes" (1 .. 32768) DROP TABLE t; +-- memory +CREATE TABLE t (val vector(2000)); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 4096); +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. +DROP TABLE t; +SET maintenance_work_mem = '1MB'; +CREATE TABLE t (val vector(2000)); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops); +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. 
+DROP TABLE t; +RESET maintenance_work_mem; +SET maintenance_work_mem = '5MB'; +CREATE TABLE t (val vector(2000)); +INSERT INTO t (val) VALUES (array_fill(0, ARRAY[2000])); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops); +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. +DROP TABLE t; +RESET maintenance_work_mem; diff --git a/test/sql/ivfflat_vector.sql b/test/sql/ivfflat_vector.sql index 4c20d2f..826f6f8 100644 --- a/test/sql/ivfflat_vector.sql +++ b/test/sql/ivfflat_vector.sql @@ -97,3 +97,22 @@ SET ivfflat.max_probes = 0; SET ivfflat.max_probes = 32769; DROP TABLE t; + +-- memory + +CREATE TABLE t (val vector(2000)); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 4096); +DROP TABLE t; + +SET maintenance_work_mem = '1MB'; +CREATE TABLE t (val vector(2000)); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops); +DROP TABLE t; +RESET maintenance_work_mem; + +SET maintenance_work_mem = '5MB'; +CREATE TABLE t (val vector(2000)); +INSERT INTO t (val) VALUES (array_fill(0, ARRAY[2000])); +CREATE INDEX ON t USING ivfflat (val vector_l2_ops); +DROP TABLE t; +RESET maintenance_work_mem;