Renamed iterative_search to iterative_scan

This commit is contained in:
Andrew Kane
2024-10-24 20:31:43 -07:00
parent 62039d74f6
commit 7043cce893
16 changed files with 76 additions and 76 deletions

View File

@@ -1,6 +1,6 @@
## 0.8.0 (unreleased)
- Added support for iterative search
- Added support for iterative index scans
- Added casts for arrays to `sparsevec`
- Improved cost estimation
- Improved performance of HNSW inserts and on-disk index builds

View File

@@ -451,31 +451,31 @@ Use [partitioning](https://www.postgresql.org/docs/current/ddl-partitioning.html
CREATE TABLE items (embedding vector(3), category_id int) PARTITION BY LIST(category_id);
```
## Iterative Search
## Iterative Index Scans
*Unreleased*
With approximate indexes, queries with filtering can return fewer results (due to post-filtering).
Starting with 0.8.0, you can enable iterative search. If too few results from the initial index scan match the filters, the scan will resume until enough results are found (or it reaches `hnsw.max_search_tuples` or `ivfflat.max_probes`). This can significantly improve recall.
Starting with 0.8.0, you can enable iterative index scans. If too few results from the initial index scan match the filters, the scan will resume until enough results are found (or it reaches `hnsw.max_search_tuples` or `ivfflat.max_probes`). This can significantly improve recall.
There are two modes for iterative search: strict and relaxed (due to the streaming nature of Postgres executor).
There are two modes for iterative scans: strict and relaxed (due to the streaming nature of the Postgres executor).
Strict ensures results are in the exact order by distance
```sql
SET hnsw.iterative_search = strict_order;
SET hnsw.iterative_scan = strict_order;
```
Relaxed allows results to be slightly out of order by distance, but provides better recall
```sql
SET hnsw.iterative_search = relaxed_order;
SET hnsw.iterative_scan = relaxed_order;
# or
SET ivfflat.iterative_search = relaxed_order;
SET ivfflat.iterative_scan = relaxed_order;
```
Note: IVFFlat only supports relaxed order for iterative search
Note: IVFFlat only supports relaxed ordering for iterative scans
With relaxed ordering, you can use a [materialized CTE](https://www.postgresql.org/docs/current/queries-with.html#QUERIES-WITH-CTE-MATERIALIZATION) to get strict ordering

View File

@@ -18,15 +18,15 @@
#define MarkGUCPrefixReserved(x) EmitWarningsOnPlaceholders(x)
#endif
static const struct config_enum_entry hnsw_iterative_search_options[] = {
{"off", HNSW_ITERATIVE_SEARCH_OFF, false},
{"relaxed_order", HNSW_ITERATIVE_SEARCH_RELAXED, false},
{"strict_order", HNSW_ITERATIVE_SEARCH_STRICT, false},
static const struct config_enum_entry hnsw_iterative_scan_options[] = {
{"off", HNSW_ITERATIVE_SCAN_OFF, false},
{"relaxed_order", HNSW_ITERATIVE_SCAN_RELAXED, false},
{"strict_order", HNSW_ITERATIVE_SCAN_STRICT, false},
{NULL, 0, false}
};
int hnsw_ef_search;
int hnsw_iterative_search;
int hnsw_iterative_scan;
int hnsw_max_search_tuples;
double hnsw_search_mem_multiplier;
int hnsw_lock_tranche_id;
@@ -79,17 +79,17 @@ HnswInit(void)
"Valid range is 1..1000.", &hnsw_ef_search,
HNSW_DEFAULT_EF_SEARCH, HNSW_MIN_EF_SEARCH, HNSW_MAX_EF_SEARCH, PGC_USERSET, 0, NULL, NULL, NULL);
DefineCustomEnumVariable("hnsw.iterative_search", "Sets the iterative search mode",
NULL, &hnsw_iterative_search,
HNSW_ITERATIVE_SEARCH_OFF, hnsw_iterative_search_options, PGC_USERSET, 0, NULL, NULL, NULL);
DefineCustomEnumVariable("hnsw.iterative_scan", "Sets the mode for iterative scans",
NULL, &hnsw_iterative_scan,
HNSW_ITERATIVE_SCAN_OFF, hnsw_iterative_scan_options, PGC_USERSET, 0, NULL, NULL, NULL);
/* This is approximate and does not apply to the initial scan */
DefineCustomIntVariable("hnsw.max_search_tuples", "Sets the max number of candidates to visit for iterative search",
DefineCustomIntVariable("hnsw.max_search_tuples", "Sets the max number of candidates to visit for iterative scans",
NULL, &hnsw_max_search_tuples,
20000, 1, INT_MAX, PGC_USERSET, 0, NULL, NULL, NULL);
/* Same range and default as hash_mem_multiplier */
DefineCustomRealVariable("hnsw.search_mem_multiplier", "Sets the multiple of work_mem to use for iterative search",
DefineCustomRealVariable("hnsw.search_mem_multiplier", "Sets the multiple of work_mem to use for iterative scans",
NULL, &hnsw_search_mem_multiplier,
2, 1, 1000, PGC_USERSET, 0, NULL, NULL, NULL);

View File

@@ -109,17 +109,17 @@
/* Variables */
extern int hnsw_ef_search;
extern int hnsw_iterative_search;
extern int hnsw_iterative_scan;
extern int hnsw_max_search_tuples;
extern double hnsw_search_mem_multiplier;
extern int hnsw_lock_tranche_id;
typedef enum HnswIterativeSearchMode
typedef enum HnswIterativeScanMode
{
HNSW_ITERATIVE_SEARCH_OFF,
HNSW_ITERATIVE_SEARCH_RELAXED,
HNSW_ITERATIVE_SEARCH_STRICT
} HnswIterativeSearchMode;
HNSW_ITERATIVE_SCAN_OFF,
HNSW_ITERATIVE_SCAN_RELAXED,
HNSW_ITERATIVE_SCAN_STRICT
} HnswIterativeScanMode;
typedef struct HnswElementData HnswElementData;
typedef struct HnswNeighborArray HnswNeighborArray;

View File

@@ -41,7 +41,7 @@ GetScanItems(IndexScanDesc scan, Datum value)
ep = w;
}
return HnswSearchLayer(base, q, ep, hnsw_ef_search, 0, index, support, m, false, NULL, &so->v, hnsw_iterative_search != HNSW_ITERATIVE_SEARCH_OFF ? &so->discarded : NULL, true, &so->tuples);
return HnswSearchLayer(base, q, ep, hnsw_ef_search, 0, index, support, m, false, NULL, &so->v, hnsw_iterative_scan != HNSW_ITERATIVE_SCAN_OFF ? &so->discarded : NULL, true, &so->tuples);
}
/*
@@ -229,7 +229,7 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
if (list_length(so->w) == 0)
{
if (hnsw_iterative_search == HNSW_ITERATIVE_SEARCH_OFF)
if (hnsw_iterative_scan == HNSW_ITERATIVE_SCAN_OFF)
break;
/* Empty index */
@@ -295,7 +295,7 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
so->w = list_delete_last(so->w);
/* Mark memory as free for next iteration */
if (hnsw_iterative_search != HNSW_ITERATIVE_SEARCH_OFF)
if (hnsw_iterative_scan != HNSW_ITERATIVE_SCAN_OFF)
{
pfree(element);
pfree(sc);
@@ -306,7 +306,7 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
heaptid = &element->heaptids[--element->heaptidsLength];
if (hnsw_iterative_search == HNSW_ITERATIVE_SEARCH_STRICT)
if (hnsw_iterative_scan == HNSW_ITERATIVE_SCAN_STRICT)
{
if (sc->distance < so->previousDistance)
continue;

View File

@@ -17,13 +17,13 @@
#endif
int ivfflat_probes;
int ivfflat_iterative_search;
int ivfflat_iterative_scan;
int ivfflat_max_probes;
static relopt_kind ivfflat_relopt_kind;
static const struct config_enum_entry ivfflat_iterative_search_options[] = {
{"off", IVFFLAT_ITERATIVE_SEARCH_OFF, false},
{"relaxed_order", IVFFLAT_ITERATIVE_SEARCH_RELAXED, false},
static const struct config_enum_entry ivfflat_iterative_scan_options[] = {
{"off", IVFFLAT_ITERATIVE_SCAN_OFF, false},
{"relaxed_order", IVFFLAT_ITERATIVE_SCAN_RELAXED, false},
{NULL, 0, false}
};
@@ -41,12 +41,12 @@ IvfflatInit(void)
"Valid range is 1..lists.", &ivfflat_probes,
IVFFLAT_DEFAULT_PROBES, IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS, PGC_USERSET, 0, NULL, NULL, NULL);
DefineCustomEnumVariable("ivfflat.iterative_search", "Sets the iterative search mode",
NULL, &ivfflat_iterative_search,
IVFFLAT_ITERATIVE_SEARCH_OFF, ivfflat_iterative_search_options, PGC_USERSET, 0, NULL, NULL, NULL);
DefineCustomEnumVariable("ivfflat.iterative_scan", "Sets the mode for iterative scans",
NULL, &ivfflat_iterative_scan,
IVFFLAT_ITERATIVE_SCAN_OFF, ivfflat_iterative_scan_options, PGC_USERSET, 0, NULL, NULL, NULL);
/* If this is less than probes, probes is used */
DefineCustomIntVariable("ivfflat.max_probes", "Sets the max number of probes for iterative search",
DefineCustomIntVariable("ivfflat.max_probes", "Sets the max number of probes for iterative scans",
NULL, &ivfflat_max_probes,
IVFFLAT_MAX_LISTS, IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS, PGC_USERSET, 0, NULL, NULL, NULL);

View File

@@ -80,14 +80,14 @@
/* Variables */
extern int ivfflat_probes;
extern int ivfflat_iterative_search;
extern int ivfflat_iterative_scan;
extern int ivfflat_max_probes;
typedef enum IvfflatIterativeSearchMode
typedef enum IvfflatIterativeScanMode
{
IVFFLAT_ITERATIVE_SEARCH_OFF,
IVFFLAT_ITERATIVE_SEARCH_RELAXED
} IvfflatIterativeSearchMode;
IVFFLAT_ITERATIVE_SCAN_OFF,
IVFFLAT_ITERATIVE_SCAN_RELAXED
} IvfflatIterativeScanMode;
typedef struct VectorArrayData
{

View File

@@ -171,7 +171,7 @@ GetScanItems(IndexScanDesc scan, Datum value)
}
}
if (tuples < 100 && ivfflat_iterative_search == IVFFLAT_ITERATIVE_SEARCH_OFF)
if (tuples < 100 && ivfflat_iterative_scan == IVFFLAT_ITERATIVE_SCAN_OFF)
ereport(DEBUG1,
(errmsg("index scan found few tuples"),
errdetail("Index may have been created with little data."),
@@ -263,7 +263,7 @@ ivfflatbeginscan(Relation index, int nkeys, int norderbys)
/* Get lists and dimensions from metapage */
IvfflatGetMetaPageInfo(index, &lists, &dimensions);
if (ivfflat_iterative_search != IVFFLAT_ITERATIVE_SEARCH_OFF)
if (ivfflat_iterative_scan != IVFFLAT_ITERATIVE_SCAN_OFF)
maxProbes = Max(ivfflat_max_probes, probes);
else
maxProbes = probes;

View File

@@ -104,7 +104,7 @@ DROP TABLE t;
CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val vector_l2_ops);
SET hnsw.iterative_search = strict_order;
SET hnsw.iterative_scan = strict_order;
SET hnsw.ef_search = 1;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
val
@@ -114,7 +114,7 @@ SELECT * FROM t ORDER BY val <-> '[3,3,3]';
[0,0,0]
(3 rows)
SET hnsw.iterative_search = relaxed_order;
SET hnsw.iterative_scan = relaxed_order;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
val
---------
@@ -123,7 +123,7 @@ SELECT * FROM t ORDER BY val <-> '[3,3,3]';
[0,0,0]
(3 rows)
RESET hnsw.iterative_search;
RESET hnsw.iterative_scan;
RESET hnsw.ef_search;
DROP TABLE t;
-- unlogged
@@ -165,14 +165,14 @@ SET hnsw.ef_search = 0;
ERROR: 0 is outside the valid range for parameter "hnsw.ef_search" (1 .. 1000)
SET hnsw.ef_search = 1001;
ERROR: 1001 is outside the valid range for parameter "hnsw.ef_search" (1 .. 1000)
SHOW hnsw.iterative_search;
hnsw.iterative_search
-----------------------
SHOW hnsw.iterative_scan;
hnsw.iterative_scan
---------------------
off
(1 row)
SET hnsw.iterative_search = on;
ERROR: invalid value for parameter "hnsw.iterative_search": "on"
SET hnsw.iterative_scan = on;
ERROR: invalid value for parameter "hnsw.iterative_scan": "on"
HINT: Available values: off, relaxed_order, strict_order.
SHOW hnsw.max_search_tuples;
hnsw.max_search_tuples

View File

@@ -86,7 +86,7 @@ DROP TABLE t;
CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 3);
SET ivfflat.iterative_search = relaxed_order;
SET ivfflat.iterative_scan = relaxed_order;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
val
---------
@@ -110,7 +110,7 @@ SELECT * FROM t ORDER BY val <-> '[3,3,3]';
[1,1,1]
(2 rows)
RESET ivfflat.iterative_search;
RESET ivfflat.iterative_scan;
RESET ivfflat.max_probes;
DROP TABLE t;
-- unlogged
@@ -144,14 +144,14 @@ SET ivfflat.probes = 0;
ERROR: 0 is outside the valid range for parameter "ivfflat.probes" (1 .. 32768)
SET ivfflat.probes = 32769;
ERROR: 32769 is outside the valid range for parameter "ivfflat.probes" (1 .. 32768)
SHOW ivfflat.iterative_search;
ivfflat.iterative_search
--------------------------
SHOW ivfflat.iterative_scan;
ivfflat.iterative_scan
------------------------
off
(1 row)
SET ivfflat.iterative_search = on;
ERROR: invalid value for parameter "ivfflat.iterative_search": "on"
SET ivfflat.iterative_scan = on;
ERROR: invalid value for parameter "ivfflat.iterative_scan": "on"
HINT: Available values: off, relaxed_order.
SHOW ivfflat.max_probes;
ivfflat.max_probes

View File

@@ -63,14 +63,14 @@ CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val vector_l2_ops);
SET hnsw.iterative_search = strict_order;
SET hnsw.iterative_scan = strict_order;
SET hnsw.ef_search = 1;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SET hnsw.iterative_search = relaxed_order;
SET hnsw.iterative_scan = relaxed_order;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
RESET hnsw.iterative_search;
RESET hnsw.iterative_scan;
RESET hnsw.ef_search;
DROP TABLE t;
@@ -98,9 +98,9 @@ SHOW hnsw.ef_search;
SET hnsw.ef_search = 0;
SET hnsw.ef_search = 1001;
SHOW hnsw.iterative_search;
SHOW hnsw.iterative_scan;
SET hnsw.iterative_search = on;
SET hnsw.iterative_scan = on;
SHOW hnsw.max_search_tuples;

View File

@@ -50,7 +50,7 @@ CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 3);
SET ivfflat.iterative_search = relaxed_order;
SET ivfflat.iterative_scan = relaxed_order;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SET ivfflat.max_probes = 1;
@@ -59,7 +59,7 @@ SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SET ivfflat.max_probes = 2;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
RESET ivfflat.iterative_search;
RESET ivfflat.iterative_scan;
RESET ivfflat.max_probes;
DROP TABLE t;
@@ -84,9 +84,9 @@ SHOW ivfflat.probes;
SET ivfflat.probes = 0;
SET ivfflat.probes = 32769;
SHOW ivfflat.iterative_search;
SHOW ivfflat.iterative_scan;
SET ivfflat.iterative_search = on;
SET ivfflat.iterative_scan = on;
SHOW ivfflat.max_probes;

View File

@@ -23,7 +23,7 @@ $node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v vector_l2_ops
my $count = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET ivfflat.probes = 10;
SET ivfflat.iterative_search = relaxed_order;
SET ivfflat.iterative_scan = relaxed_order;
SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst LIMIT 1) LIMIT 11) t;
));
is($count, 10);
@@ -39,7 +39,7 @@ foreach ((30, 50, 70))
$count = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET ivfflat.probes = 10;
SET ivfflat.iterative_search = relaxed_order;
SET ivfflat.iterative_scan = relaxed_order;
SET ivfflat.max_probes = $max_probes;
SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst WHERE i = $i) LIMIT 11) t;
));

View File

@@ -19,7 +19,7 @@ sub test_recall
my $explain = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET ivfflat.probes = $probes;
SET ivfflat.iterative_search = relaxed_order;
SET ivfflat.iterative_scan = relaxed_order;
EXPLAIN ANALYZE SELECT i FROM tst WHERE i % $c = 0 ORDER BY v $operator '$queries[0]' LIMIT $limit;
));
like($explain, qr/Index Scan using idx on tst/);
@@ -29,7 +29,7 @@ sub test_recall
my $actual = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET ivfflat.probes = $probes;
SET ivfflat.iterative_search = relaxed_order;
SET ivfflat.iterative_scan = relaxed_order;
SELECT i FROM tst WHERE i % $c = 0 ORDER BY v $operator '$queries[$i]' LIMIT $limit;
));
my @actual_ids = split("\n", $actual);

View File

@@ -26,7 +26,7 @@ $node->safe_psql("postgres", qq(
my $count = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET hnsw.iterative_search = relaxed_order;
SET hnsw.iterative_scan = relaxed_order;
SET hnsw.max_search_tuples = 100000;
SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst LIMIT 1) LIMIT 11) t;
));
@@ -42,7 +42,7 @@ foreach ((30000, 50000, 70000))
{
$count = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET hnsw.iterative_search = relaxed_order;
SET hnsw.iterative_scan = relaxed_order;
SET hnsw.max_search_tuples = $max_tuples;
SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst WHERE i = $i) LIMIT 11) t;
));
@@ -56,7 +56,7 @@ foreach ((30000, 50000, 70000))
my ($ret, $stdout, $stderr) = $node->psql("postgres", qq(
SET enable_seqscan = off;
SET hnsw.iterative_search = relaxed_order;
SET hnsw.iterative_scan = relaxed_order;
SET client_min_messages = debug1;
SET work_mem = '1MB';
SET hnsw.search_mem_multiplier = 1;

View File

@@ -21,7 +21,7 @@ sub test_recall
my $explain = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET hnsw.ef_search = $ef_search;
SET hnsw.iterative_search = $mode;
SET hnsw.iterative_scan = $mode;
EXPLAIN ANALYZE SELECT i FROM tst WHERE i % $c = 0 ORDER BY v $operator '$queries[0]' LIMIT $limit;
));
like($explain, qr/Index Scan using idx on tst/);
@@ -31,7 +31,7 @@ sub test_recall
my $actual = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET hnsw.ef_search = $ef_search;
SET hnsw.iterative_search = $mode;
SET hnsw.iterative_scan = $mode;
SELECT i FROM tst WHERE i % $c = 0 ORDER BY v $operator '$queries[$i]' LIMIT $limit;
));
my @actual_ids = split("\n", $actual);