mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-04 03:30:56 +08:00
Fixed results for NULL and NaN distances - fixes #205
Co-authored-by: Xiaoran Wang <wxiaoran@vmware.com>
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
- Added `sum` aggregate
|
||||
- Improved performance of distance functions
|
||||
- Fixed out of range results for cosine distance
|
||||
- Fixed results for NULL and NaN distances
|
||||
|
||||
## 0.4.4 (2023-06-12)
|
||||
|
||||
|
||||
@@ -180,6 +180,29 @@ GetScanItems(IndexScanDesc scan, Datum value)
|
||||
tuplesort_performsort(so->sortstate);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get dimensions from metapage
|
||||
*/
|
||||
static int
|
||||
GetDimensions(Relation index)
|
||||
{
|
||||
Buffer buf;
|
||||
Page page;
|
||||
IvfflatMetaPage metap;
|
||||
int dimensions;
|
||||
|
||||
buf = ReadBuffer(index, IVFFLAT_METAPAGE_BLKNO);
|
||||
LockBuffer(buf, BUFFER_LOCK_SHARE);
|
||||
page = BufferGetPage(buf);
|
||||
metap = IvfflatPageGetMeta(page);
|
||||
|
||||
dimensions = metap->dimensions;
|
||||
|
||||
UnlockReleaseBuffer(buf);
|
||||
|
||||
return dimensions;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare for an index scan
|
||||
*/
|
||||
@@ -285,21 +308,19 @@ ivfflatgettuple(IndexScanDesc scan, ScanDirection dir)
|
||||
if (scan->orderByData == NULL)
|
||||
elog(ERROR, "cannot scan ivfflat index without order");
|
||||
|
||||
/* No items will match if null */
|
||||
if (scan->orderByData->sk_flags & SK_ISNULL)
|
||||
return false;
|
||||
|
||||
value = scan->orderByData->sk_argument;
|
||||
|
||||
/* Value should not be compressed or toasted */
|
||||
Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
|
||||
Assert(!VARATT_IS_EXTENDED(DatumGetPointer(value)));
|
||||
|
||||
if (so->normprocinfo != NULL)
|
||||
value = PointerGetDatum(InitVector(GetDimensions(scan->indexRelation)));
|
||||
else
|
||||
{
|
||||
/* No items will match if normalization fails */
|
||||
if (!IvfflatNormValue(so->normprocinfo, so->collation, &value, NULL))
|
||||
return false;
|
||||
value = scan->orderByData->sk_argument;
|
||||
|
||||
/* Value should not be compressed or toasted */
|
||||
Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
|
||||
Assert(!VARATT_IS_EXTENDED(DatumGetPointer(value)));
|
||||
|
||||
/* Fine if normalization fails */
|
||||
if (so->normprocinfo != NULL)
|
||||
IvfflatNormValue(so->normprocinfo, so->collation, &value, NULL);
|
||||
}
|
||||
|
||||
IvfflatBench("GetScanLists", GetScanLists(scan, value));
|
||||
|
||||
@@ -11,9 +11,16 @@ SELECT * FROM t ORDER BY val <=> '[3,3,3]';
|
||||
[1,2,4]
|
||||
(3 rows)
|
||||
|
||||
SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector);
|
||||
val
|
||||
-----
|
||||
(0 rows)
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
|
||||
count
|
||||
-------
|
||||
3
|
||||
(1 row)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2;
|
||||
count
|
||||
-------
|
||||
3
|
||||
(1 row)
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
@@ -12,9 +12,10 @@ SELECT * FROM t ORDER BY val <#> '[3,3,3]';
|
||||
[0,0,0]
|
||||
(4 rows)
|
||||
|
||||
SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector);
|
||||
val
|
||||
-----
|
||||
(0 rows)
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
@@ -13,9 +13,13 @@ SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
(4 rows)
|
||||
|
||||
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector);
|
||||
val
|
||||
-----
|
||||
(0 rows)
|
||||
val
|
||||
---------
|
||||
[0,0,0]
|
||||
[1,1,1]
|
||||
[1,2,3]
|
||||
[1,2,4]
|
||||
(4 rows)
|
||||
|
||||
SELECT COUNT(*) FROM t;
|
||||
count
|
||||
|
||||
@@ -7,6 +7,7 @@ CREATE INDEX ON t USING ivfflat (val vector_cosine_ops) WITH (lists = 1);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
|
||||
SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector);
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2;
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
@@ -7,6 +7,6 @@ CREATE INDEX ON t USING ivfflat (val vector_ip_ops) WITH (lists = 1);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
|
||||
SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector);
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2;
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
Reference in New Issue
Block a user