From 4e2b76e627936d950da60dd02eea97b3f2bcc2a3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 16 Apr 2024 17:20:48 -0700 Subject: [PATCH] Skip duplicate center check for bit [skip ci] --- src/ivfkmeans.c | 29 ++++++++++++++------------- test/expected/ivfflat_bit_hamming.out | 4 ++++ test/sql/ivfflat_bit_hamming.sql | 1 + 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/ivfkmeans.c b/src/ivfkmeans.c index c0e5f36..3a74090 100644 --- a/src/ivfkmeans.c +++ b/src/ivfkmeans.c @@ -707,21 +707,22 @@ CheckCenters(Relation index, VectorArray centers, IvfflatType type) elog(ERROR, "Unsupported type"); } - /* Ensure no duplicate centers */ - /* Fine to sort in-place */ - if (type == IVFFLAT_TYPE_VECTOR) - qsort(centers->items, centers->length, centers->itemsize, CompareVectors); - else if (type == IVFFLAT_TYPE_HALFVEC) - qsort(centers->items, centers->length, centers->itemsize, CompareHalfVectors); - else if (type == IVFFLAT_TYPE_BIT) - qsort(centers->items, centers->length, centers->itemsize, CompareBitVectors); - else - elog(ERROR, "Unsupported type"); - - for (int i = 1; i < centers->length; i++) + if (type != IVFFLAT_TYPE_BIT) { - if (datumIsEqual(PointerGetDatum(VectorArrayGet(centers, i)), PointerGetDatum(VectorArrayGet(centers, i - 1)), false, -1)) - elog(ERROR, "Duplicate centers detected. Please report a bug."); + /* Ensure no duplicate centers */ + /* Fine to sort in-place */ + if (type == IVFFLAT_TYPE_VECTOR) + qsort(centers->items, centers->length, centers->itemsize, CompareVectors); + else if (type == IVFFLAT_TYPE_HALFVEC) + qsort(centers->items, centers->length, centers->itemsize, CompareHalfVectors); + else + elog(ERROR, "Unsupported type"); + + for (int i = 1; i < centers->length; i++) + { + if (datumIsEqual(PointerGetDatum(VectorArrayGet(centers, i)), PointerGetDatum(VectorArrayGet(centers, i - 1)), false, -1)) + elog(ERROR, "Duplicate centers detected. Please report a bug."); + } } /* Ensure no zero vectors for cosine distance */ diff --git a/test/expected/ivfflat_bit_hamming.out b/test/expected/ivfflat_bit_hamming.out index cee8317..43fbf71 100644 --- a/test/expected/ivfflat_bit_hamming.out +++ b/test/expected/ivfflat_bit_hamming.out @@ -29,4 +29,8 @@ DETAIL: This will cause low recall. HINT: Drop the index until the table has more data. CREATE INDEX ON t USING ivfflat ((val::bit(64001)) bit_hamming_ops) WITH (lists = 1); ERROR: column cannot have more than 64000 dimensions for ivfflat index +CREATE INDEX ON t USING ivfflat ((val::bit(2)) bit_hamming_ops) WITH (lists = 5); +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. DROP TABLE t; diff --git a/test/sql/ivfflat_bit_hamming.sql b/test/sql/ivfflat_bit_hamming.sql index 6c697ae..3f22767 100644 --- a/test/sql/ivfflat_bit_hamming.sql +++ b/test/sql/ivfflat_bit_hamming.sql @@ -16,4 +16,5 @@ CREATE TABLE t (val varbit(3)); CREATE INDEX ON t USING ivfflat (val bit_hamming_ops) WITH (lists = 1); CREATE INDEX ON t USING ivfflat ((val::bit(3)) bit_hamming_ops) WITH (lists = 1); CREATE INDEX ON t USING ivfflat ((val::bit(64001)) bit_hamming_ops) WITH (lists = 1); +CREATE INDEX ON t USING ivfflat ((val::bit(2)) bit_hamming_ops) WITH (lists = 5); DROP TABLE t;