diff --git a/CHANGELOG.md b/CHANGELOG.md index ea6e1e7..e5b71df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ - Added `halfvec` type - Added `sparsevec` type - Added support for indexing `bit` type -- Added support for indexing L1 distance +- Added support for indexing L1 distance with HNSW - Added `binary_quantize` function - Added `hamming_distance` function - Added `jaccard_distance` function diff --git a/README.md b/README.md index 7b29d1f..b16dbc1 100644 --- a/README.md +++ b/README.md @@ -356,24 +356,12 @@ Cosine distance CREATE INDEX ON items USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); ``` -L1 distance - unreleased - -```sql -CREATE INDEX ON items USING ivfflat (embedding vector_l1_ops) WITH (lists = 100); -``` - Hamming distance - unreleased ```sql CREATE INDEX ON items USING ivfflat (embedding bit_hamming_ops) WITH (lists = 100); ``` -Jaccard distance - unreleased - -```sql -CREATE INDEX ON items USING ivfflat (embedding bit_jaccard_ops) WITH (lists = 100); -``` - Supported types are: - `vector` - up to 2,000 dimensions diff --git a/sql/vector--0.6.2--0.7.0.sql b/sql/vector--0.6.2--0.7.0.sql index bed067b..4b62d6f 100644 --- a/sql/vector--0.6.2--0.7.0.sql +++ b/sql/vector--0.6.2--0.7.0.sql @@ -22,12 +22,6 @@ CREATE OPERATOR || ( LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_concat ); -CREATE OPERATOR CLASS vector_l1_ops - FOR TYPE vector USING ivfflat AS - OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops, - FUNCTION 1 l1_distance(vector, vector), - FUNCTION 3 l1_distance(vector, vector); - CREATE OPERATOR CLASS vector_l1_ops FOR TYPE vector USING hnsw AS OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops, @@ -55,12 +49,6 @@ CREATE OPERATOR CLASS bit_hamming_ops FUNCTION 1 hamming_distance(bit, bit), FUNCTION 3 hamming_distance(bit, bit); -CREATE OPERATOR CLASS bit_jaccard_ops - FOR TYPE bit USING ivfflat AS - OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops, - FUNCTION 1 jaccard_distance(bit, bit), - FUNCTION 3 jaccard_distance(bit, bit); - CREATE OPERATOR CLASS bit_hamming_ops FOR TYPE bit USING hnsw AS OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops, @@ -340,12 +328,6 @@ CREATE OPERATOR CLASS halfvec_cosine_ops FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), FUNCTION 4 l2_norm(halfvec); -CREATE OPERATOR CLASS halfvec_l1_ops - FOR TYPE halfvec USING ivfflat AS - OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops, - FUNCTION 1 l1_distance(halfvec, halfvec), - FUNCTION 3 l1_distance(halfvec, halfvec); - CREATE OPERATOR CLASS halfvec_l2_ops FOR TYPE halfvec USING hnsw AS OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, diff --git a/sql/vector.sql b/sql/vector.sql index a169502..95d136d 100644 --- a/sql/vector.sql +++ b/sql/vector.sql @@ -293,12 +293,6 @@ CREATE OPERATOR CLASS vector_cosine_ops FUNCTION 3 vector_spherical_distance(vector, vector), FUNCTION 4 vector_norm(vector); -CREATE OPERATOR CLASS vector_l1_ops - FOR TYPE vector USING ivfflat AS - OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops, - FUNCTION 1 l1_distance(vector, vector), - FUNCTION 3 l1_distance(vector, vector); - CREATE OPERATOR CLASS vector_l2_ops FOR TYPE vector USING hnsw AS OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, @@ -348,12 +342,6 @@ CREATE OPERATOR CLASS bit_hamming_ops FUNCTION 1 hamming_distance(bit, bit), FUNCTION 3 hamming_distance(bit, bit); -CREATE OPERATOR CLASS bit_jaccard_ops - FOR TYPE bit USING ivfflat AS - OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops, - FUNCTION 1 jaccard_distance(bit, bit), - FUNCTION 3 jaccard_distance(bit, bit); - CREATE OPERATOR CLASS bit_hamming_ops FOR TYPE bit USING hnsw AS OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops, @@ -649,12 +637,6 @@ CREATE OPERATOR CLASS halfvec_cosine_ops FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), FUNCTION 4 l2_norm(halfvec); -CREATE OPERATOR CLASS halfvec_l1_ops - FOR TYPE halfvec USING ivfflat AS - OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops, - FUNCTION 1 l1_distance(halfvec, halfvec), - FUNCTION 3 l1_distance(halfvec, halfvec); - CREATE OPERATOR CLASS halfvec_l2_ops FOR TYPE halfvec USING hnsw AS OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, diff --git a/test/expected/ivfflat_bit_jaccard.out b/test/expected/ivfflat_bit_jaccard.out deleted file mode 100644 index 0eb6b8d..0000000 --- a/test/expected/ivfflat_bit_jaccard.out +++ /dev/null @@ -1,21 +0,0 @@ -SET enable_seqscan = off; -CREATE TABLE t (val bit(4)); -INSERT INTO t (val) VALUES (B'0000'), (B'1100'), (B'1111'), (NULL); -CREATE INDEX ON t USING ivfflat (val bit_jaccard_ops) WITH (lists = 1); -INSERT INTO t (val) VALUES (B'1110'); -SELECT * FROM t ORDER BY val <%> B'1111'; - val ------- - 1111 - 1110 - 1100 - 0000 -(4 rows) - -SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <%> (SELECT NULL::bit)) t2; - count -------- - 4 -(1 row) - -DROP TABLE t; diff --git a/test/expected/ivfflat_halfvec_l1.out b/test/expected/ivfflat_halfvec_l1.out deleted file mode 100644 index dc81107..0000000 --- a/test/expected/ivfflat_halfvec_l1.out +++ /dev/null @@ -1,21 +0,0 @@ -SET enable_seqscan = off; -CREATE TABLE t (val halfvec(3)); -INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); -CREATE INDEX ON t USING ivfflat (val halfvec_l1_ops) WITH (lists = 1); -INSERT INTO t (val) VALUES ('[1,2,4]'); -SELECT * FROM t ORDER BY val <+> '[3,3,3]'; - val ---------- - [1,2,3] - [1,2,4] - [1,1,1] - [0,0,0] -(4 rows) - -SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::halfvec)) t2; - count -------- - 4 -(1 row) - -DROP TABLE t; diff --git a/test/expected/ivfflat_vector_l1.out b/test/expected/ivfflat_vector_l1.out deleted file mode 100644 index abd0050..0000000 --- a/test/expected/ivfflat_vector_l1.out +++ /dev/null @@ -1,21 +0,0 @@ -SET enable_seqscan = off; -CREATE TABLE t (val vector(3)); -INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); -CREATE INDEX ON t USING ivfflat (val vector_l1_ops) WITH (lists = 1); -INSERT INTO t (val) VALUES ('[1,2,4]'); -SELECT * FROM t ORDER BY val <+> '[3,3,3]'; - val ---------- - [1,2,3] - [1,2,4] - [1,1,1] - [0,0,0] -(4 rows) - -SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::vector)) t2; - count -------- - 4 -(1 row) - -DROP TABLE t; diff --git a/test/sql/ivfflat_bit_jaccard.sql b/test/sql/ivfflat_bit_jaccard.sql deleted file mode 100644 index 8d8b6c9..0000000 --- a/test/sql/ivfflat_bit_jaccard.sql +++ /dev/null @@ -1,12 +0,0 @@ -SET enable_seqscan = off; - -CREATE TABLE t (val bit(4)); -INSERT INTO t (val) VALUES (B'0000'), (B'1100'), (B'1111'), (NULL); -CREATE INDEX ON t USING ivfflat (val bit_jaccard_ops) WITH (lists = 1); - -INSERT INTO t (val) VALUES (B'1110'); - -SELECT * FROM t ORDER BY val <%> B'1111'; -SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <%> (SELECT NULL::bit)) t2; - -DROP TABLE t; diff --git a/test/sql/ivfflat_halfvec_l1.sql b/test/sql/ivfflat_halfvec_l1.sql deleted file mode 100644 index ef456c7..0000000 --- a/test/sql/ivfflat_halfvec_l1.sql +++ /dev/null @@ -1,12 +0,0 @@ -SET enable_seqscan = off; - -CREATE TABLE t (val halfvec(3)); -INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); -CREATE INDEX ON t USING ivfflat (val halfvec_l1_ops) WITH (lists = 1); - -INSERT INTO t (val) VALUES ('[1,2,4]'); - -SELECT * FROM t ORDER BY val <+> '[3,3,3]'; -SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::halfvec)) t2; - -DROP TABLE t; diff --git a/test/sql/ivfflat_vector_l1.sql b/test/sql/ivfflat_vector_l1.sql deleted file mode 100644 index d09c3d6..0000000 --- a/test/sql/ivfflat_vector_l1.sql +++ /dev/null @@ -1,12 +0,0 @@ -SET enable_seqscan = off; - -CREATE TABLE t (val vector(3)); -INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); -CREATE INDEX ON t USING ivfflat (val vector_l1_ops) WITH (lists = 1); - -INSERT INTO t (val) VALUES ('[1,2,4]'); - -SELECT * FROM t ORDER BY val <+> '[3,3,3]'; -SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::vector)) t2; - -DROP TABLE t; diff --git a/test/t/003_ivfflat_build_recall.pl b/test/t/003_ivfflat_build_recall.pl index e691deb..21e0c8d 100644 --- a/test/t/003_ivfflat_build_recall.pl +++ b/test/t/003_ivfflat_build_recall.pl @@ -70,8 +70,8 @@ for (1 .. 20) } # Check each index type -my @operators = ("<->", "<#>", "<=>", "<+>"); -my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops", "vector_l1_ops"); +my @operators = ("<->", "<#>", "<=>"); +my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops"); for my $i (0 .. $#operators) { diff --git a/test/t/005_ivfflat_query_recall.pl b/test/t/005_ivfflat_query_recall.pl index 93fe762..1edebb3 100644 --- a/test/t/005_ivfflat_query_recall.pl +++ b/test/t/005_ivfflat_query_recall.pl @@ -17,8 +17,8 @@ $node->safe_psql("postgres", ); # Check each index type -my @operators = ("<->", "<#>", "<=>", "<+>"); -my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops", "vector_l1_ops"); +my @operators = ("<->", "<#>", "<=>"); +my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops"); for my $i (0 .. $#operators) { diff --git a/test/t/017_ivfflat_insert_recall.pl b/test/t/017_ivfflat_insert_recall.pl index 148ccc9..c2e320c 100644 --- a/test/t/017_ivfflat_insert_recall.pl +++ b/test/t/017_ivfflat_insert_recall.pl @@ -66,8 +66,8 @@ for (1 .. 20) } # Check each index type -my @operators = ("<->", "<#>", "<=>", "<+>"); -my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops", "vector_l1_ops"); +my @operators = ("<->", "<#>", "<=>"); +my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops"); for my $i (0 .. $#operators) { diff --git a/test/t/032_ivfflat_halfvec_build_recall.pl b/test/t/032_ivfflat_halfvec_build_recall.pl index 4b056a3..6c9a3a2 100644 --- a/test/t/032_ivfflat_halfvec_build_recall.pl +++ b/test/t/032_ivfflat_halfvec_build_recall.pl @@ -74,8 +74,8 @@ for (1 .. 20) } # Check each index type -my @operators = ("<->", "<#>", "<=>", "<+>"); -my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops", "halfvec_l1_ops"); +my @operators = ("<->", "<#>", "<=>"); +my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops"); for my $i (0 .. $#operators) { @@ -102,12 +102,7 @@ for my $i (0 .. $#operators) )); # Test approximate results - if ($operator eq "<+>") - { - test_recall(1, 0.30, $operator); - test_recall(10, 0.90, $operator); - } - elsif ($operator ne "<#>") + if ($operator ne "<#>") { # TODO Fix test (uniform random vectors all have similar inner product) test_recall(1, 0.34, $operator); @@ -136,12 +131,7 @@ for my $i (0 .. $#operators) like($stderr, qr/using \d+ parallel workers/); # Test approximate results - if ($operator eq "<+>") - { - test_recall(1, 0.30, $operator); - test_recall(10, 0.90, $operator); - } - elsif ($operator ne "<#>") + if ($operator ne "<#>") { # TODO Fix test (uniform random vectors all have similar inner product) test_recall(1, 0.34, $operator); diff --git a/test/t/035_ivfflat_bit_build_recall.pl b/test/t/035_ivfflat_bit_build_recall.pl index 1ca951b..8b69d7a 100644 --- a/test/t/035_ivfflat_bit_build_recall.pl +++ b/test/t/035_ivfflat_bit_build_recall.pl @@ -70,8 +70,8 @@ for (1 .. 20) } # Check each index type -my @operators = ("<~>", "<\%>"); -my @opclasses = ("bit_hamming_ops", "bit_jaccard_ops"); +my @operators = ("<~>"); +my @opclasses = ("bit_hamming_ops"); for my $i (0 .. $#operators) {