From 70b299a7ff5503854e5197dec899bc3598ea5422 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 22 Apr 2024 13:00:59 -0700 Subject: [PATCH] Added support for indexing halfvec with L1 distance [skip ci] --- README.md | 1 + sql/vector--0.6.2--0.7.0.sql | 16 ++++++++++++++++ sql/vector.sql | 16 ++++++++++++++++ test/expected/hnsw_halfvec_l1.out | 21 +++++++++++++++++++++ test/expected/ivfflat_halfvec_l1.out | 21 +++++++++++++++++++++ test/sql/hnsw_halfvec_l1.sql | 12 ++++++++++++ test/sql/ivfflat_halfvec_l1.sql | 12 ++++++++++++ test/t/021_hnsw_halfvec_build_recall.pl | 4 ++-- test/t/024_hnsw_halfvec_insert_recall.pl | 4 ++-- test/t/032_ivfflat_halfvec_build_recall.pl | 4 ++-- 10 files changed, 105 insertions(+), 6 deletions(-) create mode 100644 test/expected/hnsw_halfvec_l1.out create mode 100644 test/expected/ivfflat_halfvec_l1.out create mode 100644 test/sql/hnsw_halfvec_l1.sql create mode 100644 test/sql/ivfflat_halfvec_l1.sql diff --git a/README.md b/README.md index 3760025..ae48583 100644 --- a/README.md +++ b/README.md @@ -917,6 +917,7 @@ Operator | Description | Added <-> | Euclidean distance | unreleased <#> | negative inner product | unreleased <=> | cosine distance | unreleased +<+> | taxicab distance | unreleased ### Halfvec Functions diff --git a/sql/vector--0.6.2--0.7.0.sql b/sql/vector--0.6.2--0.7.0.sql index 9227487..5260e59 100644 --- a/sql/vector--0.6.2--0.7.0.sql +++ b/sql/vector--0.6.2--0.7.0.sql @@ -251,6 +251,11 @@ CREATE OPERATOR <=> ( COMMUTATOR = '<=>' ); +CREATE OPERATOR <+> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + CREATE OPERATOR + ( LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_add, COMMUTATOR = + @@ -335,6 +340,12 @@ CREATE OPERATOR CLASS halfvec_cosine_ops FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), FUNCTION 4 l2_norm(halfvec); +CREATE OPERATOR CLASS halfvec_l1_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(halfvec, halfvec), + FUNCTION 3 l1_distance(halfvec, halfvec); + CREATE OPERATOR CLASS halfvec_l2_ops FOR TYPE halfvec USING hnsw AS OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, @@ -351,6 +362,11 @@ CREATE OPERATOR CLASS halfvec_cosine_ops FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), FUNCTION 2 l2_norm(halfvec); +CREATE OPERATOR CLASS halfvec_l1_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(halfvec, halfvec); + CREATE TYPE sparsevec; CREATE FUNCTION sparsevec_in(cstring, oid, integer) RETURNS sparsevec diff --git a/sql/vector.sql b/sql/vector.sql index a039620..78951af 100644 --- a/sql/vector.sql +++ b/sql/vector.sql @@ -558,6 +558,11 @@ CREATE OPERATOR <=> ( COMMUTATOR = '<=>' ); +CREATE OPERATOR <+> ( + LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l1_distance, + COMMUTATOR = '<+>' +); + CREATE OPERATOR + ( LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_add, COMMUTATOR = + @@ -644,6 +649,12 @@ CREATE OPERATOR CLASS halfvec_cosine_ops FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec), FUNCTION 4 l2_norm(halfvec); +CREATE OPERATOR CLASS halfvec_l1_ops + FOR TYPE halfvec USING ivfflat AS + OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(halfvec, halfvec), + FUNCTION 3 l1_distance(halfvec, halfvec); + CREATE OPERATOR CLASS halfvec_l2_ops FOR TYPE halfvec USING hnsw AS OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops, @@ -660,6 +671,11 @@ CREATE OPERATOR CLASS halfvec_cosine_ops FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec), FUNCTION 2 l2_norm(halfvec); +CREATE OPERATOR CLASS halfvec_l1_ops + FOR TYPE halfvec USING hnsw AS + OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops, + FUNCTION 1 l1_distance(halfvec, halfvec); + --- sparsevec type CREATE TYPE sparsevec; diff --git a/test/expected/hnsw_halfvec_l1.out b/test/expected/hnsw_halfvec_l1.out new file mode 100644 index 0000000..4ecf706 --- /dev/null +++ b/test/expected/hnsw_halfvec_l1.out @@ -0,0 +1,21 @@ +SET enable_seqscan = off; +CREATE TABLE t (val halfvec(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING hnsw (val halfvec_l1_ops); +INSERT INTO t (val) VALUES ('[1,2,4]'); +SELECT * FROM t ORDER BY val <+> '[3,3,3]'; + val +--------- + [1,2,3] + [1,2,4] + [1,1,1] + [0,0,0] +(4 rows) + +SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::halfvec)) t2; + count +------- + 4 +(1 row) + +DROP TABLE t; diff --git a/test/expected/ivfflat_halfvec_l1.out b/test/expected/ivfflat_halfvec_l1.out new file mode 100644 index 0000000..dc81107 --- /dev/null +++ b/test/expected/ivfflat_halfvec_l1.out @@ -0,0 +1,21 @@ +SET enable_seqscan = off; +CREATE TABLE t (val halfvec(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING ivfflat (val halfvec_l1_ops) WITH (lists = 1); +INSERT INTO t (val) VALUES ('[1,2,4]'); +SELECT * FROM t ORDER BY val <+> '[3,3,3]'; + val +--------- + [1,2,3] + [1,2,4] + [1,1,1] + [0,0,0] +(4 rows) + +SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::halfvec)) t2; + count +------- + 4 +(1 row) + +DROP TABLE t; diff --git a/test/sql/hnsw_halfvec_l1.sql b/test/sql/hnsw_halfvec_l1.sql new file mode 100644 index 0000000..afdc113 --- /dev/null +++ b/test/sql/hnsw_halfvec_l1.sql @@ -0,0 +1,12 @@ +SET enable_seqscan = off; + +CREATE TABLE t (val halfvec(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING hnsw (val halfvec_l1_ops); + +INSERT INTO t (val) VALUES ('[1,2,4]'); + +SELECT * FROM t ORDER BY val <+> '[3,3,3]'; +SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::halfvec)) t2; + +DROP TABLE t; diff --git a/test/sql/ivfflat_halfvec_l1.sql b/test/sql/ivfflat_halfvec_l1.sql new file mode 100644 index 0000000..ef456c7 --- /dev/null +++ b/test/sql/ivfflat_halfvec_l1.sql @@ -0,0 +1,12 @@ +SET enable_seqscan = off; + +CREATE TABLE t (val halfvec(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE INDEX ON t USING ivfflat (val halfvec_l1_ops) WITH (lists = 1); + +INSERT INTO t (val) VALUES ('[1,2,4]'); + +SELECT * FROM t ORDER BY val <+> '[3,3,3]'; +SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::halfvec)) t2; + +DROP TABLE t; diff --git a/test/t/021_hnsw_halfvec_build_recall.pl b/test/t/021_hnsw_halfvec_build_recall.pl index ae9404c..f83a9b1 100644 --- a/test/t/021_hnsw_halfvec_build_recall.pl +++ b/test/t/021_hnsw_halfvec_build_recall.pl @@ -71,8 +71,8 @@ for (1 .. 20) } # Check each index type -my @operators = ("<->", "<#>", "<=>"); -my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops"); +my @operators = ("<->", "<#>", "<=>", "<+>"); +my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops", "halfvec_l1_ops"); for my $i (0 .. $#operators) { diff --git a/test/t/024_hnsw_halfvec_insert_recall.pl b/test/t/024_hnsw_halfvec_insert_recall.pl index ff7c0db..6a7c501 100644 --- a/test/t/024_hnsw_halfvec_insert_recall.pl +++ b/test/t/024_hnsw_halfvec_insert_recall.pl @@ -68,8 +68,8 @@ for (1 .. 20) } # Check each index type -my @operators = ("<->", "<#>", "<=>"); -my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops"); +my @operators = ("<->", "<#>", "<=>", "<+>"); +my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops", "halfvec_l1_ops"); for my $i (0 .. $#operators) { diff --git a/test/t/032_ivfflat_halfvec_build_recall.pl b/test/t/032_ivfflat_halfvec_build_recall.pl index 6c9a3a2..f26f011 100644 --- a/test/t/032_ivfflat_halfvec_build_recall.pl +++ b/test/t/032_ivfflat_halfvec_build_recall.pl @@ -74,8 +74,8 @@ for (1 .. 20) } # Check each index type -my @operators = ("<->", "<#>", "<=>"); -my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops"); +my @operators = ("<->", "<#>", "<=>", "<+>"); +my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops", "halfvec_l1_ops"); for my $i (0 .. $#operators) {