Added support for indexing halfvec with L1 distance [skip ci]

This commit is contained in:
Andrew Kane
2024-04-22 13:00:59 -07:00
parent d46d014234
commit 70b299a7ff
10 changed files with 105 additions and 6 deletions

View File

@@ -917,6 +917,7 @@ Operator | Description | Added
<-> | Euclidean distance | unreleased
<#> | negative inner product | unreleased
<=> | cosine distance | unreleased
<+> | taxicab distance | unreleased
### Halfvec Functions

View File

@@ -251,6 +251,11 @@ CREATE OPERATOR <=> (
COMMUTATOR = '<=>'
);
-- L1 distance operator for halfvec: "a <+> b" is evaluated by calling
-- l1_distance(a, b).
-- The operator is declared as its own commutator, which tells the planner
-- that the two operands may be swapped without changing the result.
CREATE OPERATOR <+> (
LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l1_distance,
COMMUTATOR = '<+>'
);
CREATE OPERATOR + (
LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_add,
COMMUTATOR = +
@@ -335,6 +340,12 @@ CREATE OPERATOR CLASS halfvec_cosine_ops
FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec),
FUNCTION 4 l2_norm(halfvec);
-- ivfflat operator class that lets an index on a halfvec column serve
-- ORDER BY queries written with the <+> operator.
-- It is not declared DEFAULT, so it has to be named explicitly in CREATE INDEX.
-- Strategy 1 is an ordering operator; its result is sorted using float_ops.
-- l1_distance is registered as both support function 1 and support function 3.
-- NOTE(review): presumably 1 is the distance used when searching and 3 the
-- distance used when assigning vectors to lists; confirm against the ivfflat
-- access method before relying on this.
CREATE OPERATOR CLASS halfvec_l1_ops
FOR TYPE halfvec USING ivfflat AS
OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops,
FUNCTION 1 l1_distance(halfvec, halfvec),
FUNCTION 3 l1_distance(halfvec, halfvec);
CREATE OPERATOR CLASS halfvec_l2_ops
FOR TYPE halfvec USING hnsw AS
OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops,
@@ -351,6 +362,11 @@ CREATE OPERATOR CLASS halfvec_cosine_ops
FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec),
FUNCTION 2 l2_norm(halfvec);
-- hnsw operator class that lets an index on a halfvec column serve
-- ORDER BY queries written with the <+> operator.
-- It is not declared DEFAULT, so it has to be named explicitly in CREATE INDEX.
-- Strategy 1 is an ordering operator; its result is sorted using float_ops.
-- Only support function 1 (l1_distance) is registered for this class.
CREATE OPERATOR CLASS halfvec_l1_ops
FOR TYPE halfvec USING hnsw AS
OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops,
FUNCTION 1 l1_distance(halfvec, halfvec);
CREATE TYPE sparsevec;
CREATE FUNCTION sparsevec_in(cstring, oid, integer) RETURNS sparsevec

View File

@@ -558,6 +558,11 @@ CREATE OPERATOR <=> (
COMMUTATOR = '<=>'
);
-- L1 distance operator for halfvec: "a <+> b" is evaluated by calling
-- l1_distance(a, b).
-- The operator is declared as its own commutator, which tells the planner
-- that the two operands may be swapped without changing the result.
CREATE OPERATOR <+> (
LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = l1_distance,
COMMUTATOR = '<+>'
);
CREATE OPERATOR + (
LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_add,
COMMUTATOR = +
@@ -644,6 +649,12 @@ CREATE OPERATOR CLASS halfvec_cosine_ops
FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec),
FUNCTION 4 l2_norm(halfvec);
-- ivfflat operator class that lets an index on a halfvec column serve
-- ORDER BY queries written with the <+> operator.
-- It is not declared DEFAULT, so it has to be named explicitly in CREATE INDEX.
-- Strategy 1 is an ordering operator; its result is sorted using float_ops.
-- l1_distance is registered as both support function 1 and support function 3.
-- NOTE(review): presumably 1 is the distance used when searching and 3 the
-- distance used when assigning vectors to lists; confirm against the ivfflat
-- access method before relying on this.
CREATE OPERATOR CLASS halfvec_l1_ops
FOR TYPE halfvec USING ivfflat AS
OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops,
FUNCTION 1 l1_distance(halfvec, halfvec),
FUNCTION 3 l1_distance(halfvec, halfvec);
CREATE OPERATOR CLASS halfvec_l2_ops
FOR TYPE halfvec USING hnsw AS
OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops,
@@ -660,6 +671,11 @@ CREATE OPERATOR CLASS halfvec_cosine_ops
FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec),
FUNCTION 2 l2_norm(halfvec);
-- hnsw operator class that lets an index on a halfvec column serve
-- ORDER BY queries written with the <+> operator.
-- It is not declared DEFAULT, so it has to be named explicitly in CREATE INDEX.
-- Strategy 1 is an ordering operator; its result is sorted using float_ops.
-- Only support function 1 (l1_distance) is registered for this class.
CREATE OPERATOR CLASS halfvec_l1_ops
FOR TYPE halfvec USING hnsw AS
OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops,
FUNCTION 1 l1_distance(halfvec, halfvec);
--- sparsevec type
CREATE TYPE sparsevec;

View File

@@ -0,0 +1,21 @@
SET enable_seqscan = off;
CREATE TABLE t (val halfvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val halfvec_l1_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <+> '[3,3,3]';
val
---------
[1,2,3]
[1,2,4]
[1,1,1]
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::halfvec)) t2;
count
-------
4
(1 row)
DROP TABLE t;

View File

@@ -0,0 +1,21 @@
SET enable_seqscan = off;
CREATE TABLE t (val halfvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val halfvec_l1_ops) WITH (lists = 1);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <+> '[3,3,3]';
val
---------
[1,2,3]
[1,2,4]
[1,1,1]
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::halfvec)) t2;
count
-------
4
(1 row)
DROP TABLE t;

View File

@@ -0,0 +1,12 @@
SET enable_seqscan = off;
CREATE TABLE t (val halfvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val halfvec_l1_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <+> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::halfvec)) t2;
DROP TABLE t;

View File

@@ -0,0 +1,12 @@
SET enable_seqscan = off;
CREATE TABLE t (val halfvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val halfvec_l1_ops) WITH (lists = 1);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <+> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::halfvec)) t2;
DROP TABLE t;

View File

@@ -71,8 +71,8 @@ for (1 .. 20)
}
# Check each index type
my @operators = ("<->", "<#>", "<=>");
my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops");
my @operators = ("<->", "<#>", "<=>", "<+>");
my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops", "halfvec_l1_ops");
for my $i (0 .. $#operators)
{

View File

@@ -68,8 +68,8 @@ for (1 .. 20)
}
# Check each index type
my @operators = ("<->", "<#>", "<=>");
my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops");
my @operators = ("<->", "<#>", "<=>", "<+>");
my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops", "halfvec_l1_ops");
for my $i (0 .. $#operators)
{

View File

@@ -74,8 +74,8 @@ for (1 .. 20)
}
# Check each index type
my @operators = ("<->", "<#>", "<=>");
my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops");
my @operators = ("<->", "<#>", "<=>", "<+>");
my @opclasses = ("halfvec_l2_ops", "halfvec_ip_ops", "halfvec_cosine_ops", "halfvec_l1_ops");
for my $i (0 .. $#operators)
{