Added support for indexing L1 distance

This commit is contained in:
Andrew Kane
2024-04-22 12:44:03 -07:00
parent 6dad8460a6
commit af9d50481d
13 changed files with 122 additions and 10 deletions

View File

@@ -3,6 +3,7 @@
- Added `halfvec` type
- Added `sparsevec` type
- Added support for indexing `bit` type
- Added support for indexing L1 distance
- Added `binary_quantize` function
- Added `hamming_distance` function
- Added `jaccard_distance` function

View File

@@ -227,6 +227,12 @@ Cosine distance
CREATE INDEX ON items USING hnsw (embedding vector_cosine_ops);
```
L1 distance - unreleased
```sql
CREATE INDEX ON items USING hnsw (embedding vector_l1_ops);
```
Hamming distance - unreleased
```sql
@@ -349,6 +355,12 @@ Cosine distance
CREATE INDEX ON items USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
```
L1 distance - unreleased
```sql
CREATE INDEX ON items USING ivfflat (embedding vector_l1_ops) WITH (lists = 100);
```
Supported types are:
- `vector` - up to 2,000 dimensions
@@ -855,6 +867,7 @@ Operator | Description | Added
<-> | Euclidean distance |
<#> | negative inner product |
<=> | cosine distance |
<+> | taxicab (L1) distance | unreleased
### Vector Functions

View File

@@ -13,10 +13,26 @@ CREATE FUNCTION subvector(vector, int, int) RETURNS vector
-- C-language function taking two vectors and returning a vector.
-- Declared IMMUTABLE, STRICT, and PARALLEL SAFE.
CREATE FUNCTION vector_concat(vector, vector) RETURNS vector
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- L1 distance operator for two vector operands, implemented by l1_distance.
-- Declared as its own commutator (a <+> b is interchangeable with b <+> a).
CREATE OPERATOR <+> (
LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l1_distance,
COMMUTATOR = '<+>'
);
-- Binary operator on two vector operands, implemented by vector_concat.
-- No commutator is declared for this operator.
CREATE OPERATOR || (
LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_concat
);
-- ivfflat operator class that lets vector columns be indexed for <+> ordering.
-- Strategy 1 registers <+> as an ORDER BY operator whose results sort via
-- the float_ops family. l1_distance fills support function slots 1 and 3.
-- NOTE(review): presumably slot 1 is the search distance and slot 3 the
-- distance used while clustering -- confirm against the ivfflat access method.
CREATE OPERATOR CLASS vector_l1_ops
FOR TYPE vector USING ivfflat AS
OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops,
FUNCTION 1 l1_distance(vector, vector),
FUNCTION 3 l1_distance(vector, vector);
-- hnsw operator class that lets vector columns be indexed for <+> ordering.
-- Strategy 1 registers <+> as an ORDER BY operator whose results sort via
-- the float_ops family; l1_distance is the only support function (slot 1).
-- NOTE(review): slot 1 is presumably the distance function used by the hnsw
-- access method -- confirm against its support function numbering.
CREATE OPERATOR CLASS vector_l1_ops
FOR TYPE vector USING hnsw AS
OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops,
FUNCTION 1 l1_distance(vector, vector);
-- C-language function taking two bit values and returning float8.
-- Declared IMMUTABLE, STRICT, and PARALLEL SAFE.
CREATE FUNCTION hamming_distance(bit, bit) RETURNS float8
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

View File

@@ -186,6 +186,11 @@ CREATE OPERATOR <=> (
COMMUTATOR = '<=>'
);
-- L1 distance operator for two vector operands, implemented by l1_distance.
-- Declared as its own commutator (a <+> b is interchangeable with b <+> a).
CREATE OPERATOR <+> (
LEFTARG = vector, RIGHTARG = vector, PROCEDURE = l1_distance,
COMMUTATOR = '<+>'
);
CREATE OPERATOR + (
LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_add,
COMMUTATOR = +
@@ -288,6 +293,12 @@ CREATE OPERATOR CLASS vector_cosine_ops
FUNCTION 3 vector_spherical_distance(vector, vector),
FUNCTION 4 vector_norm(vector);
-- ivfflat operator class that lets vector columns be indexed for <+> ordering.
-- Strategy 1 registers <+> as an ORDER BY operator whose results sort via
-- the float_ops family. l1_distance fills support function slots 1 and 3.
-- NOTE(review): presumably slot 1 is the search distance and slot 3 the
-- distance used while clustering -- confirm against the ivfflat access method.
CREATE OPERATOR CLASS vector_l1_ops
FOR TYPE vector USING ivfflat AS
OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops,
FUNCTION 1 l1_distance(vector, vector),
FUNCTION 3 l1_distance(vector, vector);
CREATE OPERATOR CLASS vector_l2_ops
FOR TYPE vector USING hnsw AS
OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops,
@@ -304,6 +315,11 @@ CREATE OPERATOR CLASS vector_cosine_ops
FUNCTION 1 vector_negative_inner_product(vector, vector),
FUNCTION 2 vector_norm(vector);
-- hnsw operator class that lets vector columns be indexed for <+> ordering.
-- Strategy 1 registers <+> as an ORDER BY operator whose results sort via
-- the float_ops family; l1_distance is the only support function (slot 1).
-- NOTE(review): slot 1 is presumably the distance function used by the hnsw
-- access method -- confirm against its support function numbering.
CREATE OPERATOR CLASS vector_l1_ops
FOR TYPE vector USING hnsw AS
OPERATOR 1 <+> (vector, vector) FOR ORDER BY float_ops,
FUNCTION 1 l1_distance(vector, vector);
-- bit functions
CREATE FUNCTION hamming_distance(bit, bit) RETURNS float8

21
test/expected/hnsw_l1.out Normal file
View File

@@ -0,0 +1,21 @@
SET enable_seqscan = off;
CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val vector_l1_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <+> '[3,3,3]';
val
---------
[1,2,3]
[1,2,4]
[1,1,1]
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::vector)) t2;
count
-------
4
(1 row)
DROP TABLE t;

View File

@@ -0,0 +1,21 @@
SET enable_seqscan = off;
CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val vector_l1_ops) WITH (lists = 1);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <+> '[3,3,3]';
val
---------
[1,2,3]
[1,2,4]
[1,1,1]
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::vector)) t2;
count
-------
4
(1 row)
DROP TABLE t;

12
test/sql/hnsw_l1.sql Normal file
View File

@@ -0,0 +1,12 @@
SET enable_seqscan = off;
CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val vector_l1_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <+> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::vector)) t2;
DROP TABLE t;

12
test/sql/ivfflat_l1.sql Normal file
View File

@@ -0,0 +1,12 @@
SET enable_seqscan = off;
CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val vector_l1_ops) WITH (lists = 1);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <+> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <+> (SELECT NULL::vector)) t2;
DROP TABLE t;

View File

@@ -70,8 +70,8 @@ for (1 .. 20)
}
# Check each index type
my @operators = ("<->", "<#>", "<=>");
my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops");
my @operators = ("<->", "<#>", "<=>", "<+>");
my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops", "vector_l1_ops");
for my $i (0 .. $#operators)
{

View File

@@ -17,8 +17,8 @@ $node->safe_psql("postgres",
);
# Check each index type
my @operators = ("<->", "<#>", "<=>");
my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops");
my @operators = ("<->", "<#>", "<=>", "<+>");
my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops", "vector_l1_ops");
for my $i (0 .. $#operators)
{

View File

@@ -67,8 +67,8 @@ for (1 .. 20)
}
# Check each index type
my @operators = ("<->", "<#>", "<=>");
my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops");
my @operators = ("<->", "<#>", "<=>", "<+>");
my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops", "vector_l1_ops");
for my $i (0 .. $#operators)
{

View File

@@ -64,8 +64,8 @@ for (1 .. 20)
}
# Check each index type
my @operators = ("<->", "<#>", "<=>");
my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops");
my @operators = ("<->", "<#>", "<=>", "<+>");
my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops", "vector_l1_ops");
for my $i (0 .. $#operators)
{

View File

@@ -66,8 +66,8 @@ for (1 .. 20)
}
# Check each index type
my @operators = ("<->", "<#>", "<=>");
my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops");
my @operators = ("<->", "<#>", "<=>", "<+>");
my @opclasses = ("vector_l2_ops", "vector_ip_ops", "vector_cosine_ops", "vector_l1_ops");
for my $i (0 .. $#operators)
{