Added support for indexing Jaccard distance

This commit is contained in:
Andrew Kane
2024-03-25 22:44:51 -07:00
parent 791fc2436f
commit 02c4f4884c
5 changed files with 54 additions and 0 deletions

View File

@@ -722,6 +722,7 @@ sum(vector) → vector | sum | 0.5.0
Operator | Description | Added
--- | --- | ---
<~> | Hamming distance | 0.7.0
<%> | Jaccard distance | 0.7.0
### Bit Functions

View File

@@ -15,7 +15,17 @@ CREATE OPERATOR <~> (
COMMUTATOR = '<~>'
);
CREATE OPERATOR <%> (
LEFTARG = bit, RIGHTARG = bit, PROCEDURE = jaccard_distance,
COMMUTATOR = '<%>'
);
CREATE OPERATOR CLASS bit_hamming_ops
FOR TYPE bit USING hnsw AS
OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops,
FUNCTION 1 hamming_distance(bit, bit);
CREATE OPERATOR CLASS bit_jaccard_ops
FOR TYPE bit USING hnsw AS
OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops,
FUNCTION 1 jaccard_distance(bit, bit);

View File

@@ -304,7 +304,17 @@ CREATE OPERATOR <~> (
COMMUTATOR = '<~>'
);
CREATE OPERATOR <%> (
LEFTARG = bit, RIGHTARG = bit, PROCEDURE = jaccard_distance,
COMMUTATOR = '<%>'
);
CREATE OPERATOR CLASS bit_hamming_ops
FOR TYPE bit USING hnsw AS
OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops,
FUNCTION 1 hamming_distance(bit, bit);
CREATE OPERATOR CLASS bit_jaccard_ops
FOR TYPE bit USING hnsw AS
OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops,
FUNCTION 1 jaccard_distance(bit, bit);

View File

@@ -0,0 +1,21 @@
SET enable_seqscan = off;
CREATE TABLE t (val bit(4));
INSERT INTO t (val) VALUES (B'0000'), (B'1100'), (B'1111'), (NULL);
CREATE INDEX ON t USING hnsw (val bit_jaccard_ops);
INSERT INTO t (val) VALUES (B'1110');
SELECT * FROM t ORDER BY val <%> B'1111';
val
------
1111
1110
1100
0000
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <%> (SELECT NULL::bit)) t2;
count
-------
4
(1 row)
DROP TABLE t;

12
test/sql/hnsw_jaccard.sql Normal file
View File

@@ -0,0 +1,12 @@
SET enable_seqscan = off;
CREATE TABLE t (val bit(4));
INSERT INTO t (val) VALUES (B'0000'), (B'1100'), (B'1111'), (NULL);
CREATE INDEX ON t USING hnsw (val bit_jaccard_ops);
INSERT INTO t (val) VALUES (B'1110');
SELECT * FROM t ORDER BY val <%> B'1111';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <%> (SELECT NULL::bit)) t2;
DROP TABLE t;