From abac7a3f776d4edbb423a000ba5234d3e8eab465 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Tue, 2 Apr 2024 14:25:09 -0700
Subject: [PATCH] Added sparsevec type

---
 CHANGELOG.md                            |   1 +
 Makefile                                |   4 +-
 Makefile.win                            |   6 +-
 README.md                               |  22 +
 sql/vector--0.6.2--0.7.0.sql            |  93 +++
 sql/vector.sql                          | 107 ++++
 src/hnsw.h                              |   5 +-
 src/hnswbuild.c                         |   5 +
 src/hnswinsert.c                        |   6 +-
 src/hnswutils.c                         |  33 +
 src/sparsevec.c                         | 778 ++++++++++++++++++++++++
 src/sparsevec.h                         |  24 +
 src/vector.c                            |  24 +
 test/expected/hnsw_sparsevec_cosine.out |  26 +
 test/expected/hnsw_sparsevec_ip.out     |  21 +
 test/expected/hnsw_sparsevec_l2.out     |  43 ++
 test/expected/sparsevec_functions.out   |  62 ++
 test/expected/sparsevec_input.out       |  62 ++
 test/sql/hnsw_sparsevec_cosine.sql      |  13 +
 test/sql/hnsw_sparsevec_ip.sql          |  12 +
 test/sql/hnsw_sparsevec_l2.sql          |  25 +
 test/sql/sparsevec_functions.sql        |  13 +
 test/sql/sparsevec_input.sql            |  19 +
 23 files changed, 1397 insertions(+), 7 deletions(-)
 create mode 100644 src/sparsevec.c
 create mode 100644 src/sparsevec.h
 create mode 100644 test/expected/hnsw_sparsevec_cosine.out
 create mode 100644 test/expected/hnsw_sparsevec_ip.out
 create mode 100644 test/expected/hnsw_sparsevec_l2.out
 create mode 100644 test/expected/sparsevec_functions.out
 create mode 100644 test/expected/sparsevec_input.out
 create mode 100644 test/sql/hnsw_sparsevec_cosine.sql
 create mode 100644 test/sql/hnsw_sparsevec_ip.sql
 create mode 100644 test/sql/hnsw_sparsevec_l2.sql
 create mode 100644 test/sql/sparsevec_functions.sql
 create mode 100644 test/sql/sparsevec_input.sql

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5bf1395..961b6df 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
 ## 0.7.0 (unreleased)
 
 - Added `halfvec` type
+- Added `sparsevec` type
 - Added support for bit vectors to HNSW
 - Added `hamming_distance` function
 - Added `jaccard_distance` function
diff --git a/Makefile b/Makefile
index cab9397..a7be0ef 100644
--- a/Makefile
+++ b/Makefile
@@ -3,8 +3,8 @@ EXTVERSION = 0.6.2
 
 MODULE_big = vector
 DATA = $(wildcard sql/*--*.sql)
-OBJS = src/bitvector.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
-HEADERS = src/halfvec.h src/vector.h
+OBJS = src/bitvector.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/sparsevec.o src/vector.o
+HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
 
 TESTS = $(wildcard test/sql/*.sql)
 REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))
diff --git a/Makefile.win b/Makefile.win
index 04ece60..48fd71b 100644
--- a/Makefile.win
+++ b/Makefile.win
@@ -1,10 +1,10 @@
 EXTENSION = vector
 EXTVERSION = 0.6.2
 
-OBJS = src\bitvector.obj src\halfvec.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
-HEADERS = src\halfvec.h src\vector.h
+OBJS = src\bitvector.obj src\halfvec.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\sparsevec.obj src\vector.obj
+HEADERS = src\halfvec.h src\sparsevec.h src\vector.h
 
-REGRESS = bit_functions btree cast copy halfvec_functions halfvec_input hnsw_bit_hamming hnsw_bit_jaccard hnsw_halfvec_cosine hnsw_halfvec_ip hnsw_halfvec_l2 hnsw_options hnsw_unlogged hnsw_vector_cosine hnsw_vector_ip hnsw_vector_l2 ivfflat_options ivfflat_unlogged ivfflat_vector_cosine ivfflat_vector_ip ivfflat_vector_l2 vector_functions vector_input
+REGRESS = bit_functions btree cast copy halfvec_functions halfvec_input hnsw_bit_hamming hnsw_bit_jaccard hnsw_halfvec_cosine hnsw_halfvec_ip hnsw_halfvec_l2 hnsw_options hnsw_sparsevec_cosine hnsw_sparsevec_ip hnsw_sparsevec_l2 hnsw_unlogged hnsw_vector_cosine hnsw_vector_ip hnsw_vector_l2 ivfflat_options ivfflat_unlogged ivfflat_vector_cosine ivfflat_vector_ip ivfflat_vector_l2 sparsevec_functions sparsevec_input vector_functions vector_input
 REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION)
 
 # For /arch flags
diff --git a/README.md b/README.md
index cafe953..4a78093 100644
--- a/README.md
+++ b/README.md
@@ -714,6 +714,7 @@ Also, note that `NULL` vectors are not indexed (as well as zero vectors for cosi
 - [Vector](#vector-type)
 - [Halfvec](#halfvec-type)
 - [Bit](#bit-type)
+- [Sparsevec](#sparsevec-type)
 
 ### Vector Type
 
@@ -789,6 +790,27 @@ Function | Description | Added
 hamming_distance(bit, bit) → double precision | Hamming distance | unreleased
 jaccard_distance(bit, bit) → double precision | Jaccard distance | unreleased
 
+### Sparsevec Type
+
+Each sparse vector takes `8 * nnz + 16` bytes of storage. Each element is a single-precision floating-point number, and all elements must be finite (no `NaN`, `Infinity` or `-Infinity`).
+
+### Sparsevec Operators
+
+Operator | Description | Added
+--- | --- | ---
+<-> | Euclidean distance | unreleased
+<#> | negative inner product | unreleased
+<=> | cosine distance | unreleased
+
+### Sparsevec Functions
+
+Function | Description | Added
+--- | --- | ---
+cosine_distance(sparsevec, sparsevec) → double precision | cosine distance | unreleased
+inner_product(sparsevec, sparsevec) → double precision | inner product | unreleased
+l2_distance(sparsevec, sparsevec) → double precision | Euclidean distance | unreleased
+sparsevec_norm(sparsevec) → double precision | Euclidean norm | unreleased
+
 ## Installation Notes - Linux and Mac
 
 ### Postgres Location
diff --git a/sql/vector--0.6.2--0.7.0.sql b/sql/vector--0.6.2--0.7.0.sql
index dffd83c..f767d6a 100644
--- a/sql/vector--0.6.2--0.7.0.sql
+++ b/sql/vector--0.6.2--0.7.0.sql
@@ -158,3 +158,96 @@ CREATE CAST (halfvec AS vector)
 
 CREATE CAST (vector AS halfvec)
 	WITH FUNCTION vector_to_halfvec(vector, integer, boolean) AS IMPLICIT;
+
+CREATE TYPE sparsevec;
+
+CREATE FUNCTION sparsevec_in(cstring, oid, integer) RETURNS sparsevec
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_out(sparsevec) RETURNS cstring
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_typmod_in(cstring[]) RETURNS integer
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_recv(internal, oid, integer) RETURNS sparsevec
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_send(sparsevec) RETURNS bytea
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE TYPE sparsevec (
+	INPUT     = sparsevec_in,
+	OUTPUT    = sparsevec_out,
+	TYPMOD_IN = sparsevec_typmod_in,
+	RECEIVE   = sparsevec_recv,
+	SEND      = sparsevec_send,
+	STORAGE   = external
+);
+
+CREATE FUNCTION l2_distance(sparsevec, sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME', 'sparsevec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION inner_product(sparsevec, sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME', 'sparsevec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION cosine_distance(sparsevec, sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME', 'sparsevec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_norm(sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_l2_squared_distance(sparsevec, sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_negative_inner_product(sparsevec, sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec(sparsevec, integer, boolean) RETURNS sparsevec
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION vector_to_sparsevec(vector, integer, boolean) RETURNS sparsevec
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) RETURNS vector
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE CAST (sparsevec AS sparsevec)
+	WITH FUNCTION sparsevec(sparsevec, integer, boolean) AS IMPLICIT;
+
+CREATE CAST (sparsevec AS vector)
+	WITH FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) AS IMPLICIT;
+
+CREATE CAST (vector AS sparsevec)
+	WITH FUNCTION vector_to_sparsevec(vector, integer, boolean) AS IMPLICIT;
+
+CREATE OPERATOR <-> (
+	LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = l2_distance,
+	COMMUTATOR = '<->'
+);
+
+CREATE OPERATOR <#> (
+	LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_negative_inner_product,
+	COMMUTATOR = '<#>'
+);
+
+CREATE OPERATOR <=> (
+	LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = cosine_distance,
+	COMMUTATOR = '<=>'
+);
+
+CREATE OPERATOR CLASS sparsevec_l2_ops
+	FOR TYPE sparsevec USING hnsw AS
+	OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops,
+	FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec);
+
+CREATE OPERATOR CLASS sparsevec_ip_ops
+	FOR TYPE sparsevec USING hnsw AS
+	OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops,
+	FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec);
+
+CREATE OPERATOR CLASS sparsevec_cosine_ops
+	FOR TYPE sparsevec USING hnsw AS
+	OPERATOR 1 <=> (sparsevec, sparsevec) FOR ORDER BY float_ops,
+	FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
+	FUNCTION 2 sparsevec_norm(sparsevec);
diff --git a/sql/vector.sql b/sql/vector.sql
index 3fc5081..f21b100 100644
--- a/sql/vector.sql
+++ b/sql/vector.sql
@@ -463,3 +463,110 @@ CREATE CAST (halfvec AS vector)
 
 CREATE CAST (vector AS halfvec)
 	WITH FUNCTION vector_to_halfvec(vector, integer, boolean) AS IMPLICIT;
+
+--- sparsevec type
+
+CREATE TYPE sparsevec;
+
+CREATE FUNCTION sparsevec_in(cstring, oid, integer) RETURNS sparsevec
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_out(sparsevec) RETURNS cstring
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_typmod_in(cstring[]) RETURNS integer
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_recv(internal, oid, integer) RETURNS sparsevec
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_send(sparsevec) RETURNS bytea
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE TYPE sparsevec (
+	INPUT     = sparsevec_in,
+	OUTPUT    = sparsevec_out,
+	TYPMOD_IN = sparsevec_typmod_in,
+	RECEIVE   = sparsevec_recv,
+	SEND      = sparsevec_send,
+	STORAGE   = external
+);
+
+-- sparsevec functions
+
+CREATE FUNCTION l2_distance(sparsevec, sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME', 'sparsevec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION inner_product(sparsevec, sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME', 'sparsevec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION cosine_distance(sparsevec, sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME', 'sparsevec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_norm(sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+-- sparsevec private functions
+
+CREATE FUNCTION sparsevec_l2_squared_distance(sparsevec, sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_negative_inner_product(sparsevec, sparsevec) RETURNS float8
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+-- sparsevec cast functions
+
+CREATE FUNCTION sparsevec(sparsevec, integer, boolean) RETURNS sparsevec
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION vector_to_sparsevec(vector, integer, boolean) RETURNS sparsevec
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) RETURNS vector
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+-- sparsevec casts
+
+CREATE CAST (sparsevec AS sparsevec)
+	WITH FUNCTION sparsevec(sparsevec, integer, boolean) AS IMPLICIT;
+
+CREATE CAST (sparsevec AS vector)
+	WITH FUNCTION sparsevec_to_vector(sparsevec, integer, boolean) AS IMPLICIT;
+
+CREATE CAST (vector AS sparsevec)
+	WITH FUNCTION vector_to_sparsevec(vector, integer, boolean) AS IMPLICIT;
+
+-- sparsevec operators
+
+CREATE OPERATOR <-> (
+	LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = l2_distance,
+	COMMUTATOR = '<->'
+);
+
+CREATE OPERATOR <#> (
+	LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = sparsevec_negative_inner_product,
+	COMMUTATOR = '<#>'
+);
+
+CREATE OPERATOR <=> (
+	LEFTARG = sparsevec, RIGHTARG = sparsevec, PROCEDURE = cosine_distance,
+	COMMUTATOR = '<=>'
+);
+
+-- sparsevec opclasses
+
+CREATE OPERATOR CLASS sparsevec_l2_ops
+	FOR TYPE sparsevec USING hnsw AS
+	OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops,
+	FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec);
+
+CREATE OPERATOR CLASS sparsevec_ip_ops
+	FOR TYPE sparsevec USING hnsw AS
+	OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops,
+	FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec);
+
+CREATE OPERATOR CLASS sparsevec_cosine_ops
+	FOR TYPE sparsevec USING hnsw AS
+	OPERATOR 1 <=> (sparsevec, sparsevec) FOR ORDER BY float_ops,
+	FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
+	FUNCTION 2 sparsevec_norm(sparsevec);
diff --git a/src/hnsw.h b/src/hnsw.h
index 3012f5f..772b228 100644
--- a/src/hnsw.h
+++ b/src/hnsw.h
@@ -17,6 +17,7 @@
 #endif
 
 #define HNSW_MAX_DIM 2000
+#define HNSW_MAX_NNZ 1000
 
 /* Support functions */
 #define HNSW_DISTANCE_PROC 1
@@ -59,7 +60,8 @@ typedef enum HnswType
 {
 	HNSW_TYPE_VECTOR,
 	HNSW_TYPE_HALFVEC,
-	HNSW_TYPE_BIT
+	HNSW_TYPE_BIT,
+	HNSW_TYPE_SPARSEVEC
 }			HnswType;
 
 /* Build phases */
@@ -376,6 +378,7 @@ int			HnswGetEfConstruction(Relation index);
 FmgrInfo   *HnswOptionalProcInfo(Relation index, uint16 procnum);
 HnswType	HnswGetType(Relation index);
 bool		HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, HnswType type);
+void		HnswCheckValue(Datum value, HnswType type);
 Buffer		HnswNewBuffer(Relation index, ForkNumber forkNum);
 void		HnswInitPage(Buffer buf, Page page);
 void		HnswInit(void);
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
index 5e586f6..2300127 100644
--- a/src/hnswbuild.c
+++ b/src/hnswbuild.c
@@ -487,6 +487,9 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn
 	/* Detoast once for all calls */
 	Datum		value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
 
+	/* Check value */
+	HnswCheckValue(value, buildstate->type);
+
 	/* Normalize if needed */
 	if (buildstate->normprocinfo != NULL)
 	{
@@ -678,6 +681,8 @@ GetMaxDimensions(HnswType type)
 		maxDimensions *= 2;
 	else if (type == HNSW_TYPE_BIT)
 		maxDimensions *= 32;
+	else if (type == HNSW_TYPE_SPARSEVEC)
+		maxDimensions = INT_MAX;
 
 	return maxDimensions;
 }
diff --git a/src/hnswinsert.c b/src/hnswinsert.c
index 0e09cfa..c5ea1fd 100644
--- a/src/hnswinsert.c
+++ b/src/hnswinsert.c
@@ -614,15 +614,19 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_ti
 	Datum		value;
 	FmgrInfo   *normprocinfo;
 	Oid			collation = index->rd_indcollation[0];
+	HnswType	type = HnswGetType(index);
 
 	/* Detoast once for all calls */
 	value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
 
+	/* Check value */
+	HnswCheckValue(value, type);
+
 	/* Normalize if needed */
 	normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC);
 	if (normprocinfo != NULL)
 	{
-		if (!HnswNormValue(normprocinfo, collation, &value, HnswGetType(index)))
+		if (!HnswNormValue(normprocinfo, collation, &value, type))
 			return;
 	}
 
diff --git a/src/hnswutils.c b/src/hnswutils.c
index 272934c..e082808 100644
--- a/src/hnswutils.c
+++ b/src/hnswutils.c
@@ -8,6 +8,7 @@
 #include "halfvec.h"
 #include "hnsw.h"
 #include "lib/pairingheap.h"
+#include "sparsevec.h"
 #include "storage/bufmgr.h"
 #include "utils/datum.h"
 #include "utils/memdebug.h"
@@ -176,6 +177,8 @@ HnswGetType(Relation index)
 		result = HNSW_TYPE_VECTOR;
 	else if (strcmp(NameStr(type->typname), "halfvec") == 0)
 		result = HNSW_TYPE_HALFVEC;
+	else if (strcmp(NameStr(type->typname), "sparsevec") == 0)
+		result = HNSW_TYPE_SPARSEVEC;
 	else
 		elog(ERROR, "Unsupported type");
 
@@ -223,6 +226,21 @@ HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, HnswType type)
 
 			*value = PointerGetDatum(result);
 		}
+		else if (type == HNSW_TYPE_SPARSEVEC)
+		{
+			SparseVector *v = DatumGetSparseVector(*value);
+			SparseVector *result = InitSparseVector(v->dim, v->nnz);
+			float	   *vx = SPARSEVEC_VALUES(v);
+			float	   *rx = SPARSEVEC_VALUES(result);
+
+			for (int i = 0; i < v->nnz; i++)
+			{
+				result->indices[i] = v->indices[i];
+				rx[i] = vx[i] / norm;
+			}
+
+			*value = PointerGetDatum(result);
+		}
 		else
 			elog(ERROR, "Unsupported type");
 
@@ -232,6 +250,21 @@ HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, HnswType type)
 	return false;
 }
 
+/* Check if a value can be indexed; errors when a sparsevec exceeds HNSW_MAX_NNZ */
+void
+HnswCheckValue(Datum value, HnswType type)
+{
+	if (type == HNSW_TYPE_SPARSEVEC)
+	{
+		SparseVector *vec = DatumGetSparseVector(value);
+
+		if (vec->nnz > HNSW_MAX_NNZ)
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("sparsevec cannot have more than %d non-zero elements for hnsw index", HNSW_MAX_NNZ)));
+	}
+}
+
 /*
  * New buffer
  */
diff --git a/src/sparsevec.c b/src/sparsevec.c
new file mode 100644
index 0000000..22649cd
--- /dev/null
+++ b/src/sparsevec.c
@@ -0,0 +1,778 @@
+#include "postgres.h"
+
+#include <limits.h>
+#include <math.h>
+
+#include "fmgr.h"
+#include "libpq/pqformat.h"
+#include "sparsevec.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "vector.h"
+
+#if PG_VERSION_NUM >= 120000
+#include "common/shortest_dec.h"
+#include "utils/float.h"
+#else
+#include <float.h>
+#include "utils/builtins.h"
+#endif
+
+/*
+ * Ensure both sparse vectors have the same dimensions (raises a data exception otherwise)
+ */
+static inline void
+CheckDims(SparseVector * a, SparseVector * b)
+{
+	if (a->dim != b->dim)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("different sparsevec dimensions %d and %d", a->dim, b->dim)));
+}
+
+/*
+ * Ensure dimensions match the type modifier (a typmod of -1 accepts any dimensions)
+ */
+static inline void
+CheckExpectedDim(int32 typmod, int dim)
+{
+	if (typmod != -1 && typmod != dim)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("expected %d dimensions, not %d", typmod, dim)));
+}
+
+/*
+ * Ensure valid dimensions (at least 1 and at most SPARSEVEC_MAX_DIM)
+ */
+static inline void
+CheckDim(int dim)
+{
+	if (dim < 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("sparsevec must have at least 1 dimension")));
+
+	if (dim > SPARSEVEC_MAX_DIM)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("sparsevec cannot have more than %d dimensions", SPARSEVEC_MAX_DIM)));
+}
+
+/*
+ * Ensure valid nnz (number of stored elements): zero is allowed, at most dim
+ */
+static inline void
+CheckNnz(int nnz, int dim)
+{
+	if (nnz < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("sparsevec cannot have negative number of elements")));
+
+	if (nnz > dim)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("sparsevec cannot have more elements than dimensions")));
+}
+
+/*
+ * Ensure indices[i] is in [0, dim) and strictly greater than indices[i - 1]
+ */
+static inline void
+CheckIndex(int32 *indices, int i, int dim)
+{
+	int32		index = indices[i];
+
+	if (index < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("index must not be negative")));
+
+	if (index >= dim)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("index must be less than dimensions")));
+
+	if (i > 0)
+	{
+		if (index < indices[i - 1])
+			ereport(ERROR,
+					(errcode(ERRCODE_DATA_EXCEPTION),
+					 errmsg("indexes must be in ascending order")));
+
+		if (index == indices[i - 1])
+			ereport(ERROR,
+					(errcode(ERRCODE_DATA_EXCEPTION),
+					 errmsg("indexes must not contain duplicates")));
+	}
+}
+
+/*
+ * Ensure finite element (rejects NaN and positive or negative infinity)
+ */
+static inline void
+CheckElement(float value)
+{
+	if (isnan(value))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("NaN not allowed in sparsevec")));
+
+	if (isinf(value))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("infinite value not allowed in sparsevec")));
+}
+
+/*
+ * Allocate a zero-filled sparse vector sized for nnz elements, with dim and nnz set
+ */
+SparseVector *
+InitSparseVector(int dim, int nnz)
+{
+	SparseVector *result;
+	int			size;
+
+	size = SPARSEVEC_SIZE(nnz);
+	result = (SparseVector *) palloc0(size);
+	SET_VARSIZE(result, size);
+	result->dim = dim;
+	result->nnz = nnz;
+
+	return result;
+}
+
+/*
+ * Local whitespace test, since array_isspace() is static
+ */
+static inline bool
+sparsevec_isspace(char ch)
+{
+	switch (ch)
+	{
+		case ' ': case '\t': case '\n':
+		case '\r': case '\v': case '\f':
+			return true;
+		default:
+			return false;
+	}
+}
+
+/*
+ * Convert textual representation ({index:value,...}/dimensions) to internal representation
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_in);
+Datum
+sparsevec_in(PG_FUNCTION_ARGS)
+{
+	char	   *lit = PG_GETARG_CSTRING(0);
+	int32		typmod = PG_GETARG_INT32(2);
+	long		dim;
+	char	   *pt;
+	char	   *stringEnd;
+	SparseVector *result;
+	float	   *rvalues;
+	char	   *litcopy = pstrdup(lit);
+	char	   *str = litcopy;
+	int32	   *indices;
+	float	   *values;
+	int			maxNnz;
+	int			nnz = 0;
+
+	maxNnz = 1;
+	pt = str;
+	while (*pt != '\0')
+	{
+		if (*pt == ',')
+			maxNnz++;
+
+		pt++;
+	}
+
+	indices = palloc(maxNnz * sizeof(int32));
+	values = palloc(maxNnz * sizeof(float));
+
+	while (sparsevec_isspace(*str))
+		str++;
+
+	if (*str != '{')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed sparsevec literal: \"%s\"", lit),
+				 errdetail("Vector contents must start with \"{\".")));
+
+	str++;
+	pt = strtok(str, ",");
+	stringEnd = pt;
+
+	while (pt != NULL && *stringEnd != '}')
+	{
+		long		index;
+		float		value;
+
+		/* TODO Better error */
+		if (nnz == maxNnz)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("ran out of buffer: \"%s\"", lit)));
+
+		while (sparsevec_isspace(*pt))
+			pt++;
+
+		/* Check for empty string like float4in */
+		if (*pt == '\0')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
+
+		/* Use similar logic as int2vectorin */
+		errno = 0;
+		index = strtol(pt, &stringEnd, 10);
+
+		if (stringEnd == pt)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
+
+		if (errno == ERANGE || index < 0 || index > INT_MAX)
+			ereport(ERROR,
+					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+					 errmsg("index \"%ld\" is out of range for type sparsevec", index)));
+
+		while (sparsevec_isspace(*stringEnd))
+			stringEnd++;
+
+		if (*stringEnd != ':')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
+
+		stringEnd++;
+
+		while (sparsevec_isspace(*stringEnd))
+			stringEnd++;
+
+		errno = 0;
+		pt = stringEnd;
+		value = strtof(pt, &stringEnd);
+
+		if (stringEnd == pt)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
+
+		/* Check for range error like float4in */
+		if (errno == ERANGE && (value == 0 || isinf(value)))
+			ereport(ERROR,
+					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+					 errmsg("\"%s\" is out of range for type sparsevec", pt)));
+
+		/* TODO Decide whether to store zero values */
+		if (value != 0)
+		{
+			indices[nnz] = index;
+			values[nnz] = value;
+			nnz++;
+		}
+
+		if (*stringEnd != '\0' && *stringEnd != '}')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
+
+		pt = strtok(NULL, ",");
+	}
+
+	if (stringEnd == NULL || *stringEnd != '}')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed sparsevec literal: \"%s\"", lit),
+				 errdetail("Unexpected end of input.")));
+
+	stringEnd++;
+
+	if (*stringEnd != '/')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed sparsevec literal: \"%s\"", lit),
+				 errdetail("Unexpected end of input.")));
+
+	stringEnd++;
+
+	/* Use similar logic as int2vectorin; parse as long so an oversized value is rejected, not truncated to int */
+	errno = 0;
+	pt = stringEnd;
+	dim = strtol(pt, &stringEnd, 10);
+
+	if (stringEnd == pt)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
+
+	if (errno == ERANGE || dim < INT_MIN || dim > INT_MAX)
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				 errmsg("dimensions \"%ld\" is out of range for type sparsevec", dim)));
+
+	/* Only whitespace is allowed after the dimensions */
+	while (sparsevec_isspace(*stringEnd))
+		stringEnd++;
+
+	if (*stringEnd != '\0')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed sparsevec literal: \"%s\"", lit),
+				 errdetail("Junk after closing.")));
+
+	pfree(litcopy);
+
+	CheckDim(dim);
+	CheckExpectedDim(typmod, dim);
+
+	result = InitSparseVector(dim, nnz);
+	rvalues = SPARSEVEC_VALUES(result);
+	for (int i = 0; i < nnz; i++)
+	{
+		result->indices[i] = indices[i];
+		rvalues[i] = values[i];
+
+		CheckIndex(result->indices, i, dim);
+		CheckElement(rvalues[i]);
+	}
+
+	PG_RETURN_POINTER(result);
+}
+
+#define AppendChar(ptr, c) (*(ptr)++ = (c))
+#define AppendFloat(ptr, f) ((ptr) += float_to_shortest_decimal_bufn((f), (ptr)))
+
+#if PG_VERSION_NUM >= 140000
+#define AppendInt(ptr, i) ((ptr) += pg_ltoa((i), (ptr)))
+#else
+#define AppendInt(ptr, i) \
+	do { \
+		pg_ltoa(i, ptr); \
+		while (*ptr != '\0') \
+			ptr++; \
+	} while (0)
+#endif
+
+/*
+ * Convert internal representation to textual representation ({index:value,...}/dimensions)
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_out);
+Datum
+sparsevec_out(PG_FUNCTION_ARGS)
+{
+	SparseVector *sparsevec = PG_GETARG_SPARSEVEC_P(0);
+	float	   *values = SPARSEVEC_VALUES(sparsevec);
+	char	   *buf;
+	char	   *ptr;
+
+	/*
+	 * Need:
+	 *
+	 * nnz * 10 bytes for index (non-negative integer, at most 10 digits)
+	 *
+	 * nnz bytes for :
+	 *
+	 * nnz * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for
+	 * float_to_shortest_decimal_bufn
+	 *
+	 * nnz - 1 bytes for ,
+	 *
+	 * 10 bytes for dimensions
+	 *
+	 * 4 bytes for {, }, /, and \0
+	 */
+	buf = (char *) palloc((11 + FLOAT_SHORTEST_DECIMAL_LEN) * sparsevec->nnz + 13);
+	ptr = buf;
+
+	AppendChar(ptr, '{');
+
+	for (int i = 0; i < sparsevec->nnz; i++)
+	{
+		if (i > 0)
+			AppendChar(ptr, ',');
+
+		AppendInt(ptr, sparsevec->indices[i]);
+		AppendChar(ptr, ':');
+		AppendFloat(ptr, values[i]);
+	}
+
+	AppendChar(ptr, '}');
+	AppendChar(ptr, '/');
+	AppendInt(ptr, sparsevec->dim);
+	*ptr = '\0';
+
+	PG_FREE_IF_COPY(sparsevec, 0);
+	PG_RETURN_CSTRING(buf);
+}
+
+/*
+ * Convert type modifier: exactly one integer from 1 to SPARSEVEC_MAX_DIM, returned as the typmod
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_typmod_in);
+Datum
+sparsevec_typmod_in(PG_FUNCTION_ARGS)
+{
+	ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);
+	int32	   *tl;
+	int			n;
+
+	tl = ArrayGetIntegerTypmods(ta, &n);
+
+	if (n != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid type modifier")));
+
+	if (*tl < 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("dimensions for type sparsevec must be at least 1")));
+
+	if (*tl > SPARSEVEC_MAX_DIM)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("dimensions for type sparsevec cannot exceed %d", SPARSEVEC_MAX_DIM)));
+
+	PG_RETURN_INT32(*tl);
+}
+
+/*
+ * Convert external binary representation (dim, nnz, unused, indices, values) to internal representation
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_recv);
+Datum
+sparsevec_recv(PG_FUNCTION_ARGS)
+{
+	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
+	int32		typmod = PG_GETARG_INT32(2);
+	SparseVector *result;
+	int32		dim;
+	int32		nnz;
+	int32		unused;
+	float	   *values;
+
+	dim = pq_getmsgint(buf, sizeof(int32));
+	nnz = pq_getmsgint(buf, sizeof(int32));
+	unused = pq_getmsgint(buf, sizeof(int32));
+
+	CheckDim(dim);
+	CheckNnz(nnz, dim);
+	CheckExpectedDim(typmod, dim);
+
+	if (unused != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("expected unused to be 0, not %d", unused)));
+
+	result = InitSparseVector(dim, nnz);
+	values = SPARSEVEC_VALUES(result);
+
+	for (int i = 0; i < nnz; i++)
+	{
+		result->indices[i] = pq_getmsgint(buf, sizeof(int32));
+		CheckIndex(result->indices, i, dim);
+	}
+
+	for (int i = 0; i < nnz; i++)
+	{
+		values[i] = pq_getmsgfloat4(buf);
+		CheckElement(values[i]);
+	}
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Convert internal representation to the external binary representation (dim, nnz, unused, indices, values)
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_send);
+Datum
+sparsevec_send(PG_FUNCTION_ARGS)
+{
+	SparseVector *svec = PG_GETARG_SPARSEVEC_P(0);
+	float	   *values = SPARSEVEC_VALUES(svec);
+	StringInfoData buf;
+
+	pq_begintypsend(&buf);
+	pq_sendint(&buf, svec->dim, sizeof(int32));
+	pq_sendint(&buf, svec->nnz, sizeof(int32));
+	pq_sendint(&buf, svec->unused, sizeof(int32));
+	for (int i = 0; i < svec->nnz; i++)
+		pq_sendint(&buf, svec->indices[i], sizeof(int32));
+	for (int i = 0; i < svec->nnz; i++)
+		pq_sendfloat4(&buf, values[i]);
+
+	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * Convert sparse vector to sparse vector (returns the input unchanged)
+ * This is needed to check the type modifier (errors on a dimension mismatch)
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec);
+Datum
+sparsevec(PG_FUNCTION_ARGS)
+{
+	SparseVector *svec = PG_GETARG_SPARSEVEC_P(0);
+	int32		typmod = PG_GETARG_INT32(1);
+
+	CheckExpectedDim(typmod, svec->dim);
+
+	PG_RETURN_POINTER(svec);
+}
+
+/*
+ * Convert dense vector to sparse vector (only non-zero elements are stored)
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_sparsevec);
+Datum
+vector_to_sparsevec(PG_FUNCTION_ARGS)
+{
+	Vector	   *vec = PG_GETARG_VECTOR_P(0);
+	int32		typmod = PG_GETARG_INT32(1);
+	SparseVector *result;
+	int			dim = vec->dim;
+	int			nnz = 0;
+	float	   *values;
+	int			j = 0;
+
+	CheckDim(dim);
+	CheckExpectedDim(typmod, dim);
+
+	for (int i = 0; i < dim; i++)
+	{
+		if (vec->x[i] != 0)
+			nnz++;
+	}
+
+	result = InitSparseVector(dim, nnz);
+	values = SPARSEVEC_VALUES(result);
+	for (int i = 0; i < dim; i++)
+	{
+		if (vec->x[i] != 0)
+		{
+			/* Safety check: never write past the nnz slots counted in the first pass */
+			if (j == nnz)
+				elog(ERROR, "safety check failed");
+
+			result->indices[j] = i;
+			values[j] = vec->x[i];
+			j++;
+		}
+	}
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Get the L2 squared distance between sparse vectors (an index stored in only one vector is zero in the other)
+ */
+static double
+l2_distance_squared_internal(SparseVector * a, SparseVector * b)
+{
+	float	   *ax = SPARSEVEC_VALUES(a);
+	float	   *bx = SPARSEVEC_VALUES(b);
+	double		distance = 0.0;
+	int			bpos = 0;
+
+	for (int i = 0; i < a->nnz; i++)
+	{
+		int			ai = a->indices[i];
+		int			bi = -1;
+
+		for (int j = bpos; j < b->nnz; j++)
+		{
+			bi = b->indices[j];
+
+			if (ai == bi)
+			{
+				double		diff = ax[i] - bx[j];
+
+				distance += diff * diff;
+			}
+			else if (ai > bi)
+				distance += bx[j] * bx[j];
+
+			/* Element j of b has been accounted for, so start after it next time */
+			if (ai >= bi)
+				bpos = j + 1;
+
+			/* Found or passed the index of a, so stop scanning b */
+			if (bi >= ai)
+				break;
+		}
+
+		if (ai != bi)
+			distance += ax[i] * ax[i];
+	}
+
+	for (int j = bpos; j < b->nnz; j++)
+		distance += bx[j] * bx[j];
+
+	return distance;
+}
+
+/*
+ * L2 (Euclidean) distance between two sparse vectors
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_distance);
+Datum
+sparsevec_l2_distance(PG_FUNCTION_ARGS)
+{
+	SparseVector *lhs = PG_GETARG_SPARSEVEC_P(0);
+	SparseVector *rhs = PG_GETARG_SPARSEVEC_P(1);
+
+	CheckDims(lhs, rhs);
+
+	PG_RETURN_FLOAT8(sqrt(l2_distance_squared_internal(lhs, rhs)));
+}
+
+/*
+ * Squared L2 distance between two sparse vectors
+ * Skips the sqrt taken by sparsevec_l2_distance
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_squared_distance);
+Datum
+sparsevec_l2_squared_distance(PG_FUNCTION_ARGS)
+{
+	SparseVector *lhs = PG_GETARG_SPARSEVEC_P(0);
+	SparseVector *rhs = PG_GETARG_SPARSEVEC_P(1);
+
+	CheckDims(lhs, rhs);
+
+	PG_RETURN_FLOAT8(l2_distance_squared_internal(lhs, rhs));
+}
+
+/*
+ * Get the inner product of two sparse vectors (indices must ascend)
+ */
+static double
+inner_product_internal(SparseVector * a, SparseVector * b)
+{
+	float	   *ax = SPARSEVEC_VALUES(a);
+	float	   *bx = SPARSEVEC_VALUES(b);
+	double		product = 0.0;
+	int			bpos = 0;
+
+	for (int i = 0; i < a->nnz; i++)
+	{
+		int			ai = a->indices[i];
+
+		for (int j = bpos; j < b->nnz; j++)
+		{
+			int			bi = b->indices[j];
+
+			/* Only matching indices contribute; multiply in double, not float */
+			if (ai == bi)
+				product += (double) ax[i] * (double) bx[j];
+
+			/* Update start for next iteration */
+			if (ai >= bi)
+				bpos = j + 1;
+
+			/* Found or passed it */
+			if (bi >= ai)
+				break;
+		}
+	}
+
+	return product;
+}
+
+/*
+ * Inner (dot) product of two sparse vectors
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_inner_product);
+Datum
+sparsevec_inner_product(PG_FUNCTION_ARGS)
+{
+	SparseVector *lhs = PG_GETARG_SPARSEVEC_P(0);
+	SparseVector *rhs = PG_GETARG_SPARSEVEC_P(1);
+
+	CheckDims(lhs, rhs);
+
+	PG_RETURN_FLOAT8(inner_product_internal(lhs, rhs));
+}
+
+/*
+ * Negated inner product of two sparse vectors
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_negative_inner_product);
+Datum
+sparsevec_negative_inner_product(PG_FUNCTION_ARGS)
+{
+	SparseVector *lhs = PG_GETARG_SPARSEVEC_P(0);
+	SparseVector *rhs = PG_GETARG_SPARSEVEC_P(1);
+
+	CheckDims(lhs, rhs);
+
+	PG_RETURN_FLOAT8(-inner_product_internal(lhs, rhs));
+}
+
+/*
+ * Cosine distance (1 - cosine similarity) between two sparse vectors
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_cosine_distance);
+Datum
+sparsevec_cosine_distance(PG_FUNCTION_ARGS)
+{
+	SparseVector *lhs = PG_GETARG_SPARSEVEC_P(0);
+	SparseVector *rhs = PG_GETARG_SPARSEVEC_P(1);
+	float	   *lvals = SPARSEVEC_VALUES(lhs);
+	float	   *rvals = SPARSEVEC_VALUES(rhs);
+	float		lsq = 0.0;
+	float		rsq = 0.0;
+	double		cosine;
+
+	CheckDims(lhs, rhs);
+
+	cosine = inner_product_internal(lhs, rhs);
+
+	/* Sum of squares of the left operand (auto-vectorized) */
+	for (int k = 0; k < lhs->nnz; k++)
+		lsq += lvals[k] * lvals[k];
+
+	/* Sum of squares of the right operand (auto-vectorized) */
+	for (int k = 0; k < rhs->nnz; k++)
+		rsq += rvals[k] * rvals[k];
+
+	/* One sqrt of the product rather than a product of two sqrts */
+	cosine /= sqrt((double) lsq * (double) rsq);
+
+#ifdef _MSC_VER
+	/* /fp:fast may not propagate NaN */
+	if (isnan(cosine))
+		PG_RETURN_FLOAT8(NAN);
+#endif
+
+	/* Clamp to [-1, 1]; a NaN fails both comparisons and passes through */
+	if (cosine < -1)
+		cosine = -1.0;
+	else if (cosine > 1)
+		cosine = 1.0;
+
+	PG_RETURN_FLOAT8(1.0 - cosine);
+}
+
+/*
+ * L2 norm of a sparse vector: sqrt of the sum of squared stored values
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_norm);
+Datum
+sparsevec_norm(PG_FUNCTION_ARGS)
+{
+	SparseVector *svec = PG_GETARG_SPARSEVEC_P(0);
+	float	   *vals = SPARSEVEC_VALUES(svec);
+	double		sumsq = 0.0;
+
+	/* Auto-vectorized */
+	for (int k = 0; k < svec->nnz; k++)
+		sumsq += (double) vals[k] * (double) vals[k];
+
+	PG_RETURN_FLOAT8(sqrt(sumsq));
+}
diff --git a/src/sparsevec.h b/src/sparsevec.h
new file mode 100644
index 0000000..673c5b0
--- /dev/null
+++ b/src/sparsevec.h
@@ -0,0 +1,24 @@
+#ifndef SPARSEVEC_H
+#define SPARSEVEC_H
+
+#define SPARSEVEC_MAX_DIM 100000
+
+/* Values follow the indices, which are MAXALIGN-padded so the values stay aligned; macro arguments are parenthesized */
+#define SPARSEVEC_SIZE(_nnz)		(offsetof(SparseVector, indices) + MAXALIGN((_nnz) * sizeof(int32)) + ((_nnz) * sizeof(float)))
+#define SPARSEVEC_VALUES(x)		((float *) (((char *) (x)) + offsetof(SparseVector, indices) + MAXALIGN((x)->nnz * sizeof(int32))))
+#define DatumGetSparseVector(x)		((SparseVector *) PG_DETOAST_DATUM(x))
+#define PG_GETARG_SPARSEVEC_P(x)	DatumGetSparseVector(PG_GETARG_DATUM(x))
+#define PG_RETURN_SPARSEVEC_P(x)	PG_RETURN_POINTER(x)
+
+typedef struct SparseVector
+{
+	int32		vl_len_;		/* varlena header (do not touch directly!) */
+	int32		dim;			/* number of dimensions */
+	int32		nnz;			/* number of stored (non-zero) elements */
+	int32		unused;			/* no visible reader; still written by sparsevec_send */
+	int32		indices[FLEXIBLE_ARRAY_MEMBER];	/* nnz element positions; values follow, see SPARSEVEC_VALUES */
+}			SparseVector;
+
+SparseVector *InitSparseVector(int dim, int nnz);
+
+#endif
diff --git a/src/vector.c b/src/vector.c
index 97d922f..c3871ea 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -12,6 +12,7 @@
 #include "lib/stringinfo.h"
 #include "libpq/pqformat.h"
 #include "port.h"				/* for strtof() */
+#include "sparsevec.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/float.h"
@@ -1214,3 +1215,26 @@ vector_avg(PG_FUNCTION_ARGS)
 
 	PG_RETURN_POINTER(result);
 }
+
+/*
+ * Expand a sparse vector into a dense vector with the same dimensions
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_to_vector);
+Datum
+sparsevec_to_vector(PG_FUNCTION_ARGS)
+{
+	SparseVector *sparse = PG_GETARG_SPARSEVEC_P(0);
+	int32		typmod = PG_GETARG_INT32(1);
+	float	   *stored = SPARSEVEC_VALUES(sparse);
+	int			dim = sparse->dim;
+	Vector	   *dense;
+
+	CheckDim(dim);
+	CheckExpectedDim(typmod, dim);
+
+	/* Only stored elements are written; the rest keep what InitVector set */
+	dense = InitVector(dim);
+	for (int k = 0; k < sparse->nnz; k++)
+		dense->x[sparse->indices[k]] = stored[k];
+	PG_RETURN_POINTER(dense);
+}
diff --git a/test/expected/hnsw_sparsevec_cosine.out b/test/expected/hnsw_sparsevec_cosine.out
new file mode 100644
index 0000000..778415e
--- /dev/null
+++ b/test/expected/hnsw_sparsevec_cosine.out
@@ -0,0 +1,26 @@
+SET enable_seqscan = off;
+CREATE TABLE t (val sparsevec(3));
+INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
+CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops);
+INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
+SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3';
+       val       
+-----------------
+ {0:1,1:1,2:1}/3
+ {0:1,1:2,2:3}/3
+ {0:1,1:2,2:4}/3
+(3 rows)
+
+SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2;
+ count 
+-------
+     3
+(1 row)
+
+SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::sparsevec)) t2;
+ count 
+-------
+     3
+(1 row)
+
+DROP TABLE t;
diff --git a/test/expected/hnsw_sparsevec_ip.out b/test/expected/hnsw_sparsevec_ip.out
new file mode 100644
index 0000000..1c303f0
--- /dev/null
+++ b/test/expected/hnsw_sparsevec_ip.out
@@ -0,0 +1,21 @@
+SET enable_seqscan = off;
+CREATE TABLE t (val sparsevec(3));
+INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
+CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops);
+INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
+SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3';
+       val       
+-----------------
+ {0:1,1:2,2:4}/3
+ {0:1,1:2,2:3}/3
+ {0:1,1:1,2:1}/3
+ {}/3
+(4 rows)
+
+SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::sparsevec)) t2;
+ count 
+-------
+     4
+(1 row)
+
+DROP TABLE t;
diff --git a/test/expected/hnsw_sparsevec_l2.out b/test/expected/hnsw_sparsevec_l2.out
new file mode 100644
index 0000000..adc5cfd
--- /dev/null
+++ b/test/expected/hnsw_sparsevec_l2.out
@@ -0,0 +1,43 @@
+SET enable_seqscan = off;
+CREATE TABLE t (val sparsevec(3));
+INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
+CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
+INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
+SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
+       val       
+-----------------
+ {0:1,1:2,2:3}/3
+ {0:1,1:2,2:4}/3
+ {0:1,1:1,2:1}/3
+ {}/3
+(4 rows)
+
+SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::sparsevec)) t2;
+ count 
+-------
+     4
+(1 row)
+
+SELECT COUNT(*) FROM t;
+ count 
+-------
+     5
+(1 row)
+
+TRUNCATE t;
+SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
+ val 
+-----
+(0 rows)
+
+DROP TABLE t;
+-- TODO move
+CREATE TABLE t (val sparsevec(1001));
+INSERT INTO t (val) VALUES (array_fill(1, ARRAY[1001])::vector::sparsevec);
+CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
+ERROR:  sparsevec cannot have more than 1000 non-zero elements for hnsw index
+TRUNCATE t;
+CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
+INSERT INTO t (val) VALUES (array_fill(1, ARRAY[1001])::vector::sparsevec);
+ERROR:  sparsevec cannot have more than 1000 non-zero elements for hnsw index
+DROP TABLE t;
diff --git a/test/expected/sparsevec_functions.out b/test/expected/sparsevec_functions.out
new file mode 100644
index 0000000..07117d8
--- /dev/null
+++ b/test/expected/sparsevec_functions.out
@@ -0,0 +1,62 @@
+SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2');
+ l2_distance 
+-------------
+           5
+(1 row)
+
+SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2');
+ l2_distance 
+-------------
+           1
+(1 row)
+
+SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2';
+ ?column? 
+----------
+        5
+(1 row)
+
+SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
+ inner_product 
+---------------
+            10
+(1 row)
+
+SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2');
+ sparsevec_negative_inner_product 
+----------------------------------
+                              -10
+(1 row)
+
+SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
+ cosine_distance 
+-----------------
+               0
+(1 row)
+
+SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2');
+ cosine_distance 
+-----------------
+             NaN
+(1 row)
+
+SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2');
+ cosine_distance 
+-----------------
+               2
+(1 row)
+
+SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2');
+ cosine_distance 
+-----------------
+               1
+(1 row)
+
+SELECT cosine_distance('{}/1'::sparsevec, '{}/1');
+ cosine_distance 
+-----------------
+             NaN
+(1 row)
+
+SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3');
+ERROR:  different sparsevec dimensions 2 and 3
diff --git a/test/expected/sparsevec_input.out b/test/expected/sparsevec_input.out
new file mode 100644
index 0000000..bd2faf5
--- /dev/null
+++ b/test/expected/sparsevec_input.out
@@ -0,0 +1,62 @@
+SELECT '{0:1.5,2:3.5}/5'::sparsevec;
+    sparsevec    
+-----------------
+ {0:1.5,2:3.5}/5
+(1 row)
+
+SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector;
+     vector      
+-----------------
+ [1.5,0,3.5,0,0]
+(1 row)
+
+SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5);
+     vector      
+-----------------
+ [1.5,0,3.5,0,0]
+(1 row)
+
+SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4);
+ERROR:  expected 4 dimensions, not 5
+SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
+    sparsevec    
+-----------------
+ {1:1.5,3:3.5}/5
+(1 row)
+
+SELECT '{0:0,1:1,2:0}/3'::sparsevec;
+ sparsevec 
+-----------
+ {1:1}/3
+(1 row)
+
+SELECT '{1:1,0:1}/2'::sparsevec;
+ERROR:  indexes must be in ascending order
+LINE 1: SELECT '{1:1,0:1}/2'::sparsevec;
+               ^
+SELECT '{}/5'::sparsevec;
+ sparsevec 
+-----------
+ {}/5
+(1 row)
+
+SELECT '{}/-1'::sparsevec;
+ERROR:  sparsevec must have at least 1 dimension
+LINE 1: SELECT '{}/-1'::sparsevec;
+               ^
+SELECT '{}/100001'::sparsevec;
+ERROR:  sparsevec cannot have more than 100000 dimensions
+LINE 1: SELECT '{}/100001'::sparsevec;
+               ^
+SELECT '{}/16001'::sparsevec::vector;
+ERROR:  vector cannot have more than 16000 dimensions
+SELECT '{-1:1}/1'::sparsevec;
+ERROR:  index "-1" is out of range for type sparsevec
+LINE 1: SELECT '{-1:1}/1'::sparsevec;
+               ^
+SELECT '{1:1}/1'::sparsevec;
+ERROR:  index must be less than dimensions
+LINE 1: SELECT '{1:1}/1'::sparsevec;
+               ^
+SELECT '{}/1'::sparsevec(2);
+ERROR:  expected 2 dimensions, not 1
diff --git a/test/sql/hnsw_sparsevec_cosine.sql b/test/sql/hnsw_sparsevec_cosine.sql
new file mode 100644
index 0000000..685423c
--- /dev/null
+++ b/test/sql/hnsw_sparsevec_cosine.sql
@@ -0,0 +1,13 @@
+SET enable_seqscan = off;
+
+CREATE TABLE t (val sparsevec(3));
+INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
+CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops);
+
+INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
+
+SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3';
+SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2;
+SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::sparsevec)) t2;
+
+DROP TABLE t;
diff --git a/test/sql/hnsw_sparsevec_ip.sql b/test/sql/hnsw_sparsevec_ip.sql
new file mode 100644
index 0000000..1888d9c
--- /dev/null
+++ b/test/sql/hnsw_sparsevec_ip.sql
@@ -0,0 +1,12 @@
+SET enable_seqscan = off;
+
+CREATE TABLE t (val sparsevec(3));
+INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
+CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops);
+
+INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
+
+SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3';
+SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::sparsevec)) t2;
+
+DROP TABLE t;
diff --git a/test/sql/hnsw_sparsevec_l2.sql b/test/sql/hnsw_sparsevec_l2.sql
new file mode 100644
index 0000000..b472607
--- /dev/null
+++ b/test/sql/hnsw_sparsevec_l2.sql
@@ -0,0 +1,25 @@
+SET enable_seqscan = off;
+
+CREATE TABLE t (val sparsevec(3));
+INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
+CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
+
+INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
+
+SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
+SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::sparsevec)) t2;
+SELECT COUNT(*) FROM t;
+
+TRUNCATE t;
+SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
+
+DROP TABLE t;
+
+-- TODO move
+CREATE TABLE t (val sparsevec(1001));
+INSERT INTO t (val) VALUES (array_fill(1, ARRAY[1001])::vector::sparsevec);
+CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
+TRUNCATE t;
+CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
+INSERT INTO t (val) VALUES (array_fill(1, ARRAY[1001])::vector::sparsevec);
+DROP TABLE t;
diff --git a/test/sql/sparsevec_functions.sql b/test/sql/sparsevec_functions.sql
new file mode 100644
index 0000000..86f7990
--- /dev/null
+++ b/test/sql/sparsevec_functions.sql
@@ -0,0 +1,13 @@
+SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2');
+SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2');
+SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2';
+
+SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
+SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2');
+
+SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
+SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2');
+SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2');
+SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2');
+SELECT cosine_distance('{}/1'::sparsevec, '{}/1');
+SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3');
diff --git a/test/sql/sparsevec_input.sql b/test/sql/sparsevec_input.sql
new file mode 100644
index 0000000..1fdfd88
--- /dev/null
+++ b/test/sql/sparsevec_input.sql
@@ -0,0 +1,19 @@
+SELECT '{0:1.5,2:3.5}/5'::sparsevec;
+SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector;
+SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5);
+SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4);
+SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
+
+SELECT '{0:0,1:1,2:0}/3'::sparsevec;
+
+SELECT '{1:1,0:1}/2'::sparsevec;
+
+SELECT '{}/5'::sparsevec;
+SELECT '{}/-1'::sparsevec;
+SELECT '{}/100001'::sparsevec;
+SELECT '{}/16001'::sparsevec::vector;
+
+SELECT '{-1:1}/1'::sparsevec;
+SELECT '{1:1}/1'::sparsevec;
+
+SELECT '{}/1'::sparsevec(2);