From 78e5bcf22952ac1ca999f387d3ffc540ca9d8300 Mon Sep 17 00:00:00 2001
From: Andrew Kane <andrew@ankane.org>
Date: Wed, 24 Apr 2024 12:51:24 -0700
Subject: [PATCH] Switched to 0-based numbering for sparsevec on-disk format

---
 src/halfvec.c   |  2 +-
 src/sparsevec.c | 25 ++++++++++++++-----------
 src/sparsevec.h |  6 ++++--
 src/vector.c    |  2 +-
 4 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/halfvec.c b/src/halfvec.c
index ce358ca..32e82dd 100644
--- a/src/halfvec.c
+++ b/src/halfvec.c
@@ -1206,7 +1206,7 @@ sparsevec_to_halfvec(PG_FUNCTION_ARGS)
 
 	result = InitHalfVector(dim);
 	for (int i = 0; i < svec->nnz; i++)
-		result->x[svec->indices[i] - 1] = Float4ToHalf(values[i]);
+		result->x[svec->indices[i]] = Float4ToHalf(values[i]);
 
 	PG_RETURN_POINTER(result);
 }
diff --git a/src/sparsevec.c b/src/sparsevec.c
index 32d6195..7ca6416 100644
--- a/src/sparsevec.c
+++ b/src/sparsevec.c
@@ -98,12 +98,14 @@ CheckIndex(int32 *indices, int i, int dim)
 {
 	int32		index = indices[i];
 
-	if (index < 1)
+	/* TODO Better error message for binary format (check is 0-based, but message is worded for 1-based text input) */
+	if (index < 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_DATA_EXCEPTION),
 				 errmsg("index must be greater than zero")));
 
-	if (index > dim)
+	/* TODO Better error message for binary format (check is 0-based, but message is worded for 1-based text input) */
+	if (index >= dim)
 		ereport(ERROR,
 				(errcode(ERRCODE_DATA_EXCEPTION),
 				 errmsg("index must be less than or equal to dimensions")));
@@ -273,8 +275,8 @@ sparsevec_in(PG_FUNCTION_ARGS)
 			/* Keep in int range for correct error message later */
 			if (index > INT_MAX)
 				index = INT_MAX;
-			else if (index < INT_MIN)
-				index = INT_MIN;
+			else if (index < INT_MIN + 1)
+				index = INT_MIN + 1;
 
 			pt = stringEnd;
 
@@ -313,7 +315,8 @@ sparsevec_in(PG_FUNCTION_ARGS)
 			/* Do not store zero values */
 			if (value != 0)
 			{
-				elements[nnz].index = index;
+				/* Convert 1-based numbering (SQL) to 0-based (C) */
+				elements[nnz].index = index - 1;
 				elements[nnz].value = value;
 				nnz++;
 			}
@@ -447,7 +450,8 @@ sparsevec_out(PG_FUNCTION_ARGS)
 		if (i > 0)
 			AppendChar(ptr, ',');
 
-		AppendInt(ptr, sparsevec->indices[i]);
+		/* Convert 0-based numbering (C) to 1-based (SQL) */
+		AppendInt(ptr, sparsevec->indices[i] + 1);
 		AppendChar(ptr, ':');
 		AppendFloat(ptr, values[i]);
 	}
@@ -615,7 +619,7 @@ vector_to_sparsevec(PG_FUNCTION_ARGS)
 			if (j >= result->nnz)
 				elog(ERROR, "safety check failed");
 
-			result->indices[j] = i + 1;
+			result->indices[j] = i;
 			values[j] = vec->x[i];
 			j++;
 		}
@@ -658,7 +662,7 @@ halfvec_to_sparsevec(PG_FUNCTION_ARGS)
 			if (j >= result->nnz)
 				elog(ERROR, "safety check failed");
 
-			result->indices[j] = i + 1;
+			result->indices[j] = i;
 			values[j] = HalfToFloat4(vec->x[i]);
 			j++;
 		}
@@ -1019,11 +1023,10 @@ sparsevec_cmp_internal(SparseVector * a, SparseVector * b)
 			return 1;
 	}
 
-	/* Check <= dim since indices start at 1 */
-	if (a->nnz < b->nnz && b->indices[nnz] <= a->dim)
+	if (a->nnz < b->nnz && b->indices[nnz] < a->dim)
 		return bx[nnz] < 0 ? 1 : -1;
 
-	if (a->nnz > b->nnz && a->indices[nnz] <= b->dim)
+	if (a->nnz > b->nnz && a->indices[nnz] < b->dim)
 		return ax[nnz] < 0 ? -1 : 1;
 
 	if (a->dim < b->dim)
diff --git a/src/sparsevec.h b/src/sparsevec.h
index c019e44..6a387d4 100644
--- a/src/sparsevec.h
+++ b/src/sparsevec.h
@@ -10,8 +10,10 @@
 #define PG_GETARG_SPARSEVEC_P(x)	DatumGetSparseVector(PG_GETARG_DATUM(x))
 #define PG_RETURN_SPARSEVEC_P(x)	PG_RETURN_POINTER(x)
 
-/* Indices are always sorted */
-/* Values come after indices */
+/*
+ * Indices use 0-based numbering in the on-disk and binary formats (consistent with C),
+ * unlike the 1-based text format, and are always sorted. Values come after indices.
+ */
 typedef struct SparseVector
 {
 	int32		vl_len_;		/* varlena header (do not touch directly!) */
diff --git a/src/vector.c b/src/vector.c
index 098df23..c3e0fa9 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -1318,7 +1318,7 @@ sparsevec_to_vector(PG_FUNCTION_ARGS)
 
 	result = InitVector(dim);
 	for (int i = 0; i < svec->nnz; i++)
-		result->x[svec->indices[i] - 1] = values[i];
+		result->x[svec->indices[i]] = values[i];
 
 	PG_RETURN_POINTER(result);
 }