From 78e5bcf22952ac1ca999f387d3ffc540ca9d8300 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 24 Apr 2024 12:51:24 -0700 Subject: [PATCH] Switched to 0-based numbering for sparsevec on-disk format --- src/halfvec.c | 2 +- src/sparsevec.c | 25 ++++++++++++++----------- src/sparsevec.h | 6 ++++-- src/vector.c | 2 +- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/halfvec.c b/src/halfvec.c index ce358ca..32e82dd 100644 --- a/src/halfvec.c +++ b/src/halfvec.c @@ -1206,7 +1206,7 @@ sparsevec_to_halfvec(PG_FUNCTION_ARGS) result = InitHalfVector(dim); for (int i = 0; i < svec->nnz; i++) - result->x[svec->indices[i] - 1] = Float4ToHalf(values[i]); + result->x[svec->indices[i]] = Float4ToHalf(values[i]); PG_RETURN_POINTER(result); } diff --git a/src/sparsevec.c b/src/sparsevec.c index 32d6195..7ca6416 100644 --- a/src/sparsevec.c +++ b/src/sparsevec.c @@ -98,12 +98,14 @@ CheckIndex(int32 *indices, int i, int dim) { int32 index = indices[i]; - if (index < 1) + /* TODO Better error message for binary format */ + if (index < 0) ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("index must be greater than zero"))); - if (index > dim) + /* TODO Better error message for binary format */ + if (index >= dim) ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("index must be less than or equal to dimensions"))); @@ -273,8 +275,8 @@ sparsevec_in(PG_FUNCTION_ARGS) /* Keep in int range for correct error message later */ if (index > INT_MAX) index = INT_MAX; - else if (index < INT_MIN) - index = INT_MIN; + else if (index < INT_MIN + 1) + index = INT_MIN + 1; pt = stringEnd; @@ -313,7 +315,8 @@ sparsevec_in(PG_FUNCTION_ARGS) /* Do not store zero values */ if (value != 0) { - elements[nnz].index = index; + /* Convert 1-based numbering (SQL) to 0-based (C) */ + elements[nnz].index = index - 1; elements[nnz].value = value; nnz++; } @@ -447,7 +450,8 @@ sparsevec_out(PG_FUNCTION_ARGS) if (i > 0) AppendChar(ptr, ','); - AppendInt(ptr, sparsevec->indices[i]); + /* Convert 0-based numbering (C) to 1-based (SQL) */ + AppendInt(ptr, sparsevec->indices[i] + 1); AppendChar(ptr, ':'); AppendFloat(ptr, values[i]); } @@ -615,7 +619,7 @@ vector_to_sparsevec(PG_FUNCTION_ARGS) if (j >= result->nnz) elog(ERROR, "safety check failed"); - result->indices[j] = i + 1; + result->indices[j] = i; values[j] = vec->x[i]; j++; } @@ -658,7 +662,7 @@ halfvec_to_sparsevec(PG_FUNCTION_ARGS) if (j >= result->nnz) elog(ERROR, "safety check failed"); - result->indices[j] = i + 1; + result->indices[j] = i; values[j] = HalfToFloat4(vec->x[i]); j++; } @@ -1019,11 +1023,10 @@ sparsevec_cmp_internal(SparseVector * a, SparseVector * b) return 1; } - /* Check <= dim since indices start at 1 */ - if (a->nnz < b->nnz && b->indices[nnz] <= a->dim) + if (a->nnz < b->nnz && b->indices[nnz] < a->dim) return bx[nnz] < 0 ? 1 : -1; - if (a->nnz > b->nnz && a->indices[nnz] <= b->dim) + if (a->nnz > b->nnz && a->indices[nnz] < b->dim) return ax[nnz] < 0 ? -1 : 1; if (a->dim < b->dim) diff --git a/src/sparsevec.h b/src/sparsevec.h index c019e44..6a387d4 100644 --- a/src/sparsevec.h +++ b/src/sparsevec.h @@ -10,8 +10,10 @@ #define PG_GETARG_SPARSEVEC_P(x) DatumGetSparseVector(PG_GETARG_DATUM(x)) #define PG_RETURN_SPARSEVEC_P(x) PG_RETURN_POINTER(x) -/* Indices are always sorted */ -/* Values come after indices */ +/* + * Indices use 0-based numbering for the on-disk (and binary) format (consistent with C) + * and are always sorted. Values come after indices. + */ typedef struct SparseVector { int32 vl_len_; /* varlena header (do not touch directly!) */ diff --git a/src/vector.c b/src/vector.c index 098df23..c3e0fa9 100644 --- a/src/vector.c +++ b/src/vector.c @@ -1318,7 +1318,7 @@ sparsevec_to_vector(PG_FUNCTION_ARGS) result = InitVector(dim); for (int i = 0; i < svec->nnz; i++) - result->x[svec->indices[i] - 1] = values[i]; + result->x[svec->indices[i]] = values[i]; PG_RETURN_POINTER(result); }