这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions server/__tests__/utils/vectorDbProviders/pgvector/index.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
const { PGVector } = require("../../../../utils/vectorDbProviders/pgvector");

describe("PGVector.sanitizeForJsonb", () => {
  // Shorthand so every case reads as "input -> expected output". The call
  // still goes through the PGVector object, exactly like production code.
  const sanitize = (value) => PGVector.sanitizeForJsonb(value);

  // JSON round-trip used to take an independent snapshot of a fixture.
  const snapshot = (value) => JSON.parse(JSON.stringify(value));

  it("returns null/undefined as-is", () => {
    expect(sanitize(null)).toBeNull();
    expect(sanitize(undefined)).toBeUndefined();
  });

  it("keeps safe whitespace (tab, LF, CR) and removes disallowed C0 controls", () => {
    // Of the code points below 0x20, only 0x09, 0x0A and 0x0D may survive.
    const dirty = "a\u0000\u0001\u0002\tline\ncarriage\rreturn\u001Fend";
    expect(sanitize(dirty)).toBe("a\tline\ncarriage\rreturnend");
  });

  it("removes only disallowed control chars; keeps normal printable chars", () => {
    const dirty = "Hello\u0000, World! \u0007\u0008\u000B\u000C\u001F";
    expect(sanitize(dirty)).toBe("Hello, World! ");
  });

  it("deeply sanitizes objects", () => {
    const cleaned = sanitize({
      plain: "ok",
      bad: "has\u0000nul",
      nested: {
        arr: ["fine", "bad\u0001", { deep: "\u0002oops" }],
      },
    });

    expect(cleaned).toEqual({
      plain: "ok",
      bad: "hasnul",
      nested: { arr: ["fine", "bad", { deep: "oops" }] },
    });
  });

  it("deeply sanitizes arrays", () => {
    const cleaned = sanitize([
      "\u0000",
      1,
      true,
      { s: "bad\u0003" },
      ["ok", "\u0004bad"],
    ]);

    expect(cleaned).toEqual(["", 1, true, { s: "bad" }, ["ok", "bad"]]);
  });

  it("converts Date to ISO string", () => {
    const when = new Date("2020-01-02T03:04:05.000Z");
    expect(sanitize(when)).toBe(when.toISOString());
  });

  it("returns primitives unchanged (number, boolean, bigint)", () => {
    // Each primitive must come back as the very same value.
    for (const primitive of [42, 3.14, true, false]) {
      expect(sanitize(primitive)).toBe(primitive);
    }
    expect(sanitize(BigInt(1))).toBe(BigInt(1));
  });

  it("returns symbol unchanged", () => {
    const marker = Symbol("x");
    expect(sanitize(marker)).toBe(marker);
  });

  it("does not mutate original objects/arrays", () => {
    const sourceObject = { a: "bad\u0000", nested: { b: "ok" } };
    const sourceArray = ["\u0001", { c: "bad\u0002" }];

    // Snapshots are taken before sanitizing so a mutation would be visible.
    const objectBefore = snapshot(sourceObject);
    const arrayBefore = snapshot(sourceArray);

    const cleanedObject = sanitize(sourceObject);
    const cleanedArray = sanitize(sourceArray);

    // The inputs still match their pre-call snapshots...
    expect(sourceObject).toEqual(objectBefore);
    expect(sourceArray).toEqual(arrayBefore);

    // ...while the returned values are sanitized copies.
    expect(cleanedObject).toEqual({ a: "bad", nested: { b: "ok" } });
    expect(cleanedArray).toEqual(["", { c: "bad" }]);
  });
});
60 changes: 56 additions & 4 deletions server/utils/vectorDbProviders/pgvector/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,55 @@ const PGVector = {
console.log(`\x1b[35m[PGVectorDb]\x1b[0m ${message}`, ...args);
},

/**
* Recursively sanitize values intended for JSONB to prevent Postgres errors
* like "unsupported Unicode escape sequence". This primarily removes the
* NUL character (\u0000) and other disallowed control characters from
* strings. Arrays and objects are traversed and sanitized deeply.
* @param {any} value
* @returns {any}
*/
sanitizeForJsonb: function (value) {
// Fast path for null/undefined and primitives that do not need changes
if (value === null || value === undefined) return value;

// Strings: strip NUL and unsafe C0 control characters except common whitespace
if (typeof value === "string") {
// Build a sanitized string by excluding C0 control characters except
// horizontal tab (9), line feed (10), and carriage return (13).
let sanitized = "";
for (let i = 0; i < value.length; i++) {
const code = value.charCodeAt(i);
if (code === 9 || code === 10 || code === 13 || code >= 0x20) {
sanitized += value[i];
}
}
return sanitized;
}

// Arrays: sanitize each element
if (Array.isArray(value)) {
return value.map((item) => this.sanitizeForJsonb(item));
}

// Dates: keep as ISO string
if (value instanceof Date) {
return value.toISOString();
}

// Objects: sanitize each property value
if (typeof value === "object") {
const result = {};
for (const [k, v] of Object.entries(value)) {
result[k] = this.sanitizeForJsonb(v);
}
return result;
}

// Numbers, booleans, etc.
return value;
},

client: function (connectionString = null) {
return new pgsql.Client({
connectionString: connectionString || PGVector.connectionString(),
Expand Down Expand Up @@ -362,9 +411,11 @@ const PGVector = {

/**
* Update or create a collection in the database
* @param {pgsql.Connection} connection
* @param {{id: number, vector: number[], metadata: Object}[]} submissions
* @param {string} namespace
* @param {Object} params
* @param {pgsql.Connection} params.connection
* @param {{id: number, vector: number[], metadata: Object}[]} params.submissions
* @param {string} params.namespace
* @param {number} params.dimensions
* @returns {Promise<boolean>}
*/
updateOrCreateCollection: async function ({
Expand All @@ -381,9 +432,10 @@ const PGVector = {
await connection.query(`BEGIN`);
for (const submission of submissions) {
const embedding = `[${submission.vector.map(Number).join(",")}]`; // stringify the vector for pgvector
const sanitizedMetadata = this.sanitizeForJsonb(submission.metadata);
await connection.query(
`INSERT INTO "${PGVector.tableName()}" (id, namespace, embedding, metadata) VALUES ($1, $2, $3, $4)`,
[submission.id, namespace, embedding, submission.metadata]
[submission.id, namespace, embedding, sanitizedMetadata]
);
}
this.log(`Committing ${submissions.length} vectors to ${namespace}`);
Expand Down