From b6560ef0a0094065a51f9719135a4d45476308ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Mon, 27 Oct 2025 18:28:04 +0100 Subject: [PATCH] :sparkles: add RAG metadata in v2 results --- src/index.ts | 1 + src/parsing/v2/index.ts | 1 + src/parsing/v2/inferenceResult.ts | 9 ++++++ src/parsing/v2/ragMetadata.ts | 12 ++++++++ tests/v2/parsing/inference.spec.ts | 45 ++++++++++++++++++++---------- tests/v2/parsing/job.spec.ts | 2 +- 6 files changed, 54 insertions(+), 16 deletions(-) create mode 100644 src/parsing/v2/ragMetadata.ts diff --git a/src/index.ts b/src/index.ts index a8731bd9..e1db5918 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,6 +14,7 @@ export { InferenceResponse, JobResponse, RawText, + RagMetadata, } from "./parsing/v2"; export { InputSource, diff --git a/src/parsing/v2/index.ts b/src/parsing/v2/index.ts index 2ddc5302..b459c00c 100644 --- a/src/parsing/v2/index.ts +++ b/src/parsing/v2/index.ts @@ -11,3 +11,4 @@ export { Job } from "./job"; export { JobResponse } from "./jobResponse"; export { RawText } from "./rawText"; export { JobWebhook } from "./jobWebhook"; +export { RagMetadata } from "./ragMetadata"; diff --git a/src/parsing/v2/inferenceResult.ts b/src/parsing/v2/inferenceResult.ts index 19409fa6..1468234f 100644 --- a/src/parsing/v2/inferenceResult.ts +++ b/src/parsing/v2/inferenceResult.ts @@ -1,6 +1,7 @@ import { InferenceFields } from "./field"; import { StringDict } from "../common"; import { RawText } from "./rawText"; +import { RagMetadata } from "./ragMetadata"; export class InferenceResult { /** @@ -13,11 +14,19 @@ export class InferenceResult { */ public rawText?: RawText; + /** + * RAG metadata. + */ + public rag?: RagMetadata; + constructor(serverResponse: StringDict) { this.fields = new InferenceFields(serverResponse["fields"]); if (serverResponse["raw_text"]) { this.rawText = new RawText(serverResponse["raw_text"]); } + if (serverResponse["rag"]) { + this.rag = new RagMetadata(serverResponse["rag"]); + } } toString(): string { diff --git a/src/parsing/v2/ragMetadata.ts b/src/parsing/v2/ragMetadata.ts new file mode 100644 index 00000000..0195b7d4 --- /dev/null +++ b/src/parsing/v2/ragMetadata.ts @@ -0,0 +1,12 @@ +import { StringDict } from "../common"; + +export class RagMetadata { + /** + * The UUID of the matched document used during the RAG operation. + */ + retrievedDocumentId?: string; + + constructor(serverResponse: StringDict) { + this.retrievedDocumentId = serverResponse["retrieved_document_id"] ?? undefined; + } +} diff --git a/tests/v2/parsing/inference.spec.ts b/tests/v2/parsing/inference.spec.ts index 766472c9..9c60e33c 100644 --- a/tests/v2/parsing/inference.spec.ts +++ b/tests/v2/parsing/inference.spec.ts @@ -1,6 +1,6 @@ import { expect } from "chai"; import path from "node:path"; -import { LocalResponse, InferenceResponse, RawText } from "../../../src"; +import { LocalResponse, InferenceResponse, RawText, RagMetadata } from "../../../src"; import { FieldConfidence, ListField, ObjectField, SimpleField } from "../../../src/parsing/v2/field"; import { promises as fs } from "node:fs"; import { Polygon } from "../../../src/geometry"; @@ -12,9 +12,6 @@ const deepNestedFieldPath = path.join(inferencePath, "deep_nested_fields.json"); const standardFieldPath = path.join(inferencePath, "standard_field_types.json"); const standardFieldRstPath = path.join(inferencePath, "standard_field_types.rst"); const locationFieldPath = path.join(findocPath, "complete_with_coordinates.json"); -const rawTextPath = path.join(inferencePath, "raw_texts.json"); -const blankPath = path.join(findocPath, "blank.json"); -const completePath = path.join(findocPath, "complete.json"); async function loadV2Inference(resourcePath: string): Promise { const localResponse = new LocalResponse(resourcePath); @@ -22,10 +19,12 @@ async function loadV2Inference(resourcePath: string): Promise return localResponse.deserializeResponse(InferenceResponse); } -describe("inference", async () => { - describe("simple", async () => { +describe("MindeeV2 - Inference Response", async () => { + describe("Financial Document", async () => { it("should load a blank inference with valid properties", async () => { - const response = await loadV2Inference(blankPath); + const response = await loadV2Inference( + path.join(findocPath, "blank.json") + ); const fields = response.inference.result.fields; expect(fields).to.be.not.empty; @@ -55,7 +54,9 @@ describe("inference", async () => { }); it("should load a complete inference with valid properties", async () => { - const response = await loadV2Inference(completePath); + const response = await loadV2Inference( + path.join(findocPath, "complete.json") + ); const inference = response.inference; expect(inference).to.not.be.undefined; @@ -116,7 +117,7 @@ describe("inference", async () => { }); }); - describe("nested", async () => { + describe("Deeply Nested", async () => { it("should load a deep nested object", async () => { const response = await loadV2Inference(deepNestedFieldPath); const fields = response.inference.result.fields; @@ -152,7 +153,7 @@ describe("inference", async () => { }); }); - describe("standard field types", async () => { + describe("Standard Field Types", async () => { it("should recognize simple fields", async () => { const response = await loadV2Inference(standardFieldPath); const fields = response.inference.result.fields; @@ -259,11 +260,14 @@ describe("inference", async () => { }); }); - describe("raw text", async () => { + describe("Raw Text", async () => { it("raw text should be exposed", async () => { - const response = await loadV2Inference(rawTextPath); - const rawText = response.inference.result.rawText; + const response = await loadV2Inference( + path.join(inferencePath, "raw_texts.json") + ); + expect(response.inference.result.rag).to.be.undefined; + const rawText = response.inference.result.rawText; expect(rawText).to.be.instanceOf(RawText); const pages = rawText?.pages; @@ -275,7 +279,18 @@ describe("inference", async () => { }); }); - describe("rst display", async () => { + describe("RAG Metadata", async () => { + it("RAG metadata should be exposed", async () => { + const response = await loadV2Inference( + path.join(inferencePath, "rag_matched.json") + ); + const rag = response.inference.result.rag; + expect(rag).to.be.instanceOf(RagMetadata); + expect(rag?.retrievedDocumentId).to.eq("12345abc-1234-1234-1234-123456789abc"); + }); + }); + + describe("RST Display", async () => { it("to be properly exposed", async () => { const response = await loadV2Inference(standardFieldPath); const rstString = await fs.readFile(standardFieldRstPath, "utf8"); @@ -285,7 +300,7 @@ describe("inference", async () => { }).timeout(10000); }); - describe("field locations and confidence", async () => { + describe("Field Locations and Confidence", async () => { it("to be properly exposed", async () => { const response = await loadV2Inference(locationFieldPath); diff --git a/tests/v2/parsing/job.spec.ts b/tests/v2/parsing/job.spec.ts index 5bd327a1..675c0e92 100644 --- a/tests/v2/parsing/job.spec.ts +++ b/tests/v2/parsing/job.spec.ts @@ -12,7 +12,7 @@ async function loadV2Job(resourcePath: string): Promise { return localResponse.deserializeResponse(JobResponse); } -describe("job", async () => { +describe("MindeeV2 - Job Response", async () => { describe("OK", async () => { it("should load when status is Processing", async () => { const response = await loadV2Job(