Skip to content

Commit 24669f8

Browse files
authored
Add confidence score to DocumentField fieldvalue (Azure#26161)
1 parent 54c2b2a commit 24669f8

File tree

2 files changed

+71
-15
lines changed

2 files changed

+71
-15
lines changed

sdk/formrecognizer/azure-ai-formrecognizer/src/main/java/com/azure/ai/formrecognizer/implementation/util/Transforms.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -390,6 +390,7 @@ private static Map<String, DocumentField> toDocumentFields(
390390
return documentFieldMap;
391391
}
392392

393+
393394
private static DocumentField toDocumentField(
394395
com.azure.ai.formrecognizer.implementation.models.DocumentField innerDocumentField) {
395396
DocumentField documentField = new DocumentField();
@@ -409,6 +410,7 @@ private static DocumentField toDocumentField(
409410
.stream()
410411
.map(innerDocumentSpan -> toDocumentSpan(innerDocumentSpan))
411412
.collect(Collectors.toList()));
413+
DocumentFieldHelper.setConfidence(documentField, innerDocumentField.getConfidence());
412414
setDocumentFieldValue(innerDocumentField, documentField);
413415
return documentField;
414416
}

sdk/formrecognizer/azure-ai-formrecognizer/src/test/java/com/azure/ai/formrecognizer/DocumentAnalysisClientTestBase.java

Lines changed: 69 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -227,26 +227,38 @@ void validateBusinessCardData(AnalyzeResult analyzeResult) {
227227
final Map<String, DocumentField> actualBusinessCardFields = analyzeResult.getDocuments().get(0).getFields();
228228
assertEquals("2 Kingdom Street Paddington, London, W2 6BD",
229229
actualBusinessCardFields.get("Addresses").getValueList().get(0).getValueString());
230+
Assertions.assertNotNull(actualBusinessCardFields.get("Addresses").getValueList().get(0).getConfidence());
230231
assertEquals(EXPECTED_MERCHANT_NAME, actualBusinessCardFields.get("CompanyNames")
231232
.getValueList().get(0).getValueString());
233+
Assertions.assertNotNull(actualBusinessCardFields.get("CompanyNames").getValueList().get(0).getConfidence());
232234
assertEquals("Cloud & Al Department", actualBusinessCardFields.get("Departments")
233235
.getValueList().get(0).getValueString());
236+
Assertions.assertNotNull(actualBusinessCardFields.get("Departments").getValueList().get(0).getConfidence());
234237
assertEquals("avery.smith@contoso.com", actualBusinessCardFields.get("Emails")
235238
.getValueList().get(0).getValueString());
239+
Assertions.assertNotNull(actualBusinessCardFields.get("Emails").getValueList().get(0).getConfidence());
236240
assertEquals(DocumentFieldType.PHONE_NUMBER, actualBusinessCardFields.get("Faxes")
237241
.getValueList().get(0).getType());
242+
Assertions.assertNotNull(actualBusinessCardFields.get("Faxes").getValueList().get(0).getConfidence());
238243
assertEquals("Senior Researcher", actualBusinessCardFields.get("JobTitles")
239244
.getValueList().get(0).getValueString());
245+
Assertions.assertNotNull(actualBusinessCardFields.get("JobTitles").getValueList().get(0).getConfidence());
240246
assertEquals(DocumentFieldType.PHONE_NUMBER, actualBusinessCardFields.get("MobilePhones")
241247
.getValueList().get(0).getType());
248+
Assertions.assertNotNull(actualBusinessCardFields.get("MobilePhones").getValueList().get(0).getConfidence());
242249
assertEquals("https://www.contoso.com/", actualBusinessCardFields.get("Websites")
243250
.getValueList().get(0).getValueString());
251+
Assertions.assertNotNull(actualBusinessCardFields.get("Websites").getValueList().get(0).getConfidence());
244252
assertEquals(DocumentFieldType.PHONE_NUMBER, actualBusinessCardFields.get("WorkPhones")
245253
.getValueList().get(0).getType());
254+
Assertions.assertNotNull(actualBusinessCardFields.get("WorkPhones").getValueList().get(0).getConfidence());
246255
Map<String, DocumentField> contactNamesMap
247256
= actualBusinessCardFields.get("ContactNames").getValueList().get(0).getValueMap();
257+
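// TODO: the service returns a null confidence for "FirstName" and "LastName"; decide whether the SDK should supply a default before enabling the confidence assertions for these fields.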
258+
// Assertions.assertNotNull(contactNamesMap.get("FirstName").getConfidence());
248259
assertEquals("Avery", contactNamesMap.get("FirstName").getValueString());
249260
assertEquals("Smith", contactNamesMap.get("LastName").getValueString());
261+
// Assertions.assertNotNull(contactNamesMap.get("LastName").getConfidence());
250262
}
251263

252264
static void validateMultipageBusinessData(AnalyzeResult analyzeResult) {
@@ -259,25 +271,31 @@ static void validateMultipageBusinessData(AnalyzeResult analyzeResult) {
259271
Map<String, DocumentField> businessCard1Fields = analyzeResult.getDocuments().get(0).getFields();
260272
List<DocumentField> emailList = businessCard1Fields.get("Emails").getValueList();
261273
assertEquals("johnsinger@contoso.com", emailList.get(0).getValueString());
274+
Assertions.assertNotNull(emailList.get(0).getConfidence());
262275
List<DocumentField> phoneNumberList = businessCard1Fields.get("OtherPhones").getValueList();
276+
Assertions.assertNotNull(phoneNumberList.get(0).getConfidence());
263277
assertEquals("+14257793479", phoneNumberList.get(0).getValuePhoneNumber());
264278
assertEquals(1, businessCard1.getPageNumber());
265279

266280
// assert contact name page number
267281
DocumentField contactNameField = businessCard1Fields.get("ContactNames").getValueList().get(0);
268282
assertEquals("JOHN SINGER", contactNameField.getContent());
283+
Assertions.assertNotNull(contactNameField.getConfidence());
269284

270285
assertEquals(2, businessCard2.getPageNumber());
271286
Map<String, DocumentField> businessCard2Fields = analyzeResult.getDocuments().get(1).getFields();
272287
List<DocumentField> email2List = businessCard2Fields.get("Emails").getValueList();
273288
assertEquals("avery.smith@contoso.com", email2List.get(0).getValueString());
289+
Assertions.assertNotNull(email2List.get(0).getConfidence());
274290
List<DocumentField> phoneNumber2List = businessCard2Fields.get("WorkPhones").getValueList();
275291
assertEquals("+44 (0) 20 9876 5432", phoneNumber2List.get(0).getContent());
292+
Assertions.assertNotNull(phoneNumber2List.get(0).getConfidence());
276293

277294
// assert contact name page number
278295
DocumentField contactName2Field = businessCard2Fields.get("ContactNames").getValueList().get(0);
279296
assertEquals(2, contactName2Field.getBoundingRegions().get(0).getPageNumber());
280297
assertEquals("Dr. Avery Smith", contactName2Field.getContent());
298+
Assertions.assertNotNull(contactName2Field.getConfidence());
281299
}
282300

283301
void validateInvoiceData(AnalyzeResult analyzeResult) {
@@ -306,27 +324,39 @@ void validateInvoiceData(AnalyzeResult analyzeResult) {
306324
Map<String, DocumentField> invoicePage1Fields = analyzeResult.getDocuments().get(0).getFields();
307325
assertEquals("1020 Enterprise Way Sunnayvale, CA 87659", invoicePage1Fields.get("CustomerAddress")
308326
.getValueString());
327+
Assertions.assertNotNull(invoicePage1Fields.get("CustomerAddress").getConfidence());
309328
assertEquals("Microsoft", invoicePage1Fields.get("CustomerAddressRecipient")
310329
.getValueString());
330+
Assertions.assertNotNull(invoicePage1Fields.get("CustomerAddressRecipient").getConfidence());
311331
assertEquals("Microsoft", invoicePage1Fields.get("CustomerName")
312332
.getValueString());
333+
Assertions.assertNotNull(invoicePage1Fields.get("CustomerName").getConfidence());
313334
assertEquals(LocalDate.of(2017, 6, 24), invoicePage1Fields.get("DueDate")
314335
.getValueDate());
336+
Assertions.assertNotNull(invoicePage1Fields.get("DueDate").getConfidence());
315337
assertEquals(LocalDate.of(2017, 6, 18), invoicePage1Fields.get("InvoiceDate")
316338
.getValueDate());
339+
Assertions.assertNotNull(invoicePage1Fields.get("InvoiceDate").getConfidence());
317340
assertEquals("34278587", invoicePage1Fields.get("InvoiceId")
318341
.getValueString());
342+
Assertions.assertNotNull(invoicePage1Fields.get("InvoiceId").getConfidence());
319343
assertEquals("1 Redmond way Suite 6000 Redmond, WA 99243", invoicePage1Fields.get("VendorAddress")
320344
.getValueString());
345+
Assertions.assertNotNull(invoicePage1Fields.get("VendorAddress").getConfidence());
321346
assertEquals(EXPECTED_MERCHANT_NAME, invoicePage1Fields.get("VendorName")
322347
.getValueString());
348+
Assertions.assertNotNull(invoicePage1Fields.get("VendorName").getConfidence());
323349

324350
Map<String, DocumentField> itemsMap
325351
= invoicePage1Fields.get("Items").getValueList().get(0).getValueMap();
326352
assertEquals(56651.49f, itemsMap.get("Amount").getValueFloat());
353+
Assertions.assertNotNull(itemsMap.get("Amount").getConfidence());
327354
assertEquals(LocalDate.of(2017, 6, 18), itemsMap.get("Date").getValueDate());
355+
Assertions.assertNotNull(itemsMap.get("Date").getConfidence());
328356
assertEquals("34278587", itemsMap.get("ProductCode").getValueString());
357+
Assertions.assertNotNull(itemsMap.get("ProductCode").getConfidence());
329358
assertEquals(DocumentFieldType.FLOAT, itemsMap.get("Tax").getType());
359+
Assertions.assertNotNull(itemsMap.get("Tax").getConfidence());
330360
}
331361

332362
static void validateMultipageInvoiceData(AnalyzeResult analyzeResult) {
@@ -335,19 +365,22 @@ static void validateMultipageInvoiceData(AnalyzeResult analyzeResult) {
335365

336366
assertEquals(1, invoicePage1.getPageNumber());
337367
assertEquals(1, analyzeResult.getDocuments().size());
368+
338369
Map<String, DocumentField> recognizedInvoiceFields = analyzeResult.getDocuments().get(0).getFields();
339370
final DocumentField remittanceAddressRecipient = recognizedInvoiceFields.get("RemittanceAddressRecipient");
340-
371+
Assertions.assertNotNull(recognizedInvoiceFields.get("RemittanceAddressRecipient").getConfidence());
341372
assertEquals("Contoso Ltd.", remittanceAddressRecipient.getValueString());
342373
assertEquals(1, remittanceAddressRecipient.getBoundingRegions().get(0).getPageNumber());
343-
final DocumentField remittanceAddress = recognizedInvoiceFields.get("RemittanceAddress");
344374

375+
final DocumentField remittanceAddress = recognizedInvoiceFields.get("RemittanceAddress");
345376
assertEquals("2345 Dogwood Lane Birch, Kansas 98123", remittanceAddress.getValueString());
346377
assertEquals(1, remittanceAddress.getBoundingRegions().get(0).getPageNumber());
378+
Assertions.assertNotNull(remittanceAddress.getConfidence());
347379

348380
final DocumentField vendorName = recognizedInvoiceFields.get("VendorName");
349381
assertEquals("Southridge Video", vendorName.getValueString());
350382
assertEquals(2, vendorName.getBoundingRegions().get(0).getPageNumber());
383+
Assertions.assertNotNull(vendorName.getConfidence());
351384
}
352385

353386
void validateIdentityData(AnalyzeResult analyzeResult) {
@@ -376,25 +409,30 @@ void validateIdentityData(AnalyzeResult analyzeResult) {
376409
Map<String, DocumentField> licensePageFields = analyzeResult.getDocuments().get(0).getFields();
377410
assertEquals("123 STREET ADDRESS YOUR CITY WA 99999-1234", licensePageFields.get("Address")
378411
.getValueString());
412+
Assertions.assertNotNull(licensePageFields.get("Address").getConfidence());
379413
assertEquals("USA", licensePageFields.get("CountryRegion").getValueCountryRegion());
414+
Assertions.assertNotNull(licensePageFields.get("CountryRegion").getConfidence());
380415
assertEquals(LocalDate.of(1958, 1, 6), licensePageFields.get("DateOfBirth")
381416
.getValueDate());
417+
Assertions.assertNotNull(licensePageFields.get("DateOfBirth").getConfidence());
382418
assertEquals(LocalDate.of(2020, 8, 12), licensePageFields.get("DateOfExpiration")
383419
.getValueDate());
420+
Assertions.assertNotNull(licensePageFields.get("DateOfExpiration").getConfidence());
384421
assertEquals("WDLABCD456DG", licensePageFields.get("DocumentNumber")
385422
.getValueString());
386-
assertEquals("LIAM R.", licensePageFields.get("FirstName")
387-
.getValueString());
388-
assertEquals("TALBOT", licensePageFields.get("LastName")
389-
.getValueString());
390-
assertEquals("Washington", licensePageFields.get("Region")
391-
.getValueString());
392-
assertEquals("M", licensePageFields.get("Sex")
393-
.getValueString());
394-
assertEquals("L", licensePageFields.get("Endorsements")
395-
.getValueString());
396-
assertEquals("B", licensePageFields.get("Restrictions")
397-
.getValueString());
423+
Assertions.assertNotNull(licensePageFields.get("DocumentNumber").getConfidence());
424+
assertEquals("LIAM R.", licensePageFields.get("FirstName").getValueString());
425+
Assertions.assertNotNull(licensePageFields.get("FirstName").getConfidence());
426+
assertEquals("TALBOT", licensePageFields.get("LastName").getValueString());
427+
Assertions.assertNotNull(licensePageFields.get("LastName").getConfidence());
428+
assertEquals("Washington", licensePageFields.get("Region").getValueString());
429+
Assertions.assertNotNull(licensePageFields.get("Region").getConfidence());
430+
assertEquals("M", licensePageFields.get("Sex").getValueString());
431+
Assertions.assertNotNull(licensePageFields.get("Sex").getConfidence());
432+
assertEquals("L", licensePageFields.get("Endorsements").getValueString());
433+
Assertions.assertNotNull(licensePageFields.get("Endorsements").getConfidence());
434+
assertEquals("B", licensePageFields.get("Restrictions").getValueString());
435+
Assertions.assertNotNull(licensePageFields.get("Restrictions").getConfidence());
398436
}
399437

400438
void validateGermanContentData(AnalyzeResult analyzeResult) {
@@ -589,7 +627,7 @@ void validateJpegCustomDocument(AnalyzeResult actualAnalyzeResult, String modelI
589627
Assertions.assertEquals(modelId + ":" + modelId, actualDocument.getDocType());
590628
actualDocument.getFields().forEach((key, documentField) -> {
591629
// document fields
592-
630+
Assertions.assertNotNull(documentField.getConfidence());
593631
if ("Tax".equals(key)) {
594632
assertEquals("$4.00", documentField.getValueString());
595633
}
@@ -633,6 +671,7 @@ void validateMultiPagePdfData(AnalyzeResult analyzeResult, String modelId) {
633671
assertEquals(modelId + ":" + modelId, analyzedDocument.getDocType());
634672
analyzedDocument.getFields().forEach((key, documentField) -> {
635673
Assertions.assertNotNull(documentField.getType());
674+
Assertions.assertNotNull(documentField.getConfidence());
636675
});
637676
});
638677
}
@@ -675,6 +714,9 @@ private void validatePngReceiptFields(Map<String, DocumentField> actualFields) {
675714
Assertions.assertNotNull(actualFields.get("Subtotal").getValueFloat());
676715
Assertions.assertNotNull(actualFields.get("Total").getValueFloat());
677716
Assertions.assertNotNull(actualFields.get("Tax").getValueFloat());
717+
Assertions.assertNotNull(actualFields.get("Subtotal").getConfidence());
718+
Assertions.assertNotNull(actualFields.get("Total").getConfidence());
719+
Assertions.assertNotNull(actualFields.get("Tax").getConfidence());
678720
Assertions.assertNotNull(actualFields.get("Items"));
679721
List<DocumentField> itemizedItems = actualFields.get("Items").getValueList();
680722

@@ -686,6 +728,7 @@ private void validatePngReceiptFields(Map<String, DocumentField> actualFields) {
686728
Map<String, DocumentField> actualReceiptItems = itemizedItems.get(i).getValueMap();
687729
int finalI = i;
688730
actualReceiptItems.forEach((key, documentField) -> {
731+
Assertions.assertNotNull(documentField.getConfidence());
689732
if ("Name".equals(key)) {
690733
if (DocumentFieldType.STRING == documentField.getType()) {
691734
String name = documentField.getValueString();
@@ -725,26 +768,37 @@ private void validateJpegReceiptFields(Map<String, DocumentField> actualFields)
725768
}
726769
if ("Locale".equals(key)) {
727770
Assertions.assertEquals("en-US", documentField.getValueString());
771+
Assertions.assertNotNull(documentField.getConfidence());
728772
} else if ("MerchantAddress".equals(key)) {
729773
Assertions.assertEquals("123 Main Street Redmond, WA 98052", documentField.getValueString());
774+
Assertions.assertNotNull(documentField.getConfidence());
730775
} else if ("MerchantName".equals(key)) {
731776
Assertions.assertEquals("Contoso", documentField.getValueString());
777+
Assertions.assertNotNull(documentField.getConfidence());
732778
} else if ("MerchantPhoneNumber".equals(key)) {
733779
Assertions.assertEquals("+19876543210", documentField.getValuePhoneNumber());
780+
Assertions.assertNotNull(documentField.getConfidence());
734781
} else if ("ReceiptType".equals(key)) {
735782
Assertions.assertEquals("Itemized", documentField.getValueString());
783+
Assertions.assertNotNull(documentField.getConfidence());
736784
} else if ("Subtotal".equals(key)) {
737785
Assertions.assertEquals(11.7f, documentField.getValueFloat());
786+
Assertions.assertNotNull(documentField.getConfidence());
738787
} else if ("Tax".equals(key)) {
739788
Assertions.assertEquals(1.17f, documentField.getValueFloat());
789+
Assertions.assertNotNull(documentField.getConfidence());
740790
} else if ("Tip".equals(key)) {
741791
Assertions.assertEquals(1.63f, documentField.getValueFloat());
792+
Assertions.assertNotNull(documentField.getConfidence());
742793
} else if ("TransactionDate".equals(key)) {
743794
Assertions.assertEquals(LocalDate.of(2019, 6, 10), documentField.getValueDate());
795+
Assertions.assertNotNull(documentField.getConfidence());
744796
} else if ("TransactionTime".equals(key)) {
745797
Assertions.assertEquals(LocalTime.of(13, 59), documentField.getValueTime());
798+
Assertions.assertNotNull(documentField.getConfidence());
746799
} else if ("Total".equals(key)) {
747800
Assertions.assertEquals(14.5f, documentField.getValueFloat());
801+
Assertions.assertNotNull(documentField.getConfidence());
748802
}
749803
});
750804
}

0 commit comments

Comments
 (0)