Skip to content

Commit 8a4d55b

Browse files
authored
[Form Recognizer] Add support for building model with Blob List source (Azure#34232)
1 parent e603df2 commit 8a4d55b

File tree

11 files changed

+398
-23
lines changed

11 files changed

+398
-23
lines changed

sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
### Features Added
66
- Added support for analyzing new document types
7-
-
7+
- Added support for AzureBlobList source type when building document models
8+
- Added support for building, getting, listing and deleting classifiers
89
### Breaking Changes
910

1011
### Bugs Fixed

sdk/formrecognizer/azure-ai-formrecognizer/src/main/java/com/azure/ai/formrecognizer/documentanalysis/administration/DocumentModelAdministrationAsyncClient.java

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -270,12 +270,67 @@ public PollerFlux<OperationResult, DocumentModelDetails> beginBuildDocumentModel
270270
DocumentModelBuildMode buildMode,
271271
String prefix,
272272
BuildDocumentModelOptions buildDocumentModelOptions) {
273-
return beginBuildDocumentModel(blobContainerUrl, buildMode, prefix, buildDocumentModelOptions, Context.NONE);
273+
return beginBuildDocumentModel(blobContainerUrl, buildMode, prefix, null, buildDocumentModelOptions, Context.NONE);
274+
}
275+
276+
/**
277+
* Builds a custom document analysis model.
278+
* Models are built using documents that are of the following content type -
279+
* 'application/pdf', 'image/jpeg', 'image/png', 'image/tiff', 'image/bmp'.
280+
* Other types of content are ignored.
281+
* <p>The service does not support cancellation of the long running operation and returns with an
282+
* error message indicating absence of cancellation support.</p>
283+
* See <a href="https://docs.microsoft.com/azure/cognitive-services/form-recognizer/build-training-data-set#upload-your-training-data">here</a>
284+
* for information on building your own training data set.
285+
*
286+
* <p><strong>Code sample</strong></p>
287+
* <!-- src_embed com.azure.ai.formrecognizer.documentanalysis.administration.DocumentModelAdminAsyncClient.beginBuildDocumentModel#String-BuildMode-String -->
288+
* <pre>
289+
* String blobContainerUrl = &quot;&#123;SAS-URL-of-your-container-in-blob-storage&#125;&quot;;
290+
* String fileList = &quot;&quot;;
291+
* documentModelAdministrationAsyncClient.beginBuildDocumentModel&#40;blobContainerUrl,
292+
* DocumentModelBuildMode.TEMPLATE, fileList
293+
* &#41;
294+
* &#47;&#47; if polling operation completed, retrieve the final result.
295+
* .flatMap&#40;AsyncPollResponse::getFinalResult&#41;
296+
* .subscribe&#40;documentModel -&gt; &#123;
297+
* System.out.printf&#40;&quot;Model ID: %s%n&quot;, documentModel.getModelId&#40;&#41;&#41;;
298+
* System.out.printf&#40;&quot;Model Created on: %s%n&quot;, documentModel.getCreatedOn&#40;&#41;&#41;;
299+
* documentModel.getDocumentTypes&#40;&#41;.forEach&#40;&#40;key, documentTypeDetails&#41; -&gt; &#123;
300+
* documentTypeDetails.getFieldSchema&#40;&#41;.forEach&#40;&#40;field, documentFieldSchema&#41; -&gt; &#123;
301+
* System.out.printf&#40;&quot;Field: %s&quot;, field&#41;;
302+
* System.out.printf&#40;&quot;Field type: %s&quot;, documentFieldSchema.getType&#40;&#41;&#41;;
303+
* System.out.printf&#40;&quot;Field confidence: %.2f&quot;, documentTypeDetails.getFieldConfidence&#40;&#41;.get&#40;field&#41;&#41;;
304+
* &#125;&#41;;
305+
* &#125;&#41;;
306+
* &#125;&#41;;
307+
* </pre>
308+
* <!-- end com.azure.ai.formrecognizer.documentanalysis.administration.DocumentModelAdminAsyncClient.beginBuildDocumentModel#String-BuildMode-String -->
309+
*
310+
* @param blobContainerUrl an Azure Storage blob container's SAS URI. A container URI (without SAS)
311+
* can be used if the container is public or has a managed identity configured. For more information on
312+
* setting up a training data set, see: <a href="https://aka.ms/azsdk/formrecognizer/buildcustommodel">here</a>.
313+
* @param buildMode the preferred technique for creating models. For faster training of models use
314+
* {@link DocumentModelBuildMode#TEMPLATE}. See <a href="https://aka.ms/azsdk/formrecognizer/buildmode">here</a>
315+
* for more information on building mode for custom documents.
316+
* @param fileList Path to a JSONL file within the container specifying the set of documents for training.
317+
* @return A {@link PollerFlux} that polls the building model operation until it has completed, has failed, or has
318+
* been cancelled. The completed operation returns the trained {@link DocumentModelDetails custom document analysis model}.
319+
* @throws HttpResponseException If building a model fails with {@link OperationStatus#FAILED}.
320+
* @throws NullPointerException If {@code blobContainerUrl} or {@code fileList} is null.
321+
*/
322+
@ServiceMethod(returns = ReturnType.LONG_RUNNING_OPERATION)
323+
public PollerFlux<OperationResult, DocumentModelDetails> beginBuildDocumentModel(String blobContainerUrl,
324+
DocumentModelBuildMode buildMode,
325+
String fileList) {
326+
Objects.requireNonNull(fileList, "'fileList' is required and cannot be null.");
327+
return beginBuildDocumentModel(blobContainerUrl, buildMode, null, fileList, null, Context.NONE);
274328
}
275329

276330
PollerFlux<OperationResult, DocumentModelDetails> beginBuildDocumentModel(String blobContainerUrl,
277331
DocumentModelBuildMode buildMode,
278332
String prefix,
333+
String fileList,
279334
BuildDocumentModelOptions buildDocumentModelOptions,
280335
Context context) {
281336

@@ -287,7 +342,7 @@ PollerFlux<OperationResult, DocumentModelDetails> beginBuildDocumentModel(String
287342
}
288343
return new PollerFlux<OperationResult, DocumentModelDetails>(
289344
DEFAULT_POLL_INTERVAL,
290-
buildModelActivationOperation(blobContainerUrl, buildMode, modelId, prefix, buildDocumentModelOptions, context),
345+
buildModelActivationOperation(blobContainerUrl, buildMode, modelId, prefix, fileList, buildDocumentModelOptions, context),
291346
createModelPollOperation(context),
292347
(activationResponse, pollingContext) -> Mono.error(new RuntimeException("Cancellation is not supported")),
293348
fetchModelResultOperation(context));
@@ -1235,12 +1290,12 @@ PollerFlux<OperationResult, DocumentClassifierDetails> beginBuildDocumentClassif
12351290
private Function<PollingContext<OperationResult>, Mono<OperationResult>>
12361291
buildModelActivationOperation(
12371292
String blobContainerUrl, DocumentModelBuildMode buildMode, String modelId,
1238-
String prefix, BuildDocumentModelOptions buildDocumentModelOptions, Context context) {
1293+
String prefix, String fileList, BuildDocumentModelOptions buildDocumentModelOptions, Context context) {
12391294
return (pollingContext) -> {
12401295
try {
12411296
Objects.requireNonNull(blobContainerUrl, "'blobContainerUrl' cannot be null.");
12421297
BuildDocumentModelRequest buildDocumentModelRequest =
1243-
getBuildDocumentModelRequest(blobContainerUrl, buildMode, modelId, prefix,
1298+
getBuildDocumentModelRequest(blobContainerUrl, buildMode, modelId, prefix, fileList,
12441299
buildDocumentModelOptions);
12451300

12461301
return documentModelsImpl.buildModelWithResponseAsync(buildDocumentModelRequest, context)

sdk/formrecognizer/azure-ai-formrecognizer/src/main/java/com/azure/ai/formrecognizer/documentanalysis/administration/DocumentModelAdministrationClient.java

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,11 +241,63 @@ public SyncPoller<OperationResult, DocumentModelDetails> beginBuildDocumentModel
241241
String blobContainerUrl, DocumentModelBuildMode buildMode,
242242
String prefix, BuildDocumentModelOptions buildDocumentModelOptions,
243243
Context context) {
244-
return beginBuildDocumentModelSync(blobContainerUrl, buildMode, prefix, buildDocumentModelOptions, context);
244+
return beginBuildDocumentModelSync(blobContainerUrl, buildMode, prefix, null, buildDocumentModelOptions, context);
245+
}
246+
247+
/**
248+
* Builds a custom document analysis model.
249+
* <p>Models are built using documents that are of the following content
250+
* type - 'application/pdf', 'image/jpeg', 'image/png', 'image/tiff', 'image/bmp'.
251+
* Other types of content are ignored.
252+
* </p>
253+
* <p>The service does not support cancellation of the long running operation and returns with an
254+
* error message indicating absence of cancellation support.</p>
255+
*
256+
* <p><strong>Code sample</strong></p>
257+
* <!-- src_embed com.azure.ai.formrecognizer.documentanalysis.administration.DocumentModelAdminClient.beginBuildDocumentModel#String-BuildMode-String -->
258+
* <pre>
259+
* String blobContainerUrl = &quot;&#123;SAS-URL-of-your-container-in-blob-storage&#125;&quot;;
260+
* String fileList = &quot;&quot;;
261+
*
262+
* DocumentModelDetails documentModelDetails
263+
* = documentModelAdministrationClient.beginBuildDocumentModel&#40;blobContainerUrl,
264+
* DocumentModelBuildMode.TEMPLATE, fileList&#41;
265+
* .getFinalResult&#40;&#41;;
266+
*
267+
* System.out.printf&#40;&quot;Model ID: %s%n&quot;, documentModelDetails.getModelId&#40;&#41;&#41;;
268+
* System.out.printf&#40;&quot;Model Created on: %s%n&quot;, documentModelDetails.getCreatedOn&#40;&#41;&#41;;
269+
* documentModelDetails.getDocumentTypes&#40;&#41;.forEach&#40;&#40;key, documentTypeDetails&#41; -&gt; &#123;
270+
* documentTypeDetails.getFieldSchema&#40;&#41;.forEach&#40;&#40;field, documentFieldSchema&#41; -&gt; &#123;
271+
* System.out.printf&#40;&quot;Field: %s&quot;, field&#41;;
272+
* System.out.printf&#40;&quot;Field type: %s&quot;, documentFieldSchema.getType&#40;&#41;&#41;;
273+
* System.out.printf&#40;&quot;Field confidence: %.2f&quot;, documentTypeDetails.getFieldConfidence&#40;&#41;.get&#40;field&#41;&#41;;
274+
* &#125;&#41;;
275+
* &#125;&#41;;
276+
* </pre>
277+
* <!-- end com.azure.ai.formrecognizer.documentanalysis.administration.DocumentModelAdminClient.beginBuildDocumentModel#String-BuildMode-String -->
278+
*
279+
* @param blobContainerUrl an Azure Storage blob container's SAS URI. A container URI (without SAS)
280+
* can be used if the container is public or has a managed identity configured. For more information on
281+
* setting up a training data set, see: <a href="https://aka.ms/azsdk/formrecognizer/buildcustommodel">here</a>.
282+
* @param buildMode the preferred technique for creating models. For faster training of models use
283+
* {@link DocumentModelBuildMode#TEMPLATE}. See <a href="https://aka.ms/azsdk/formrecognizer/buildmode">here</a>
284+
* for more information on building mode for custom documents.
285+
* @param fileList Path to a JSONL file within the container specifying the set of documents for training.
286+
* @return A {@link SyncPoller} that polls the building model operation until it has completed, has failed, or has
287+
* been cancelled. The completed operation returns the built {@link DocumentModelDetails custom document analysis model}.
288+
* @throws HttpResponseException If building the model fails with {@link OperationStatus#FAILED}.
289+
* @throws NullPointerException If {@code blobContainerUrl} or {@code fileList} is null.
290+
*/
291+
@ServiceMethod(returns = ReturnType.LONG_RUNNING_OPERATION)
292+
public SyncPoller<OperationResult, DocumentModelDetails> beginBuildDocumentModel(
293+
String blobContainerUrl, DocumentModelBuildMode buildMode,
294+
String fileList) {
295+
Objects.requireNonNull(fileList, "'fileList' is required and cannot be null.");
296+
return beginBuildDocumentModelSync(blobContainerUrl, buildMode, null, fileList, null, Context.NONE);
245297
}
246298

247299
SyncPoller<OperationResult, DocumentModelDetails> beginBuildDocumentModelSync(String blobContainerUrl,
248-
DocumentModelBuildMode buildMode, String prefix, BuildDocumentModelOptions buildDocumentModelOptions, Context context) {
300+
DocumentModelBuildMode buildMode, String prefix, String fileList, BuildDocumentModelOptions buildDocumentModelOptions, Context context) {
249301

250302
BuildDocumentModelOptions finalBuildDocumentModelOptions
251303
= getBuildDocumentModelOptions(buildDocumentModelOptions);
@@ -264,6 +316,7 @@ SyncPoller<OperationResult, DocumentModelDetails> beginBuildDocumentModelSync(St
264316
buildMode,
265317
finalModelId,
266318
prefix,
319+
fileList,
267320
finalBuildDocumentModelOptions,
268321
finalContext).apply(cxt)),
269322
buildModelPollingOperation(finalContext),
@@ -1044,6 +1097,7 @@ private PagedResponse<OperationSummary> listNextPageOperationInfo(String nextPag
10441097
}
10451098
}
10461099

1100+
10471101
/**
10481102
* Builds a custom classifier document model.
10491103
* <p>Classifier models can identify multiple documents or multiple instances of a single document. For that,
@@ -1391,13 +1445,13 @@ public Response<Void> deleteDocumentClassifierWithResponse(String classifierId,
13911445
}
13921446

13931447
private Function<PollingContext<OperationResult>, OperationResult> buildModelActivationOperation(
1394-
String blobContainerUrl, DocumentModelBuildMode buildMode, String modelId, String prefix,
1448+
String blobContainerUrl, DocumentModelBuildMode buildMode, String modelId, String prefix, String fileList,
13951449
BuildDocumentModelOptions buildDocumentModelOptions, Context context) {
13961450
return (pollingContext) -> {
13971451
try {
13981452
Objects.requireNonNull(blobContainerUrl, "'blobContainerUrl' cannot be null.");
13991453
BuildDocumentModelRequest buildDocumentModelRequest =
1400-
getBuildDocumentModelRequest(blobContainerUrl, buildMode, modelId, prefix,
1454+
getBuildDocumentModelRequest(blobContainerUrl, buildMode, modelId, prefix, fileList,
14011455
buildDocumentModelOptions);
14021456

14031457
ResponseBase<DocumentModelsBuildModelHeaders, Void>

sdk/formrecognizer/azure-ai-formrecognizer/src/main/java/com/azure/ai/formrecognizer/documentanalysis/implementation/util/Transforms.java

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -243,14 +243,18 @@ public static AnalyzeResult toAnalyzeResultOperation(
243243
}
244244

245245
public static BuildDocumentModelRequest getBuildDocumentModelRequest(String blobContainerUrl,
246-
DocumentModelBuildMode buildMode, String modelId, String prefix, BuildDocumentModelOptions buildDocumentModelOptions) {
247-
BuildDocumentModelRequest buildDocumentModelRequest
248-
= new BuildDocumentModelRequest(modelId, com.azure.ai.formrecognizer.documentanalysis.implementation.models.DocumentBuildMode
249-
.fromString(buildMode.toString()))
250-
.setAzureBlobSource(new com.azure.ai.formrecognizer.documentanalysis.implementation.models.AzureBlobContentSource(blobContainerUrl)
251-
.setPrefix(prefix))
246+
DocumentModelBuildMode buildMode, String modelId, String prefix, String fileList, BuildDocumentModelOptions buildDocumentModelOptions) {
247+
BuildDocumentModelRequest buildDocumentModelRequest = new BuildDocumentModelRequest(modelId,
248+
com.azure.ai.formrecognizer.documentanalysis.implementation.models.DocumentBuildMode
249+
.fromString(buildMode.toString()))
252250
.setDescription(buildDocumentModelOptions.getDescription())
253251
.setTags(buildDocumentModelOptions.getTags());
252+
if (fileList == null) {
253+
buildDocumentModelRequest.setAzureBlobSource(new com.azure.ai.formrecognizer.documentanalysis.implementation.models.AzureBlobContentSource(blobContainerUrl)
254+
.setPrefix(prefix));
255+
} else {
256+
buildDocumentModelRequest.setAzureBlobFileListSource(new com.azure.ai.formrecognizer.documentanalysis.implementation.models.AzureBlobFileListSource(blobContainerUrl, fileList));
257+
}
254258
return buildDocumentModelRequest;
255259
}
256260

@@ -671,18 +675,18 @@ public static OperationResult toDocumentOperationResult(
671675
}
672676

673677
public static AuthorizeCopyRequest getAuthorizeCopyRequest(CopyAuthorizationOptions copyAuthorizationOptions,
674-
String modelId) {
678+
String modelId) {
675679
return new AuthorizeCopyRequest(modelId)
676680
.setDescription(copyAuthorizationOptions.getDescription())
677681
.setTags(copyAuthorizationOptions.getTags());
678682
}
679683

680684
public static ComposeDocumentModelRequest getComposeDocumentModelRequest(List<String> componentModelIds,
681-
ComposeDocumentModelOptions composeDocumentModelOptions,
682-
String modelId) {
685+
ComposeDocumentModelOptions composeDocumentModelOptions,
686+
String modelId) {
683687
return new ComposeDocumentModelRequest(modelId, componentModelIds.stream()
684-
.map(modelIdString -> new ComponentDocumentModelDetails(modelIdString))
685-
.collect(Collectors.toList()))
688+
.map(modelIdString -> new ComponentDocumentModelDetails(modelIdString))
689+
.collect(Collectors.toList()))
686690
.setDescription(composeDocumentModelOptions.getDescription())
687691
.setTags(composeDocumentModelOptions.getTags());
688692
}
@@ -745,7 +749,7 @@ public static Map<String, com.azure.ai.formrecognizer.documentanalysis.implement
745749
innerClassifyDocTypeDetails.setAzureBlobSource(
746750
new com.azure.ai.formrecognizer.documentanalysis.implementation.models.AzureBlobContentSource(
747751
classifierDocumentTypeDetails.getAzureBlobSource().getContainerUrl())
748-
.setPrefix(classifierDocumentTypeDetails.getAzureBlobSource().getPrefix()));
752+
.setPrefix(classifierDocumentTypeDetails.getAzureBlobSource().getPrefix()));
749753
}
750754
innerTags.put(key, innerClassifyDocTypeDetails);
751755
});

0 commit comments

Comments
 (0)