Skip to content

Commit 289f3be

Browse files
[formrecognizer] Add classifier administration methods (Azure#29466)
* add DocumentClassifierDetails model * add admin methods for document classifiers * add samples * pushing an empty commit to prove I have access to your fork * add begin_build_document_classifier + tests + models + regen with poller * add tests for classifiers list/get/delete + record * add samples for begin_build_document_classifier and fix lint * fix sample snippet docstrings * update changelog * fix rtype * fix test-resources.json * update sample * fix documentation --------- Co-authored-by: Krista Pratico <krpratic@microsoft.com>
1 parent c44dd70 commit 289f3be

18 files changed

+1464
-13
lines changed

sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
- Added `features` keyword argument on `begin_analyze_document()` and `begin_analyze_document_from_url()`.
77
- Added `AnalysisFeature` enum with optional document analysis feature to enable.
88
- Added the following optional properties on `DocumentStyle` class: `similar_font_family`, `font_style`, `font_weight`, `color`, `background_color`.
9+
- Added support for custom document classification on `DocumentModelAdministrationClient`: `begin_build_document_classifier`,
10+
`list_document_classifiers`, `get_document_classifier`, and `delete_document_classifier`.
911

1012
### Breaking Changes
1113

sdk/formrecognizer/azure-ai-formrecognizer/assets.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "python",
44
"TagPrefix": "python/formrecognizer/azure-ai-formrecognizer",
5-
"Tag": "python/formrecognizer/azure-ai-formrecognizer_ab3a99b236"
5+
"Tag": "python/formrecognizer/azure-ai-formrecognizer_380d29abf3"
66
}

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
CurrencyValue,
4545
CustomDocumentModelsDetails,
4646
ModelBuildMode,
47+
DocumentClassifierDetails,
4748
DocumentField,
4849
DocumentKeyValuePair,
4950
DocumentKeyValueElement,
@@ -67,6 +68,11 @@
6768
DocumentAnalysisInnerError,
6869
TargetAuthorization,
6970
)
71+
from ._generated.models import ( # patched models
72+
ClassifierDocumentTypeDetails,
73+
AzureBlobFileListSource,
74+
AzureBlobContentSource,
75+
)
7076
from ._api_versions import FormRecognizerApiVersion, DocumentAnalysisApiVersion
7177

7278

@@ -110,6 +116,7 @@
110116
"CurrencyValue",
111117
"CustomDocumentModelsDetails",
112118
"ModelBuildMode",
119+
"DocumentClassifierDetails",
113120
"DocumentField",
114121
"DocumentKeyValueElement",
115122
"DocumentKeyValuePair",
@@ -134,6 +141,9 @@
134141
"DocumentAnalysisError",
135142
"DocumentAnalysisInnerError",
136143
"TargetAuthorization",
144+
"ClassifierDocumentTypeDetails",
145+
"AzureBlobFileListSource",
146+
"AzureBlobContentSource",
137147
]
138148

139149
__VERSION__ = VERSION

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_document_model_administration_client.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
Any,
1111
Union,
1212
List,
13+
Optional,
14+
Mapping,
1315
)
1416
from azure.core.credentials import AzureKeyCredential, TokenCredential
1517
from azure.core.tracing.decorator import distributed_trace
@@ -26,13 +28,15 @@
2628
from ._document_analysis_client import DocumentAnalysisClient
2729
from ._models import (
2830
ModelBuildMode,
31+
DocumentClassifierDetails,
2932
DocumentModelDetails,
3033
DocumentModelSummary,
3134
OperationDetails,
3235
OperationSummary,
3336
ResourceDetails,
3437
TargetAuthorization,
3538
)
39+
from ._generated.models import ClassifierDocumentTypeDetails
3640

3741

3842
class DocumentModelAdministrationClient(FormRecognizerClientBase):
@@ -509,6 +513,161 @@ def get_operation(self, operation_id: str, **kwargs: Any) -> OperationDetails:
509513
api_version=self._api_version,
510514
)
511515

516+
@distributed_trace
def begin_build_document_classifier(
    self,
    doc_types: Mapping[str, ClassifierDocumentTypeDetails],
    *,
    classifier_id: Optional[str] = None,
    description: Optional[str] = None,
    **kwargs: Any
) -> DocumentModelAdministrationLROPoller[DocumentClassifierDetails]:
    """Start a long-running operation that builds a custom document classifier.

    Guidance on how to build and train a custom classifier model is available at
    https://aka.ms/azsdk/formrecognizer/buildclassifiermodel.

    :param doc_types: Required. Mapping of document types to classify against.
    :keyword str classifier_id: Unique document classifier name.
        When omitted, a random UUID string is generated and used as the classifier ID.
    :keyword str description: Document classifier description.
    :return: An instance of a DocumentModelAdministrationLROPoller. Call `result()` on the poller
        object to return a :class:`~azure.ai.formrecognizer.DocumentClassifierDetails`.
    :rtype: ~azure.ai.formrecognizer.DocumentModelAdministrationLROPoller[DocumentClassifierDetails]
    :raises ~azure.core.exceptions.HttpResponseError:
    :raises ValueError: If the client's API version is ``V2022_08_31``.

    .. versionadded:: 2023-02-28-preview
        The *begin_build_document_classifier* client method.

    .. admonition:: Example:

        .. literalinclude:: ../samples/v3.2/sample_build_classifier.py
            :start-after: [START build_classifier]
            :end-before: [END build_classifier]
            :language: python
            :dedent: 4
            :caption: Build a document classifier.
    """
    # Classifier operations are rejected client-side for the 2022-08-31 API version.
    if self._api_version == DocumentAnalysisApiVersion.V2022_08_31:
        raise ValueError(
            "Method 'begin_build_document_classifier()' is only available for API version "
            "V2023_02_28_PREVIEW and later"
        )

    def _to_classifier_details(raw_response, _, headers):  # pylint: disable=unused-argument
        # The final LRO payload is the build operation; its `result` holds the classifier itself.
        build_operation = self._deserialize(
            self._generated_models.DocumentClassifierBuildOperationDetails, raw_response
        )
        generated_details = self._deserialize(
            self._generated_models.DocumentClassifierDetails, build_operation.result
        )
        return DocumentClassifierDetails._from_generated(generated_details)

    # Pull the poller-related options out of kwargs; whatever remains is forwarded as-is.
    cls = kwargs.pop("cls", _to_classifier_details)
    continuation_token = kwargs.pop("continuation_token", None)
    polling_interval = kwargs.pop("polling_interval", self._client._config.polling_interval)
    if classifier_id is None:
        classifier_id = str(uuid.uuid4())

    start_build = self._client.document_classifiers.begin_build_classifier
    build_request = self._generated_models.BuildDocumentClassifierRequest(
        classifier_id=classifier_id,
        description=description,
        doc_types=doc_types,
    )
    polling_method = LROBasePolling(
        timeout=polling_interval,
        lro_algorithms=[DocumentModelAdministrationPolling()],
        **kwargs
    )
    return start_build(
        build_request=build_request,
        cls=cls,
        continuation_token=continuation_token,
        polling=polling_method,
        **kwargs
    )
577+
578+
@distributed_trace
def get_document_classifier(self, classifier_id: str, **kwargs: Any) -> DocumentClassifierDetails:
    """Retrieve a single document classifier, looked up by its ID.

    :param str classifier_id: Classifier identifier.
    :return: DocumentClassifierDetails
    :rtype: ~azure.ai.formrecognizer.DocumentClassifierDetails
    :raises ~azure.core.exceptions.HttpResponseError or ~azure.core.exceptions.ResourceNotFoundError:
    :raises ValueError: If `classifier_id` is None or empty, or if the client's
        API version is ``V2022_08_31``.

    .. versionadded:: 2023-02-28-preview
        The *get_document_classifier* client method.

    .. admonition:: Example:

        .. literalinclude:: ../samples/v3.2/sample_manage_classifiers.py
            :start-after: [START get_document_classifier]
            :end-before: [END get_document_classifier]
            :language: python
            :dedent: 4
            :caption: Get a classifier by its ID.
    """
    # The ID is validated first, so an empty ID is reported before the API-version check.
    if not classifier_id:
        raise ValueError("classifier_id cannot be None or empty.")

    if self._api_version == DocumentAnalysisApiVersion.V2022_08_31:
        raise ValueError(
            "Method 'get_document_classifier()' is only available for API version "
            "V2023_02_28_PREVIEW and later"
        )

    # Convert the generated-layer model into the public DocumentClassifierDetails type.
    return DocumentClassifierDetails._from_generated(
        self._client.document_classifiers.get_classifier(classifier_id=classifier_id, **kwargs)
    )
608+
609+
@distributed_trace
def list_document_classifiers(self, **kwargs: Any) -> ItemPaged[DocumentClassifierDetails]:
    """Enumerate the document classifiers, returning for each one its classifier ID,
    description, and when it was created.

    :return: Pageable of DocumentClassifierDetails.
    :rtype: ~azure.core.paging.ItemPaged[DocumentClassifierDetails]
    :raises ~azure.core.exceptions.HttpResponseError:
    :raises ValueError: If the client's API version is ``V2022_08_31``.

    .. versionadded:: 2023-02-28-preview
        The *list_document_classifiers* client method.

    .. admonition:: Example:

        .. literalinclude:: ../samples/v3.2/sample_manage_classifiers.py
            :start-after: [START list_document_classifiers]
            :end-before: [END list_document_classifiers]
            :language: python
            :dedent: 4
            :caption: List all classifiers that were built successfully under the Form Recognizer resource.
    """
    if self._api_version == DocumentAnalysisApiVersion.V2022_08_31:
        raise ValueError(
            "Method 'list_document_classifiers()' is only available for API version "
            "V2023_02_28_PREVIEW and later"
        )

    def _convert_page(generated_classifiers):
        # Default per-page converter: generated models -> public DocumentClassifierDetails.
        return [DocumentClassifierDetails._from_generated(item) for item in generated_classifiers]

    # A caller-supplied `cls` replaces the default converter.
    page_converter = kwargs.pop("cls", _convert_page)
    return self._client.document_classifiers.list_classifiers(  # type: ignore
        cls=page_converter,
        **kwargs
    )
641+
642+
@distributed_trace
def delete_document_classifier(self, classifier_id: str, **kwargs: Any) -> None:
    """Remove a document classifier, identified by its ID.

    :param str classifier_id: Classifier identifier.
    :rtype: None
    :raises ~azure.core.exceptions.HttpResponseError or ~azure.core.exceptions.ResourceNotFoundError:
    :raises ValueError: If `classifier_id` is None or empty, or if the client's
        API version is ``V2022_08_31``.

    .. versionadded:: 2023-02-28-preview
        The *delete_document_classifier* client method.

    .. admonition:: Example:

        .. literalinclude:: ../samples/v3.2/sample_manage_classifiers.py
            :start-after: [START delete_document_classifier]
            :end-before: [END delete_document_classifier]
            :language: python
            :dedent: 4
            :caption: Delete a classifier.
    """
    # The ID is validated first, so an empty ID is reported before the API-version check.
    if not classifier_id:
        raise ValueError("classifier_id cannot be None or empty.")

    if self._api_version == DocumentAnalysisApiVersion.V2022_08_31:
        raise ValueError(
            "Method 'delete_document_classifier()' is only available for API version "
            "V2023_02_28_PREVIEW and later"
        )

    delete_classifier = self._client.document_classifiers.delete_classifier
    return delete_classifier(classifier_id=classifier_id, **kwargs)
670+
512671
def get_document_analysis_client(self, **kwargs: Any) -> DocumentAnalysisClient:
513672
"""Get an instance of a DocumentAnalysisClient from DocumentModelAdministrationClient.
514673

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_generated/v2023_02_28_preview/aio/operations/_document_classifiers_operations.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
# --------------------------------------------------------------------------
99
from typing import Any, AsyncIterable, Callable, Dict, IO, Optional, TypeVar, Union, cast
1010

11+
from .....aio._async_polling import AsyncDocumentModelAdministrationClientLROPoller
1112
from azure.core.async_paging import AsyncItemPaged, AsyncList
1213
from azure.core.exceptions import ClientAuthenticationError, HttpResponseError, ResourceExistsError, ResourceNotFoundError, map_error
1314
from azure.core.pipeline import PipelineResponse
@@ -104,7 +105,7 @@ async def begin_build_classifier( # pylint: disable=inconsistent-return-stateme
104105
self,
105106
build_request: _models.BuildDocumentClassifierRequest,
106107
**kwargs: Any
107-
) -> AsyncLROPoller[None]:
108+
) -> AsyncDocumentModelAdministrationClientLROPoller[None]:
108109
"""Build document classifier.
109110
110111
Builds a custom document classifier.
@@ -120,8 +121,9 @@ async def begin_build_classifier( # pylint: disable=inconsistent-return-stateme
120121
:paramtype polling: bool or ~azure.core.polling.AsyncPollingMethod
121122
:keyword int polling_interval: Default waiting time between two polls for LRO operations if no
122123
Retry-After header is present.
123-
:return: An instance of AsyncLROPoller that returns either None or the result of cls(response)
124-
:rtype: ~azure.core.polling.AsyncLROPoller[None]
124+
:return: An instance of AsyncDocumentModelAdministrationClientLROPoller that returns either
125+
None or the result of cls(response)
126+
:rtype: ~.....aio._async_polling.AsyncDocumentModelAdministrationClientLROPoller[None]
125127
:raises: ~azure.core.exceptions.HttpResponseError
126128
"""
127129
_headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
@@ -167,13 +169,13 @@ def get_long_running_output(pipeline_response):
167169
elif polling is False: polling_method = cast(AsyncPollingMethod, AsyncNoPolling())
168170
else: polling_method = polling
169171
if cont_token:
170-
return AsyncLROPoller.from_continuation_token(
172+
return AsyncDocumentModelAdministrationClientLROPoller.from_continuation_token(
171173
polling_method=polling_method,
172174
continuation_token=cont_token,
173175
client=self._client,
174176
deserialization_callback=get_long_running_output
175177
)
176-
return AsyncLROPoller(self._client, raw_result, get_long_running_output, polling_method)
178+
return AsyncDocumentModelAdministrationClientLROPoller(self._client, raw_result, get_long_running_output, polling_method)
177179

178180
begin_build_classifier.metadata = {'url': "/documentClassifiers:build"} # type: ignore
179181

0 commit comments

Comments
 (0)