Skip to content

Commit 5c0d6dd

Browse files
[formrecognizer] Code updates (Azure#25825)
* fix value checks in dac
* update test
* remove repeat import
* fix url stream test, remove recordings
* update test assertion, add code comments for future maintenance
1 parent c1b28c4 commit 5c0d6dd

7 files changed

+68
-104
lines changed

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_document_analysis_client.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -105,18 +105,28 @@ def begin_analyze_document(
105105
:caption: Analyze a custom document. For more samples see the `samples` folder.
106106
"""
107107

108-
if not model_id:
109-
raise ValueError("model_id cannot be None or empty.")
110-
111108
cls = kwargs.pop("cls", self._analyze_document_callback)
112109
continuation_token = kwargs.pop("continuation_token", None)
113110

111+
if continuation_token is not None:
112+
return self._client.begin_analyze_document( # type: ignore
113+
model_id=model_id,
114+
analyze_request=document, # type: ignore
115+
content_type="application/octet-stream",
116+
string_index_type="unicodeCodePoint",
117+
continuation_token=continuation_token,
118+
cls=cls,
119+
**kwargs
120+
)
121+
122+
if not model_id:
123+
raise ValueError("model_id cannot be None or empty.")
124+
114125
return self._client.begin_analyze_document( # type: ignore
115126
model_id=model_id,
116127
analyze_request=document, # type: ignore
117128
content_type="application/octet-stream",
118129
string_index_type="unicodeCodePoint",
119-
continuation_token=continuation_token,
120130
cls=cls,
121131
**kwargs
122132
)
@@ -154,22 +164,33 @@ def begin_analyze_document_from_url(
154164
:caption: Analyze a receipt. For more samples see the `samples` folder.
155165
"""
156166

167+
cls = kwargs.pop("cls", self._analyze_document_callback)
168+
continuation_token = kwargs.pop("continuation_token", None)
169+
170+
# continuation token requests do not perform the same value checks as
171+
# regular analysis requests
172+
if continuation_token is not None:
173+
return self._client.begin_analyze_document( # type: ignore
174+
model_id=model_id,
175+
analyze_request={"urlSource": document_url}, # type: ignore
176+
string_index_type="unicodeCodePoint",
177+
continuation_token=continuation_token,
178+
cls=cls,
179+
**kwargs
180+
)
181+
157182
if not model_id:
158183
raise ValueError("model_id cannot be None or empty.")
159184

160185
if not isinstance(document_url, str):
161186
raise ValueError(
162-
"'document_url' needs to be of type 'str'."
187+
"'document_url' needs to be of type 'str'. "
163188
"Please see `begin_analyze_document()` to pass a byte stream.")
164189

165-
cls = kwargs.pop("cls", self._analyze_document_callback)
166-
continuation_token = kwargs.pop("continuation_token", None)
167-
168190
return self._client.begin_analyze_document( # type: ignore
169191
model_id=model_id,
170192
analyze_request={"urlSource": document_url}, # type: ignore
171193
string_index_type="unicodeCodePoint",
172-
continuation_token=continuation_token,
173194
cls=cls,
174195
**kwargs
175196
)

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_document_analysis_client_async.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,18 +108,28 @@ async def begin_analyze_document(
108108
:caption: Analyze a custom document. For more samples see the `samples` folder.
109109
"""
110110

111-
if not model_id:
112-
raise ValueError("model_id cannot be None or empty.")
113-
114111
cls = kwargs.pop("cls", self._analyze_document_callback)
115112
continuation_token = kwargs.pop("continuation_token", None)
116113

114+
if continuation_token is not None:
115+
return await self._client.begin_analyze_document( # type: ignore
116+
model_id=model_id,
117+
analyze_request=document, # type: ignore
118+
content_type="application/octet-stream",
119+
string_index_type="unicodeCodePoint",
120+
continuation_token=continuation_token,
121+
cls=cls,
122+
**kwargs
123+
)
124+
125+
if not model_id:
126+
raise ValueError("model_id cannot be None or empty.")
127+
117128
return await self._client.begin_analyze_document( # type: ignore
118129
model_id=model_id,
119130
analyze_request=document, # type: ignore
120131
content_type="application/octet-stream",
121132
string_index_type="unicodeCodePoint",
122-
continuation_token=continuation_token,
123133
cls=cls,
124134
**kwargs
125135
)
@@ -157,17 +167,32 @@ async def begin_analyze_document_from_url(
157167
:caption: Analyze a receipt. For more samples see the `samples` folder.
158168
"""
159169

170+
cls = kwargs.pop("cls", self._analyze_document_callback)
171+
continuation_token = kwargs.pop("continuation_token", None)
172+
173+
# continuation token requests do not perform the same value checks as
174+
# regular analysis requests
175+
if continuation_token is not None:
176+
return await self._client.begin_analyze_document( # type: ignore
177+
model_id=model_id,
178+
analyze_request={"urlSource": document_url}, # type: ignore
179+
string_index_type="unicodeCodePoint",
180+
continuation_token=continuation_token,
181+
cls=cls,
182+
**kwargs
183+
)
160184
if not model_id:
161185
raise ValueError("model_id cannot be None or empty.")
162186

163-
cls = kwargs.pop("cls", self._analyze_document_callback)
164-
continuation_token = kwargs.pop("continuation_token", None)
187+
if not isinstance(document_url, str):
188+
raise ValueError(
189+
"'document_url' needs to be of type 'str'. "
190+
"Please see `begin_analyze_document()` to pass a byte stream.")
165191

166192
return await self._client.begin_analyze_document( # type: ignore
167193
model_id=model_id,
168194
analyze_request={"urlSource": document_url}, # type: ignore
169195
string_index_type="unicodeCodePoint",
170-
continuation_token=continuation_token,
171196
cls=cls,
172197
**kwargs
173198
)

sdk/formrecognizer/azure-ai-formrecognizer/tests/recordings/test_dac_analyze_prebuilts_from_url.pyTestDACAnalyzePrebuiltsFromUrltest_receipt_url_pass_stream.json

Lines changed: 0 additions & 42 deletions
This file was deleted.

sdk/formrecognizer/azure-ai-formrecognizer/tests/recordings/test_dac_analyze_prebuilts_from_url_async.pyTestDACAnalyzePrebuiltsFromUrlAsynctest_receipt_url_pass_stream.json

Lines changed: 0 additions & 39 deletions
This file was deleted.

sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_custom_model_async.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from azure.core.credentials import AzureKeyCredential
1212
from azure.ai.formrecognizer._generated.v2022_08_31.models import AnalyzeResultOperation
1313
from azure.ai.formrecognizer.aio import DocumentAnalysisClient, DocumentModelAdministrationClient
14-
from azure.ai.formrecognizer._generated.v2022_08_31.models import AnalyzeResultOperation
1514
from azure.ai.formrecognizer import AnalyzeResult
1615
from preparers import FormRecognizerPreparer
1716
from asynctestcase import AsyncFormRecognizerTest

sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_prebuilts_from_url.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ def test_receipt_url_pass_stream(self, **kwargs):
218218
with open(self.receipt_png, "rb") as receipt:
219219
with pytest.raises(ValueError) as e:
220220
poller = client.begin_analyze_document_from_url("prebuilt-receipt", receipt)
221-
assert str(e) == "'document_url' needs to be of type 'str'. Please see `begin_analyze_document()` to pass a stream."
221+
assert str(e.value) == "'document_url' needs to be of type 'str'. Please see `begin_analyze_document()` to pass a byte stream."
222222

223223
@FormRecognizerPreparer()
224224
@DocumentAnalysisClientPreparer()
@@ -355,7 +355,7 @@ def test_receipt_continuation_token(self, **kwargs):
355355

356356
initial_poller = client.begin_analyze_document_from_url("prebuilt-receipt", self.receipt_url_jpg)
357357
cont_token = initial_poller.continuation_token()
358-
poller = client.begin_analyze_document_from_url("prebuilt-receipt", None, continuation_token=cont_token)
358+
poller = client.begin_analyze_document_from_url(None, None, continuation_token=cont_token)
359359
result = poller.result()
360360
assert result is not None
361361
initial_poller.wait() # necessary so azure-devtools doesn't throw assertion error

sdk/formrecognizer/azure-ai-formrecognizer/tests/test_dac_analyze_prebuilts_from_url_async.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -236,16 +236,16 @@ async def test_receipt_bad_url(self, client):
236236

237237
@FormRecognizerPreparer()
238238
@DocumentAnalysisClientPreparer()
239-
@recorded_by_proxy_async
240-
async def test_receipt_url_pass_stream(self, client):
239+
async def test_receipt_url_pass_stream(self, **kwargs):
240+
client = kwargs.get("client", None)
241241

242242
with open(self.receipt_png, "rb") as fd:
243243
receipt = fd.read(4) # makes the recording smaller
244244

245-
with pytest.raises(HttpResponseError):
245+
with pytest.raises(ValueError) as e:
246246
async with client:
247247
poller = await client.begin_analyze_document_from_url("prebuilt-receipt", receipt)
248-
result = await poller.result()
248+
assert str(e.value) == "'document_url' needs to be of type 'str'. Please see `begin_analyze_document()` to pass a byte stream."
249249

250250
@FormRecognizerPreparer()
251251
@DocumentAnalysisClientPreparer()

0 commit comments

Comments
 (0)