Skip to content

Commit 0d1a7bc

Browse files
[textanalytics] expose action errors (Azure#21472)
* update implementation * add tests and edits * update changelog * review feedback
1 parent e54b08f commit 0d1a7bc

File tree

7 files changed

+644
-4
lines changed

7 files changed

+644
-4
lines changed

sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ This version of the SDK defaults to the latest supported API version, which curr
1414

1515
### Bugs Fixed
1616
- Restarting a long-running operation from a saved state is now supported for the `begin_analyze_actions` and `begin_recognize_healthcare_entities` methods.
17+
- In the event of an action-level error, available partial results are now returned for any successful actions in `begin_analyze_actions`.
1718

1819
### Other Changes
1920
- Package requires [azure-core](https://pypi.org/project/azure-core/) version 1.16.0 or greater

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1751,6 +1751,18 @@ class _AnalyzeActionsType(str, Enum):
17511751
MULTI_CATEGORY_CLASSIFY = "multi_category_classify"
17521752

17531753

1754+
class ActionPointerKind(str, Enum):
    """Task-collection names that may appear in an action error pointer.

    Each value is the camelCase path segment that follows ``#/tasks/`` in an
    error target, for example ``#/tasks/entityRecognitionTasks/0``.
    Members are also ``str`` instances, so they compare equal to (and can be
    joined as) their plain string values.
    """

    # Entity-oriented actions
    RECOGNIZE_ENTITIES = "entityRecognitionTasks"
    RECOGNIZE_PII_ENTITIES = "entityRecognitionPiiTasks"

    # Key phrase, linking, sentiment and summarization actions
    EXTRACT_KEY_PHRASES = "keyPhraseExtractionTasks"
    RECOGNIZE_LINKED_ENTITIES = "entityLinkingTasks"
    ANALYZE_SENTIMENT = "sentimentAnalysisTasks"
    EXTRACT_SUMMARY = "extractiveSummarizationTasks"

    # Actions whose pointer segment carries the "custom" prefix
    RECOGNIZE_CUSTOM_ENTITIES = "customEntityRecognitionTasks"
    SINGLE_CATEGORY_CLASSIFY = "customSingleClassificationTasks"
    MULTI_CATEGORY_CLASSIFY = "customMultiClassificationTasks"
1764+
1765+
17541766
class RecognizeEntitiesAction(DictMixin):
17551767
"""RecognizeEntitiesAction encapsulates the parameters for starting a long-running Entities Recognition operation.
17561768

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_response_handlers.py

Lines changed: 67 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
RecognizeCustomEntitiesResult,
3838
SingleCategoryClassifyResult,
3939
MultiCategoryClassifyResult,
40+
ActionPointerKind
4041
)
4142

4243

@@ -340,27 +341,89 @@ def _get_property_name_from_task_type(task_type): # pylint: disable=too-many-re
340341
return "key_phrase_extraction_tasks"
341342

342343

343-
def _get_good_result(task, doc_id_order, response_headers, returned_tasks_object):
344+
def get_task_from_pointer(task_type):
    """Map an action pointer kind to the snake_case tasks attribute name.

    :param task_type: The task segment taken from an action error pointer;
        it is compared with ``==`` against the ActionPointerKind members.
    :return: The attribute name under which that kind of task is stored.
        Any value not listed in the table below (which includes the key
        phrase extraction kind) yields ``"key_phrase_extraction_tasks"``.
    :rtype: str
    """
    # Ordered (pointer kind, attribute name) pairs; the first equal kind wins.
    pointer_to_property = (
        (ActionPointerKind.RECOGNIZE_ENTITIES, "entity_recognition_tasks"),
        (ActionPointerKind.RECOGNIZE_PII_ENTITIES, "entity_recognition_pii_tasks"),
        (ActionPointerKind.RECOGNIZE_LINKED_ENTITIES, "entity_linking_tasks"),
        (ActionPointerKind.ANALYZE_SENTIMENT, "sentiment_analysis_tasks"),
        (ActionPointerKind.EXTRACT_SUMMARY, "extractive_summarization_tasks"),
        (ActionPointerKind.RECOGNIZE_CUSTOM_ENTITIES, "custom_entity_recognition_tasks"),
        (ActionPointerKind.SINGLE_CATEGORY_CLASSIFY, "custom_single_classification_tasks"),
        (ActionPointerKind.MULTI_CATEGORY_CLASSIFY, "custom_multi_classification_tasks"),
    )
    for pointer_kind, property_name in pointer_to_property:
        if task_type == pointer_kind:
            return property_name
    # No explicit entry matched: fall back to key phrase extraction.
    return "key_phrase_extraction_tasks"
362+
363+
364+
def resolve_action_pointer(pointer):
    """Split an action error pointer into a tasks attribute name and an index.

    :param str pointer: Error target of the form
        ``#/tasks/<task collection>/<index>``, e.g.
        ``#/tasks/entityRecognitionPiiTasks/1``.
    :return: A ``(property_name, index)`` tuple, where ``property_name`` is
        the value returned by ``get_task_from_pointer`` for the task
        collection and ``index`` is the position within that collection.
    :rtype: tuple
    :raises ValueError: If ``pointer`` does not contain a recognized
        ``#/tasks/<task collection>/<index>`` sequence.
    """
    import re
    pointer_union = "|".join(kind.value for kind in ActionPointerKind)
    found = re.search(r"#/tasks/({})/(\d+)".format(pointer_union), pointer)
    if found:
        # Read the index from the matched digits rather than from the last
        # character of the pointer, so indexes of 10 and above are not
        # truncated to their final digit.
        index = int(found.group(2))
        property_name = get_task_from_pointer(found.group(1))
        return property_name, index
    raise ValueError(
        "Unexpected response from service - action pointer '{}' is not a valid action pointer.".format(pointer)
    )
376+
377+
378+
def get_ordered_errors(tasks_obj, task_name, doc_id_order):
    """Build one DocumentError per input document for an action that failed.

    :param tasks_obj: Job state object exposing ``errors`` (items with
        ``code``, ``message`` and ``target``) and ``tasks`` (the per-kind
        task collections).
    :param str task_name: Name of the action whose error is wanted.
    :param doc_id_order: Document ids, in the order results should be returned.
    :return: A list with one DocumentError per id in ``doc_id_order``, each
        carrying the code and message of the error that targets the action.
    :rtype: list
    :raises HttpResponseError: If any error has no ``target``; the message
        concatenates every error's code and message.
    :raises ValueError: If no error targets the action named ``task_name``,
        or if an error target is not a valid action pointer.
    """
    # A missing errors collection is treated as "no errors" so the descriptive
    # ValueError below is raised instead of a TypeError on iteration.
    action_errors = tasks_obj.errors or []

    # An error without a target cannot be attributed to a specific action,
    # so everything is surfaced as a single exception.
    if any(error.target is None for error in action_errors):
        message = "".join("({}) {}".format(err.code, err.message) for err in action_errors)
        raise HttpResponseError(message=message)

    # Find the error that points at the requested action and replicate it
    # for every input document.
    for err in action_errors:
        property_name, index = resolve_action_pointer(err.target)
        actions = getattr(tasks_obj.tasks, property_name)
        action = actions[index]
        if action.task_name == task_name:
            return [
                DocumentError(
                    id=doc_id,
                    error=TextAnalyticsError(code=err.code, message=err.message)
                ) for doc_id in doc_id_order
            ]
    raise ValueError("Unexpected response from service - no errors for missing action results.")
399+
400+
401+
def _get_doc_results(task, doc_id_order, response_headers, returned_tasks_object):
    """Return the per-document results, or errors, for a single action.

    :param task: A ``(task_type, task_name)`` pair identifying the action.
    :param doc_id_order: Document ids, in the order results should be returned.
    :param response_headers: Passed through to the deserialization callback.
    :param returned_tasks_object: Job state object; its ``tasks`` attribute
        holds the per-kind task collections.
    :return: The value produced by the deserialization callback for the
        action's results, or the output of ``get_ordered_errors`` when the
        action has no results.
    :raises ValueError: If no returned task is named ``task_name``.
    """
    job_tasks = returned_tasks_object.tasks
    action_type, task_name = task
    deserialize = _get_deserialization_callback_from_task_type(action_type)
    candidates = getattr(job_tasks, _get_property_name_from_task_type(action_type))

    # Pick the first returned task carrying the requested name.
    for candidate in candidates:
        if candidate.task_name == task_name:
            matched_task = candidate
            break
    else:
        raise ValueError("Unexpected response from service - unable to deserialize result.")

    # No results on the matched task: report the action errors instead.
    if matched_task.results is None:
        return get_ordered_errors(returned_tasks_object, task_name, doc_id_order)
    return deserialize(doc_id_order, matched_task.results, response_headers, lro=True)
357420

358421

359422
def get_iter_items(doc_id_order, task_order, response_headers, analyze_job_state):
360423
iter_items = defaultdict(list) # map doc id to action results
361-
returned_tasks_object = analyze_job_state.tasks
424+
returned_tasks_object = analyze_job_state
362425
for task in task_order:
363-
results = _get_good_result(
426+
results = _get_doc_results(
364427
task,
365428
doc_id_order,
366429
response_headers,
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"jobId": "59678d1c-109e-4d93-a42f-05eb5e063525",
3+
"lastUpdateDateTime": "2021-10-21T23:02:34Z",
4+
"createdDateTime": "2021-10-21T23:02:27Z",
5+
"expirationDateTime": "2021-10-22T23:02:27Z",
6+
"status": "partiallyCompleted",
7+
"errors": [
8+
{
9+
"code": "InternalServerError",
10+
"message": "1 out of 3 job tasks failed. Failed job tasks : v3.2-preview.2/custom/entities/general."
11+
}
12+
],
13+
"tasks": {
14+
"completed": 2,
15+
"failed": 1,
16+
"inProgress": 0,
17+
"total": 3,
18+
"customEntityRecognitionTasks": [
19+
{
20+
"lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
21+
"taskName": "2",
22+
"state": "failed"
23+
}
24+
],
25+
"customSingleClassificationTasks": [
26+
{
27+
"lastUpdateDateTime": "2021-10-21T23:02:29.3641823Z",
28+
"taskName": "0",
29+
"state": "succeeded"
30+
}
31+
],
32+
"customMultiClassificationTasks": [
33+
{
34+
"lastUpdateDateTime": "2021-10-21T23:02:28.7184297Z",
35+
"taskName": "1",
36+
"state": "succeeded"
37+
}
38+
]
39+
}
40+
}
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
{
2+
"jobId": "59678d1c-109e-4d93-a42f-05eb5e063525",
3+
"lastUpdateDateTime": "2021-10-21T23:02:34Z",
4+
"createdDateTime": "2021-10-21T23:02:27Z",
5+
"expirationDateTime": "2021-10-22T23:02:27Z",
6+
"status": "partiallyCompleted",
7+
"errors": [
8+
{
9+
"code": "InvalidRequest",
10+
"message": "Some error2",
11+
"target": "#/tasks/entityRecognitionPiiTasks/0"
12+
},
13+
{
14+
"code": "InvalidRequest",
15+
"message": "Some error6",
16+
"target": "#/tasks/entityRecognitionPiiTasks/1"
17+
},
18+
{
19+
"code": "InvalidRequest",
20+
"message": "Some error0",
21+
"target": "#/tasks/entityRecognitionTasks/0"
22+
},
23+
{
24+
"code": "InvalidRequest",
25+
"message": "Some error1",
26+
"target": "#/tasks/keyPhraseExtractionTasks/0"
27+
},
28+
{
29+
"code": "InvalidRequest",
30+
"message": "Some error3",
31+
"target": "#/tasks/entityLinkingTasks/0"
32+
},
33+
{
34+
"code": "InvalidRequest",
35+
"message": "Some error4",
36+
"target": "#/tasks/sentimentAnalysisTasks/0"
37+
},
38+
{
39+
"code": "InvalidRequest",
40+
"message": "Some error5",
41+
"target": "#/tasks/extractiveSummarizationTasks/0"
42+
},
43+
{
44+
"code": "InvalidRequest",
45+
"message": "Some error9",
46+
"target": "#/tasks/customEntityRecognitionTasks/0"
47+
},
48+
{
49+
"code": "InvalidRequest",
50+
"message": "Some error7",
51+
"target": "#/tasks/customSingleClassificationTasks/0"
52+
},
53+
{
54+
"code": "InvalidRequest",
55+
"message": "Some error8",
56+
"target": "#/tasks/customMultiClassificationTasks/0"
57+
}
58+
],
59+
"tasks": {
60+
"completed": 1,
61+
"failed": 10,
62+
"inProgress": 0,
63+
"total": 11,
64+
"entityRecognitionTasks": [
65+
{
66+
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
67+
"taskName": "0",
68+
"state": "failed"
69+
}
70+
],
71+
"entityRecognitionPiiTasks": [
72+
{
73+
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
74+
"taskName": "2",
75+
"state": "failed"
76+
},
77+
{
78+
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
79+
"taskName": "6",
80+
"state": "failed"
81+
}
82+
],
83+
"keyPhraseExtractionTasks": [
84+
{
85+
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
86+
"taskName": "1",
87+
"state": "failed"
88+
}
89+
],
90+
"entityLinkingTasks": [
91+
{
92+
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
93+
"taskName": "3",
94+
"state": "failed"
95+
}
96+
],
97+
"sentimentAnalysisTasks": [
98+
{
99+
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
100+
"taskName": "4",
101+
"state": "failed"
102+
}
103+
],
104+
"extractiveSummarizationTasks": [
105+
{
106+
"lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
107+
"taskName": "5",
108+
"state": "failed"
109+
}
110+
],
111+
"customEntityRecognitionTasks": [
112+
{
113+
"lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
114+
"taskName": "9",
115+
"state": "failed"
116+
}
117+
],
118+
"customSingleClassificationTasks": [
119+
{
120+
"lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
121+
"taskName": "7",
122+
"state": "failed"
123+
},
124+
{
125+
"lastUpdateDateTime": "2021-10-21T23:02:29.3641823Z",
126+
"taskName": "10",
127+
"state": "succeeded",
128+
"results": {
129+
"statistics": {
130+
"documentsCount": 2,
131+
"validDocumentsCount": 1,
132+
"erroneousDocumentsCount": 1,
133+
"transactionsCount": 1
134+
},
135+
"documents": [
136+
{
137+
"id": "1",
138+
"classification": {
139+
"category": "RateBook",
140+
"confidenceScore": 0.76
141+
},
142+
"statistics": {
143+
"charactersCount": 295,
144+
"transactionsCount": 1
145+
},
146+
"warnings": []
147+
}
148+
],
149+
"errors": [
150+
{
151+
"id": "2",
152+
"error": {
153+
"code": "InvalidArgument",
154+
"message": "Invalid document in request.",
155+
"innererror": {
156+
"code": "InvalidDocument",
157+
"message": "Document text is empty."
158+
}
159+
}
160+
}
161+
],
162+
"projectName": "single_category_classify_project_name",
163+
"deploymentName": "single_category_classify_project_name"
164+
}
165+
}
166+
],
167+
"customMultiClassificationTasks": [
168+
{
169+
"lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
170+
"taskName": "8",
171+
"state": "failed"
172+
}
173+
]
174+
}
175+
}

0 commit comments

Comments
 (0)