[textanalytics] expose action errors (Azure#21472)

kristapratico · web-flow · commit 0d1a7bc7a913 · 2021-10-29T09:29:51.000-07:00
* update implementation

* add tests and edits

* update changelog

* review feedback
diff --git a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
@@ -14,6 +14,7 @@ This version of the SDK defaults to the latest supported API version, which curr
 
 ### Bugs Fixed
 - Restarting a long-running operation from a saved state is now supported for the `begin_analyze_actions` and `begin_recognize_healthcare_entities` methods.
+- In the event of an action-level error, `begin_analyze_actions` now returns the available partial results for any actions that succeeded.
 
 ### Other Changes
 - Package requires [azure-core](https://pypi.org/project/azure-core/) version 1.16.0 or greater
diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py
@@ -1751,6 +1751,18 @@ class _AnalyzeActionsType(str, Enum):
     MULTI_CATEGORY_CLASSIFY = "multi_category_classify"
 
 
+class ActionPointerKind(str, Enum):
+    """Task-array names that can appear in an action error pointer.
+
+    Each value is the camelCase name of a task array in the service response,
+    i.e. the middle segment of a pointer such as "#/tasks/entityLinkingTasks/0".
+    """
+
+    # Because the members also subclass str, they compare equal to the raw
+    # segment text taken from a pointer.
+    RECOGNIZE_ENTITIES = "entityRecognitionTasks"
+    RECOGNIZE_PII_ENTITIES = "entityRecognitionPiiTasks"
+    EXTRACT_KEY_PHRASES = "keyPhraseExtractionTasks"
+    RECOGNIZE_LINKED_ENTITIES = "entityLinkingTasks"
+    ANALYZE_SENTIMENT = "sentimentAnalysisTasks"
+    EXTRACT_SUMMARY = "extractiveSummarizationTasks"
+    RECOGNIZE_CUSTOM_ENTITIES = "customEntityRecognitionTasks"
+    SINGLE_CATEGORY_CLASSIFY = "customSingleClassificationTasks"
+    MULTI_CATEGORY_CLASSIFY = "customMultiClassificationTasks"
+
+
 class RecognizeEntitiesAction(DictMixin):
     """RecognizeEntitiesAction encapsulates the parameters for starting a long-running Entities Recognition operation.
 
diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_response_handlers.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_response_handlers.py
@@ -37,6 +37,7 @@
     RecognizeCustomEntitiesResult,
     SingleCategoryClassifyResult,
     MultiCategoryClassifyResult,
+    ActionPointerKind
 )
 
 
@@ -340,27 +341,89 @@ def _get_property_name_from_task_type(task_type):  # pylint: disable=too-many-re
     return "key_phrase_extraction_tasks"
 
 
-def _get_good_result(task, doc_id_order, response_headers, returned_tasks_object):
+def get_task_from_pointer(task_type):
+    """Translate a task-array name from an action pointer into its attribute name.
+
+    :param task_type: Task-array name taken from a pointer, e.g. "entityLinkingTasks".
+        It is compared against the :class:`ActionPointerKind` members.
+    :return: The snake_case attribute name for that task array. A value that matches
+        none of the kinds listed below yields "key_phrase_extraction_tasks".
+    :rtype: str
+    """
+    attribute_by_kind = (
+        (ActionPointerKind.RECOGNIZE_ENTITIES, "entity_recognition_tasks"),
+        (ActionPointerKind.RECOGNIZE_PII_ENTITIES, "entity_recognition_pii_tasks"),
+        (ActionPointerKind.RECOGNIZE_LINKED_ENTITIES, "entity_linking_tasks"),
+        (ActionPointerKind.ANALYZE_SENTIMENT, "sentiment_analysis_tasks"),
+        (ActionPointerKind.EXTRACT_SUMMARY, "extractive_summarization_tasks"),
+        (ActionPointerKind.RECOGNIZE_CUSTOM_ENTITIES, "custom_entity_recognition_tasks"),
+        (ActionPointerKind.SINGLE_CATEGORY_CLASSIFY, "custom_single_classification_tasks"),
+        (ActionPointerKind.MULTI_CATEGORY_CLASSIFY, "custom_multi_classification_tasks"),
+    )
+    # plain equality checks, first match wins; key phrase extraction is the fallback
+    for kind, attribute_name in attribute_by_kind:
+        if task_type == kind:
+            return attribute_name
+    return "key_phrase_extraction_tasks"
+
+
+def resolve_action_pointer(pointer):
+    """Split an action error pointer into a task attribute name and an action index.
+
+    :param str pointer: Error target of the form "#/tasks/<taskKind>/<index>",
+        e.g. "#/tasks/entityRecognitionPiiTasks/1".
+    :return: Tuple of (attribute name as returned by get_task_from_pointer, index).
+    :rtype: tuple[str, int]
+    :raises ValueError: If the pointer does not contain one of the
+        ActionPointerKind values followed by a numeric index.
+    """
+    import re
+    pointer_union = "|".join(kind.value for kind in ActionPointerKind)
+    found = re.search(r"#/tasks/({})/(\d+)".format(pointer_union), pointer)
+    if found:
+        # Read both parts from the match itself. Taking only the last character
+        # of the pointer would truncate any index of 10 or more (".../12" -> 2).
+        task = found.group(1)
+        index = int(found.group(2))
+        property_name = get_task_from_pointer(task)
+        return property_name, index
+    raise ValueError(
+        "Unexpected response from service - action pointer '{}' is not a valid action pointer.".format(pointer)
+    )
+
+
+def get_ordered_errors(tasks_obj, task_name, doc_id_order):
+    """Build one DocumentError per input document for an action that returned no results.
+
+    :param tasks_obj: Job state object exposing ``errors`` (the action-level errors)
+        and ``tasks`` (the per-kind task arrays).
+    :param task_name: Name of the action whose error is being looked up.
+    :param doc_id_order: Document ids in the order the results should be returned.
+    :return: A list of DocumentError, one per id in ``doc_id_order``, each carrying the
+        code and message of the first error whose target resolves to ``task_name``.
+    :raises ~azure.core.exceptions.HttpResponseError: If any error has no target, so it
+        cannot be attributed to a specific action.
+    :raises ValueError: If no error targets the action (including when the job state
+        carries no errors at all) or a target is not a valid action pointer.
+    """
+    # treat a missing errors list like an empty one so the ValueError below is
+    # raised instead of a TypeError from iterating None
+    action_errors = tasks_obj.errors or []
+
+    # throw exception if error missing a target
+    if any(err.target is None for err in action_errors):
+        message = "".join("({}) {}".format(err.code, err.message) for err in action_errors)
+        raise HttpResponseError(message=message)
+
+    # create a DocumentError per input doc with the action error details
+    for err in action_errors:
+        property_name, index = resolve_action_pointer(err.target)
+        actions = getattr(tasks_obj.tasks, property_name)
+        action = actions[index]
+        if action.task_name == task_name:
+            return [
+                DocumentError(
+                    id=doc_id,
+                    error=TextAnalyticsError(code=err.code, message=err.message)
+                ) for doc_id in doc_id_order
+            ]
+    raise ValueError("Unexpected response from service - no errors for missing action results.")
+
+
+def _get_doc_results(task, doc_id_order, response_headers, returned_tasks_object):
+    # Produce the per-document results for one action. `task` is a
+    # (task type, task name) pair and `returned_tasks_object` is the job state,
+    # which exposes both `.tasks` and the action-level errors used below.
+    returned_tasks = returned_tasks_object.tasks
     current_task_type, task_name = task
     deserialization_callback = _get_deserialization_callback_from_task_type(
         current_task_type
     )
     property_name = _get_property_name_from_task_type(current_task_type)
     try:
         response_task_to_deserialize = \
-            next(task for task in getattr(returned_tasks_object, property_name) if task.task_name == task_name)
+            next(task for task in getattr(returned_tasks, property_name) if task.task_name == task_name)
     except StopIteration:
         raise ValueError("Unexpected response from service - unable to deserialize result.")
+
+    # if no results present, check for action errors
+    # (the whole job state is passed on because get_ordered_errors reads both
+    # its `.errors` and its `.tasks`)
+    if response_task_to_deserialize.results is None:
+        return get_ordered_errors(returned_tasks_object, task_name, doc_id_order)
     return deserialization_callback(
         doc_id_order, response_task_to_deserialize.results, response_headers, lro=True
     )
 
 
 def get_iter_items(doc_id_order, task_order, response_headers, analyze_job_state):
     iter_items = defaultdict(list)  # map doc id to action results
-    returned_tasks_object = analyze_job_state.tasks
+    returned_tasks_object = analyze_job_state
     for task in task_order:
-        results = _get_good_result(
+        results = _get_doc_results(
             task,
             doc_id_order,
             response_headers,
diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/mock_test_responses/action_error_no_target.json b/sdk/textanalytics/azure-ai-textanalytics/tests/mock_test_responses/action_error_no_target.json
@@ -0,0 +1,40 @@
+{
+    "jobId": "59678d1c-109e-4d93-a42f-05eb5e063525",
+    "lastUpdateDateTime": "2021-10-21T23:02:34Z",
+    "createdDateTime": "2021-10-21T23:02:27Z",
+    "expirationDateTime": "2021-10-22T23:02:27Z",
+    "status": "partiallyCompleted",
+    "errors": [
+        {
+            "code": "InternalServerError",
+            "message": "1 out of 3 job tasks failed. Failed job tasks : v3.2-preview.2/custom/entities/general."
+        }
+    ],
+    "tasks": {
+        "completed": 2,
+        "failed": 1,
+        "inProgress": 0,
+        "total": 3,
+        "customEntityRecognitionTasks": [
+            {
+                "lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
+                "taskName": "2",
+                "state": "failed"
+            }
+        ],
+        "customSingleClassificationTasks": [
+            {
+                "lastUpdateDateTime": "2021-10-21T23:02:29.3641823Z",
+                "taskName": "0",
+                "state": "succeeded"
+            }
+        ],
+        "customMultiClassificationTasks": [
+            {
+                "lastUpdateDateTime": "2021-10-21T23:02:28.7184297Z",
+                "taskName": "1",
+                "state": "succeeded"
+            }
+        ]
+    }
+}
diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/mock_test_responses/action_error_with_targets.json b/sdk/textanalytics/azure-ai-textanalytics/tests/mock_test_responses/action_error_with_targets.json
@@ -0,0 +1,175 @@
+{
+    "jobId": "59678d1c-109e-4d93-a42f-05eb5e063525",
+    "lastUpdateDateTime": "2021-10-21T23:02:34Z",
+    "createdDateTime": "2021-10-21T23:02:27Z",
+    "expirationDateTime": "2021-10-22T23:02:27Z",
+    "status": "partiallyCompleted",
+    "errors": [
+        {
+            "code": "InvalidRequest",
+            "message": "Some error2",
+            "target": "#/tasks/entityRecognitionPiiTasks/0"
+        },
+        {
+            "code": "InvalidRequest",
+            "message": "Some error6",
+            "target": "#/tasks/entityRecognitionPiiTasks/1"
+        },
+        {
+            "code": "InvalidRequest",
+            "message": "Some error0",
+            "target": "#/tasks/entityRecognitionTasks/0"
+        },
+        {
+            "code": "InvalidRequest",
+            "message": "Some error1",
+            "target": "#/tasks/keyPhraseExtractionTasks/0"
+        },
+        {
+            "code": "InvalidRequest",
+            "message": "Some error3",
+            "target": "#/tasks/entityLinkingTasks/0"
+        },
+        {
+            "code": "InvalidRequest",
+            "message": "Some error4",
+            "target": "#/tasks/sentimentAnalysisTasks/0"
+        },
+        {
+            "code": "InvalidRequest",
+            "message": "Some error5",
+            "target": "#/tasks/extractiveSummarizationTasks/0"
+        },
+        {
+            "code": "InvalidRequest",
+            "message": "Some error9",
+            "target": "#/tasks/customEntityRecognitionTasks/0"
+        },
+        {
+            "code": "InvalidRequest",
+            "message": "Some error7",
+            "target": "#/tasks/customSingleClassificationTasks/0"
+        },
+        {
+            "code": "InvalidRequest",
+            "message": "Some error8",
+            "target": "#/tasks/customMultiClassificationTasks/0"
+        }
+    ],
+    "tasks": {
+        "completed": 1,
+        "failed": 10,
+        "inProgress": 0,
+        "total": 11,
+        "entityRecognitionTasks": [
+            {
+                "lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
+                "taskName": "0",
+                "state": "failed"
+            }
+        ],
+        "entityRecognitionPiiTasks": [
+            {
+                "lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
+                "taskName": "2",
+                "state": "failed"
+            },
+            {
+                "lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
+                "taskName": "6",
+                "state": "failed"
+            }
+        ],
+        "keyPhraseExtractionTasks": [
+            {
+                "lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
+                "taskName": "1",
+                "state": "failed"
+            }
+        ],
+        "entityLinkingTasks": [
+            {
+                "lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
+                "taskName": "3",
+                "state": "failed"
+            }
+        ],
+        "sentimentAnalysisTasks": [
+            {
+                "lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
+                "taskName": "4",
+                "state": "failed"
+            }
+        ],
+        "extractiveSummarizationTasks": [
+            {
+                "lastUpdateDateTime": "2021-03-03T22:39:37.1716697Z",
+                "taskName": "5",
+                "state": "failed"
+            }
+        ],
+        "customEntityRecognitionTasks": [
+            {
+                "lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
+                "taskName": "9",
+                "state": "failed"
+            }
+        ],
+        "customSingleClassificationTasks": [
+            {
+                "lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
+                "taskName": "7",
+                "state": "failed"
+            },
+            {
+                "lastUpdateDateTime": "2021-10-21T23:02:29.3641823Z",
+                "taskName": "10",
+                "state": "succeeded",
+                "results": {
+                    "statistics": {
+                        "documentsCount": 2,
+                        "validDocumentsCount": 1,
+                        "erroneousDocumentsCount": 1,
+                        "transactionsCount": 1
+                    },
+                    "documents": [
+                        {
+                            "id": "1",
+                            "classification": {
+                                "category": "RateBook",
+                                "confidenceScore": 0.76
+                            },
+                            "statistics": {
+                                "charactersCount": 295,
+                                "transactionsCount": 1
+                            },
+                            "warnings": []
+                        }
+                    ],
+                    "errors": [
+                        {
+                            "id": "2",
+                            "error": {
+                                "code": "InvalidArgument",
+                                "message": "Invalid document in request.",
+                                "innererror": {
+                                    "code": "InvalidDocument",
+                                    "message": "Document text is empty."
+                                }
+                            }
+                        }
+                    ],
+                    "projectName": "single_category_classify_project_name",
+                    "deploymentName": "single_category_classify_project_name"
+                }
+            }
+        ],
+        "customMultiClassificationTasks": [
+            {
+                "lastUpdateDateTime": "2021-10-21T23:02:34.3218701Z",
+                "taskName": "8",
+                "state": "failed"
+            }
+        ]
+    }
+}
diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_analyze.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_analyze.py
diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_analyze_async.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_analyze_async.py