Skip to content

Commit 1efff7f

Browse files
authored
[Search] Add skillset validation (Azure#20669)
* Add client-side validation and test. * Add skillset validation test. * Make code more Pythonic.
1 parent 6a06b0e commit 1efff7f

13 files changed

+233
-208
lines changed

sdk/search/azure-search-documents/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
- Renamed `SearchClient.speller` to `SearchClient.query_speller`.
1313
- Removed keyword arguments from `SearchClient`: `answers` and `captions`.
14+
- `SentimentSkill`, `EntityRecognitionSkill`: added client-side validation to prevent sending unsupported parameters.
1415

1516
### Bugs Fixed
1617

sdk/search/azure-search-documents/azure/search/documents/indexes/_search_indexer_client.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
1515
get_access_conditions,
1616
normalize_endpoint,
1717
)
18-
from .models import SearchIndexerDataSourceConnection
18+
from .models import (
19+
EntityRecognitionSkillVersion,
20+
SearchIndexerDataSourceConnection,
21+
SentimentSkillVersion
22+
)
1923
from .._api_versions import DEFAULT_VERSION
2024
from .._headers_mixin import HeadersMixin
2125
from .._utils import get_authentication_policy
@@ -564,7 +568,9 @@ def create_skillset(self, skillset, **kwargs):
564568
565569
"""
566570
kwargs["headers"] = self._merge_client_headers(kwargs.get("headers"))
571+
_validate_skillset(skillset)
567572
skillset = skillset._to_generated() if hasattr(skillset, '_to_generated') else skillset # pylint:disable=protected-access
573+
568574
result = self._client.skillsets.create(skillset, **kwargs)
569575
return SearchIndexerSkillset._from_generated(result) # pylint:disable=protected-access
570576

@@ -587,6 +593,7 @@ def create_or_update_skillset(self, skillset, **kwargs):
587593
skillset, kwargs.pop("match_condition", MatchConditions.Unconditionally)
588594
)
589595
kwargs.update(access_condition)
596+
_validate_skillset(skillset)
590597
skillset = skillset._to_generated() if hasattr(skillset, '_to_generated') else skillset # pylint:disable=protected-access
591598

592599
result = self._client.skillsets.create_or_update(
@@ -596,3 +603,44 @@ def create_or_update_skillset(self, skillset, **kwargs):
596603
**kwargs
597604
)
598605
return SearchIndexerSkillset._from_generated(result) # pylint:disable=protected-access
606+
607+
def _get_skill_field(skill, name):
    """Read ``name`` from a skill given either as a mapping or as a model object.

    :param skill: A dict-like skill definition (anything exposing ``get``) or a
        skill model instance.
    :param str name: The field to read.
    :return: The field's value, or None when it is absent.
    """
    try:
        return skill.get(name)
    except AttributeError:
        # Not dict-like: fall back to plain attribute access.
        return getattr(skill, name, None)


def _validate_skillset(skillset):
    """Validates any multi-version skills in the skillset to verify that unsupported
    parameters are not supplied by the user.

    Skills without a ``skill_version``, and skills whose ``skill_version`` is not one
    of the versions checked below, are skipped.

    :param skillset: The skillset to validate. Objects without a ``skills``
        attribute (or with an empty one) are accepted without checks.
    :raises ValueError: If one or more skills set a parameter that their skill
        version does not support. The message has one line per offending skill.
    """
    skills = getattr(skillset, 'skills', None)
    if not skills:
        return

    error_strings = []
    for skill in skills:
        skill_version = _get_skill_field(skill, 'skill_version')
        if not skill_version:
            continue

        if skill_version == SentimentSkillVersion.V1:
            unsupported = ['model_version', 'include_opinion_mining']
        elif skill_version == SentimentSkillVersion.V3:
            unsupported = []
        elif skill_version == EntityRecognitionSkillVersion.V1:
            unsupported = ['model_version']
        elif skill_version == EntityRecognitionSkillVersion.V3:
            unsupported = ['include_typeless_entities']
        else:
            # Unrecognized version: there is nothing to check. Without this branch
            # `unsupported` would be unbound on the first skill (UnboundLocalError)
            # or would silently carry over the previous skill's list.
            continue

        # A parameter counts as "supplied" only when it holds a truthy value.
        errors = [item for item in unsupported if _get_skill_field(skill, item)]
        if errors:
            error_strings.append("Unsupported parameters for skill version {}: {}".format(
                skill_version, ", ".join(errors))
            )
    if error_strings:
        raise ValueError("\n".join(error_strings))

sdk/search/azure-search-documents/azure/search/documents/indexes/models/_models.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ class EntityRecognitionSkill(SearchIndexerSkill):
181181
'include_typeless_entities': {'key': 'includeTypelessEntities', 'type': 'bool'},
182182
'minimum_precision': {'key': 'minimumPrecision', 'type': 'float'},
183183
'model_version': {'key': 'modelVersion', 'type': 'str'},
184+
'skill_version': {'key': 'skillVersion', 'type': 'str'}
184185
}
185186

186187
def __init__(
@@ -210,8 +211,7 @@ def _to_generated(self):
210211
categories=self.categories,
211212
default_language_code=self.default_language_code,
212213
include_typeless_entities=self.include_typeless_entities,
213-
minimum_precision=self.minimum_precision,
214-
model_version=self.model_version
214+
minimum_precision=self.minimum_precision
215215
)
216216
if self.skill_version in [EntityRecognitionSkillVersion.V3, EntityRecognitionSkillVersion.LATEST]:
217217
return _EntityRecognitionSkillV3(
@@ -221,7 +221,6 @@ def _to_generated(self):
221221
odata_type=self.odata_type,
222222
categories=self.categories,
223223
default_language_code=self.default_language_code,
224-
include_typeless_entities=self.include_typeless_entities,
225224
minimum_precision=self.minimum_precision,
226225
model_version=self.model_version
227226
)
@@ -318,6 +317,7 @@ class SentimentSkill(SearchIndexerSkill):
318317
'default_language_code': {'key': 'defaultLanguageCode', 'type': 'str'},
319318
'include_opinion_mining': {'key': 'includeOpinionMining', 'type': 'bool'},
320319
'model_version': {'key': 'modelVersion', 'type': 'str'},
320+
'skill_version': {'key': 'skillVersion', 'type': 'str'}
321321
}
322322

323323
def __init__(
@@ -331,7 +331,7 @@ def __init__(
331331
self.skill_version = skill_version
332332
self.odata_type = self.skill_version # type: str
333333
self.default_language_code = kwargs.get('default_language_code', None)
334-
self.include_opinion_mining = kwargs.get('include_opinion_mining', False)
334+
self.include_opinion_mining = kwargs.get('include_opinion_mining', None)
335335
self.model_version = kwargs.get('model_version', None)
336336

337337
def _to_generated(self):
@@ -341,10 +341,8 @@ def _to_generated(self):
341341
outputs=self.outputs,
342342
name=self.name,
343343
odata_type=self.odata_type,
344-
default_language_code=self.default_language_code,
345-
include_opinion_mining=self.include_opinion_mining,
346-
model_version=self.model_version
347-
)
344+
default_language_code=self.default_language_code
345+
)
348346
if self.skill_version in [SentimentSkillVersion.V3, SentimentSkillVersion.LATEST]:
349347
return _SentimentSkillV3(
350348
inputs=self.inputs,

sdk/search/azure-search-documents/tests/recordings/test_search_index_client_skillset_live.test_create_or_update_skillset.yaml

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ interactions:
1818
Prefer:
1919
- return=representation
2020
User-Agent:
21-
- azsdk-python-search-documents/11.3.0b3 Python/3.9.2 (Windows-10-10.0.19041-SP0)
21+
- azsdk-python-search-documents/11.3.0b4 Python/3.9.2 (Windows-10-10.0.19041-SP0)
2222
method: PUT
2323
uri: https://searche2bf1c71.search.windows.net/skillsets('test-ss')?api-version=2021-04-30-Preview
2424
response:
2525
body:
26-
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D96E6871C89346\"","name":"test-ss","description":"desc1","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":null,"description":null,"context":null,"categories":[],"defaultLanguageCode":null,"minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
26+
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D977BF242A141A\"","name":"test-ss","description":"desc1","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":null,"description":null,"context":null,"categories":[],"defaultLanguageCode":null,"minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
2727
headers:
2828
cache-control:
2929
- no-cache
@@ -32,11 +32,11 @@ interactions:
3232
content-type:
3333
- application/json; odata.metadata=minimal
3434
date:
35-
- Thu, 02 Sep 2021 23:21:51 GMT
35+
- Tue, 14 Sep 2021 20:35:07 GMT
3636
elapsed-time:
37-
- '1433'
37+
- '2124'
3838
etag:
39-
- W/"0x8D96E6871C89346"
39+
- W/"0x8D977BF242A141A"
4040
expires:
4141
- '-1'
4242
location:
@@ -48,7 +48,7 @@ interactions:
4848
preference-applied:
4949
- odata.include-annotations="*"
5050
request-id:
51-
- 8ded272a-0c44-11ec-ac43-74c63bed1137
51+
- 3ff59952-159b-11ec-bda5-74c63bed1137
5252
strict-transport-security:
5353
- max-age=15724800; includeSubDomains
5454
status:
@@ -73,12 +73,12 @@ interactions:
7373
Prefer:
7474
- return=representation
7575
User-Agent:
76-
- azsdk-python-search-documents/11.3.0b3 Python/3.9.2 (Windows-10-10.0.19041-SP0)
76+
- azsdk-python-search-documents/11.3.0b4 Python/3.9.2 (Windows-10-10.0.19041-SP0)
7777
method: PUT
7878
uri: https://searche2bf1c71.search.windows.net/skillsets('test-ss')?api-version=2021-04-30-Preview
7979
response:
8080
body:
81-
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D96E6871DB3571\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":null,"description":null,"context":null,"categories":[],"defaultLanguageCode":null,"minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
81+
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D977BF243E8A49\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":null,"description":null,"context":null,"categories":[],"defaultLanguageCode":null,"minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
8282
headers:
8383
cache-control:
8484
- no-cache
@@ -87,11 +87,11 @@ interactions:
8787
content-type:
8888
- application/json; odata.metadata=minimal
8989
date:
90-
- Thu, 02 Sep 2021 23:21:51 GMT
90+
- Tue, 14 Sep 2021 20:35:07 GMT
9191
elapsed-time:
92-
- '46'
92+
- '85'
9393
etag:
94-
- W/"0x8D96E6871DB3571"
94+
- W/"0x8D977BF243E8A49"
9595
expires:
9696
- '-1'
9797
odata-version:
@@ -101,7 +101,7 @@ interactions:
101101
preference-applied:
102102
- odata.include-annotations="*"
103103
request-id:
104-
- 8ef2919c-0c44-11ec-a88c-74c63bed1137
104+
- 41558274-159b-11ec-bc0f-74c63bed1137
105105
strict-transport-security:
106106
- max-age=15724800; includeSubDomains
107107
vary:
@@ -119,12 +119,12 @@ interactions:
119119
Connection:
120120
- keep-alive
121121
User-Agent:
122-
- azsdk-python-search-documents/11.3.0b3 Python/3.9.2 (Windows-10-10.0.19041-SP0)
122+
- azsdk-python-search-documents/11.3.0b4 Python/3.9.2 (Windows-10-10.0.19041-SP0)
123123
method: GET
124124
uri: https://searche2bf1c71.search.windows.net/skillsets?api-version=2021-04-30-Preview
125125
response:
126126
body:
127-
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets","value":[{"@odata.etag":"\"0x8D96E6871DB3571\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":"#1","description":null,"context":"/document","categories":["Person","Quantity","Organization","URL","Email","Location","DateTime"],"defaultLanguageCode":"en","minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}]}'
127+
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets","value":[{"@odata.etag":"\"0x8D977BF243E8A49\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":"#1","description":null,"context":"/document","categories":["Person","Quantity","Organization","URL","Email","Location","DateTime"],"defaultLanguageCode":"en","minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}]}'
128128
headers:
129129
cache-control:
130130
- no-cache
@@ -133,9 +133,9 @@ interactions:
133133
content-type:
134134
- application/json; odata.metadata=minimal
135135
date:
136-
- Thu, 02 Sep 2021 23:21:51 GMT
136+
- Tue, 14 Sep 2021 20:35:07 GMT
137137
elapsed-time:
138-
- '139'
138+
- '69'
139139
expires:
140140
- '-1'
141141
odata-version:
@@ -145,7 +145,7 @@ interactions:
145145
preference-applied:
146146
- odata.include-annotations="*"
147147
request-id:
148-
- 8f03c3cb-0c44-11ec-94e8-74c63bed1137
148+
- 4169c877-159b-11ec-9f5a-74c63bed1137
149149
strict-transport-security:
150150
- max-age=15724800; includeSubDomains
151151
vary:
@@ -163,12 +163,12 @@ interactions:
163163
Connection:
164164
- keep-alive
165165
User-Agent:
166-
- azsdk-python-search-documents/11.3.0b3 Python/3.9.2 (Windows-10-10.0.19041-SP0)
166+
- azsdk-python-search-documents/11.3.0b4 Python/3.9.2 (Windows-10-10.0.19041-SP0)
167167
method: GET
168168
uri: https://searche2bf1c71.search.windows.net/skillsets('test-ss')?api-version=2021-04-30-Preview
169169
response:
170170
body:
171-
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D96E6871DB3571\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":"#1","description":null,"context":"/document","categories":["Person","Quantity","Organization","URL","Email","Location","DateTime"],"defaultLanguageCode":"en","minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
171+
string: '{"@odata.context":"https://searche2bf1c71.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8D977BF243E8A49\"","name":"test-ss","description":"desc2","skills":[{"@odata.type":"#Microsoft.Skills.Text.EntityRecognitionSkill","name":"#1","description":null,"context":"/document","categories":["Person","Quantity","Organization","URL","Email","Location","DateTime"],"defaultLanguageCode":"en","minimumPrecision":null,"includeTypelessEntities":null,"inputs":[{"name":"text","source":"/document/content","sourceContext":null,"inputs":[]}],"outputs":[{"name":"organizations","targetName":"organizations"}]}],"cognitiveServices":null,"knowledgeStore":null,"encryptionKey":null}'
172172
headers:
173173
cache-control:
174174
- no-cache
@@ -177,11 +177,11 @@ interactions:
177177
content-type:
178178
- application/json; odata.metadata=minimal
179179
date:
180-
- Thu, 02 Sep 2021 23:21:51 GMT
180+
- Tue, 14 Sep 2021 20:35:07 GMT
181181
elapsed-time:
182-
- '13'
182+
- '15'
183183
etag:
184-
- W/"0x8D96E6871DB3571"
184+
- W/"0x8D977BF243E8A49"
185185
expires:
186186
- '-1'
187187
odata-version:
@@ -191,7 +191,7 @@ interactions:
191191
preference-applied:
192192
- odata.include-annotations="*"
193193
request-id:
194-
- 8f22fc82-0c44-11ec-8c37-74c63bed1137
194+
- 417b5016-159b-11ec-acf0-74c63bed1137
195195
strict-transport-security:
196196
- max-age=15724800; includeSubDomains
197197
vary:

0 commit comments

Comments
 (0)