Skip to content

Commit 2aec059

Browse files
[Search] Add support for lexical normalizers (#23690)
* Add support for lexical normalizers * Remove copies of code in ctors * Update test session records
1 parent 9b95db1 commit 2aec059

30 files changed

+1243
-10932
lines changed

sdk/search/Azure.Search.Documents/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Release History
22

3+
## 11.4.0-beta.3 (2021-09-07)
4+
5+
### Features Added
6+
- Support for [Lexical normalizers](https://docs.microsoft.com/azure/search/search-normalizers#normalizers) in [text analyzers](https://docs.microsoft.com/rest/api/searchservice/test-analyzer) via `AnalyzeTextOptions`.
7+
38
## 11.4.0-beta.2 (2021-08-10)
49

510
### Features Added

sdk/search/Azure.Search.Documents/api/Azure.Search.Documents.netstandard2.0.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,9 +324,11 @@ public partial class AnalyzeTextOptions
324324
{
325325
public AnalyzeTextOptions(string text) { }
326326
public AnalyzeTextOptions(string text, Azure.Search.Documents.Indexes.Models.LexicalAnalyzerName analyzerName) { }
327+
public AnalyzeTextOptions(string text, Azure.Search.Documents.Indexes.Models.LexicalNormalizerName normalizerName) { }
327328
public AnalyzeTextOptions(string text, Azure.Search.Documents.Indexes.Models.LexicalTokenizerName tokenizerName) { }
328329
public Azure.Search.Documents.Indexes.Models.LexicalAnalyzerName? AnalyzerName { get { throw null; } }
329330
public System.Collections.Generic.IList<string> CharFilters { get { throw null; } }
331+
public Azure.Search.Documents.Indexes.Models.LexicalNormalizerName? NormalizerName { get { throw null; } }
330332
public string Text { get { throw null; } }
331333
public System.Collections.Generic.IList<Azure.Search.Documents.Indexes.Models.TokenFilterName> TokenFilters { get { throw null; } }
332334
public Azure.Search.Documents.Indexes.Models.LexicalTokenizerName? TokenizerName { get { throw null; } }

sdk/search/Azure.Search.Documents/src/Azure.Search.Documents.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project Sdk="Microsoft.NET.Sdk">
22
<PropertyGroup>
33
<AssemblyTitle>Microsoft Azure.Search.Documents client library</AssemblyTitle>
4-
<Version>11.4.0-beta.2</Version>
4+
<Version>11.4.0-beta.3</Version>
55
<!--The ApiCompatVersion is managed automatically and should not generally be modified manually.-->
66
<ApiCompatVersion>11.3.0</ApiCompatVersion>
77
<Description>

sdk/search/Azure.Search.Documents/src/Generated/Models/AnalyzeTextOptions.Serialization.cs

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sdk/search/Azure.Search.Documents/src/Indexes/Models/AnalyzeTextOptions.cs

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,42 +16,43 @@ public partial class AnalyzeTextOptions
1616
/// <param name="text">Required text to break into tokens.</param>
1717
/// <param name="analyzerName">The name of the analyzer to use to break the given <paramref name="text"/>.</param>
1818
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
19-
public AnalyzeTextOptions(string text, LexicalAnalyzerName analyzerName)
20-
{
21-
Text = text ?? throw new ArgumentNullException(nameof(text));
22-
AnalyzerName = analyzerName;
23-
24-
TokenFilters = new ChangeTrackingList<TokenFilterName>();
25-
CharFilters = new ChangeTrackingList<string>();
26-
}
19+
public AnalyzeTextOptions(string text, LexicalAnalyzerName analyzerName) : this(text)
20+
=> AnalyzerName = analyzerName;
2721

2822
/// <summary>
2923
/// Initializes a new instance of AnalyzeTextOptions.
3024
/// </summary>
3125
/// <param name="text">Required text to break into tokens.</param>
3226
/// <param name="tokenizerName">The name of the tokenizer to use to break the given <paramref name="text"/>.</param>
3327
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
34-
public AnalyzeTextOptions(string text, LexicalTokenizerName tokenizerName)
35-
{
36-
Text = text ?? throw new ArgumentNullException(nameof(text));
37-
TokenizerName = tokenizerName;
28+
public AnalyzeTextOptions(string text, LexicalTokenizerName tokenizerName) : this(text)
29+
=> TokenizerName = tokenizerName;
3830

39-
TokenFilters = new ChangeTrackingList<TokenFilterName>();
40-
CharFilters = new ChangeTrackingList<string>();
41-
}
31+
/// <summary>
32+
/// Initializes a new instance of AnalyzeTextOptions.
33+
/// </summary>
34+
/// <param name="text">Required text to break into tokens.</param>
35+
/// <param name="normalizerName">The name of the normalizer to use to normalize the given <paramref name="text"/>.</param>
36+
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
37+
public AnalyzeTextOptions(string text, LexicalNormalizerName normalizerName) : this(text)
38+
=> NormalizerName = normalizerName;
4239

43-
/// <summary> The name of the analyzer to use to break the given text. If this parameter is not specified, you must specify a tokenizer instead. The tokenizer and analyzer parameters are mutually exclusive. </summary>
40+
/// <summary> The name of the analyzer to use to break the given text. </summary>
4441
[CodeGenMember("Analyzer")]
4542
public LexicalAnalyzerName? AnalyzerName { get; }
4643

47-
/// <summary> The name of the tokenizer to use to break the given text. If this parameter is not specified, you must specify an analyzer instead. The tokenizer and analyzer parameters are mutually exclusive. </summary>
44+
/// <summary> The name of the tokenizer to use to break the given text. </summary>
4845
[CodeGenMember("Tokenizer")]
4946
public LexicalTokenizerName? TokenizerName { get; }
5047

51-
/// <summary> An optional list of token filters to use when breaking the given text. This parameter can only be set when using the tokenizer parameter. </summary>
48+
/// <summary> The name of the normalizer to use to normalize the given text. </summary>
49+
[CodeGenMember("Normalizer")]
50+
public LexicalNormalizerName? NormalizerName { get; }
51+
52+
/// <summary> An optional list of token filters to use when breaking the given text. </summary>
5253
public IList<TokenFilterName> TokenFilters { get; }
5354

54-
/// <summary> An optional list of character filters to use when breaking the given text. This parameter can only be set when using the tokenizer parameter. </summary>
55+
/// <summary> An optional list of character filters to use when breaking the given text. </summary>
5556
public IList<string> CharFilters { get; }
5657
}
5758
}

sdk/search/Azure.Search.Documents/src/autorest.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ copy them locally in `/sdk/search/generate.ps1` and reference them here.
1313
```yaml
1414
title: SearchServiceClient
1515
input-file:
16-
- https://raw.githubusercontent.com/Azure/azure-rest-api-specs/d2183715d380084ff04313a73c8803d042fe91b9/specification/search/data-plane/Azure.Search/preview/2021-04-30-Preview/searchindex.json
17-
- https://raw.githubusercontent.com/Azure/azure-rest-api-specs/d2183715d380084ff04313a73c8803d042fe91b9/specification/search/data-plane/Azure.Search/preview/2021-04-30-Preview/searchservice.json
16+
- https://raw.githubusercontent.com/Azure/azure-rest-api-specs/c99fbb96d7993daec8135a40681d9d807e3f5751/specification/search/data-plane/Azure.Search/preview/2021-04-30-Preview/searchindex.json
17+
- https://raw.githubusercontent.com/Azure/azure-rest-api-specs/c99fbb96d7993daec8135a40681d9d807e3f5751/specification/search/data-plane/Azure.Search/preview/2021-04-30-Preview/searchservice.json
1818
```
1919
2020
## Release hacks

sdk/search/Azure.Search.Documents/tests/SearchIndexClientTests.cs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
namespace Azure.Search.Documents.Tests
1717
{
18+
[ClientTestFixture(SearchClientOptions.ServiceVersion.V2020_06_30, SearchClientOptions.ServiceVersion.V2021_04_30_Preview)]
1819
public class SearchIndexClientTests : SearchTestBase
1920
{
2021
public SearchIndexClientTests(bool async, SearchClientOptions.ServiceVersion serviceVersion)
@@ -446,6 +447,31 @@ public async Task AnalyzeText()
446447
Assert.AreEqual(new[] { "The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog." }, tokens.Select(t => t.Token));
447448
}
448449

450+
[Test]
451+
[ServiceVersion(Min = SearchClientOptions.ServiceVersion.V2021_04_30_Preview)]
452+
public async Task AnalyzeTextWithNormalizer()
453+
{
454+
await using SearchResources resources = await SearchResources.GetSharedHotelsIndexAsync(this);
455+
456+
SearchIndexClient client = resources.GetIndexClient();
457+
458+
AnalyzeTextOptions request = new("I dARe YoU tO reAd It IN A nORmAl vOiCE.", LexicalNormalizerName.Lowercase);
459+
460+
Response<IReadOnlyList<AnalyzedTokenInfo>> result = await client.AnalyzeTextAsync(resources.IndexName, request);
461+
IReadOnlyList<AnalyzedTokenInfo> tokens = result.Value;
462+
463+
Assert.AreEqual(1, tokens.Count);
464+
Assert.AreEqual("i dare you to read it in a normal voice.", tokens[0].Token);
465+
466+
request = new("Item ① in my ⑽ point rant is that 75⁰F is uncomfortably warm.", LexicalNormalizerName.AsciiFolding);
467+
468+
result = await client.AnalyzeTextAsync(resources.IndexName, request);
469+
tokens = result.Value;
470+
471+
Assert.AreEqual(1, tokens.Count);
472+
Assert.AreEqual("Item 1 in my (10) point rant is that 750F is uncomfortably warm.", tokens[0].Token);
473+
}
474+
449475
[Test]
450476
public async Task SetScoringProfile()
451477
{

sdk/search/Azure.Search.Documents/tests/SessionRecords/SearchIndexClientTests/AnalyzeText.json

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sdk/search/Azure.Search.Documents/tests/SessionRecords/SearchIndexClientTests/AnalyzeTextAsync.json

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sdk/search/Azure.Search.Documents/tests/SessionRecords/SearchIndexClientTests/AnalyzeTextWithNormalizer.json

Lines changed: 107 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)