Skip to content

Commit 7b342e3

Browse files
amarathavaleAmar Athavale
andauthored
Setting the IndexingPolicy (Azure#19540)
* Setting the IndexingPolicy * Use longs for holding partitioningKey and ids instead of Strings * Revert "Use longs for holding partitioningKey and ids instead of Strings" This reverts commit 3178e6d. * Refactoring the data generation to not cache the generated keys * Avoid the map recreation Co-authored-by: Amar Athavale <aathaval@linkedin.com>
1 parent a317d24 commit 7b342e3

File tree

18 files changed

+308
-101
lines changed

18 files changed

+308
-101
lines changed

sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/linkedin/DataGenerator.java renamed to sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/linkedin/DataGenerationIterator.java

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,38 +3,36 @@
33

44
package com.azure.cosmos.benchmark.linkedin;
55

6-
import com.azure.cosmos.benchmark.linkedin.data.InvitationDataGenerator;
6+
import com.azure.cosmos.benchmark.linkedin.data.DataGenerator;
77
import com.azure.cosmos.benchmark.linkedin.data.Key;
88
import com.fasterxml.jackson.databind.node.ObjectNode;
9-
import java.util.Collections;
10-
import java.util.HashSet;
9+
import com.google.common.base.Preconditions;
1110
import java.util.Iterator;
1211
import java.util.Map;
13-
import java.util.Set;
1412
import java.util.stream.Collectors;
1513

1614

1715
/**
1816
* This class facilitates generating data in batches.
1917
*/
20-
public class DataGenerator implements Iterator<Map<Key, ObjectNode>> {
18+
public class DataGenerationIterator implements Iterator<Map<Key, ObjectNode>> {
2119

2220
public static final int BATCH_SIZE = 200000;
2321

24-
private final InvitationDataGenerator _dataGenerator;
22+
private final DataGenerator _dataGenerator;
2523
private final int _totalRecordCount;
2624
private int _totalDataGenerated;
27-
private final Set<Key> _generatedKeys;
2825

2926
/**
30-
* @param recordCount Number of records we want to generate generate for this test
27+
* @param dataGenerator The underlying DataGenerator capable of generating a batch of records
28+
* @param recordCount Number of records we want to generate generate for this test.
3129
* Actual data generation happens in pre-determined batch size
3230
*/
33-
public DataGenerator(int recordCount) {
34-
_dataGenerator = new InvitationDataGenerator(recordCount);
31+
public DataGenerationIterator(final DataGenerator dataGenerator, int recordCount) {
32+
_dataGenerator = Preconditions.checkNotNull(dataGenerator,
33+
"The underlying DataGenerator for this iterator can not be null");
3534
_totalRecordCount = recordCount;
3635
_totalDataGenerated = 0;
37-
_generatedKeys = new HashSet<>();
3836
}
3937

4038
@Override
@@ -47,21 +45,8 @@ public Map<Key, ObjectNode> next() {
4745
final int recordsToGenerate = Math.min(BATCH_SIZE, _totalRecordCount - _totalDataGenerated);
4846

4947
// Filter Keys in case there are duplicates
50-
final Map<Key, ObjectNode> newDocuments = _dataGenerator.generate(recordsToGenerate)
51-
.entrySet()
52-
.stream()
53-
.filter(keyObjectNodeEntry -> !_generatedKeys.contains(keyObjectNodeEntry.getKey()))
54-
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
55-
56-
_generatedKeys.addAll(newDocuments.keySet());
48+
final Map<Key, ObjectNode> newDocuments = _dataGenerator.generate(recordsToGenerate);
5749
_totalDataGenerated += newDocuments.size();
5850
return newDocuments;
5951
}
60-
61-
/**
62-
* @return Set of Keys representing each document's id and partitioningKey
63-
*/
64-
public Set<Key> getGeneratedKeys() {
65-
return Collections.unmodifiableSet(_generatedKeys);
66-
}
6752
}

sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/linkedin/DataLoader.java

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.azure.cosmos.CosmosAsyncDatabase;
1111
import com.azure.cosmos.CosmosItemOperation;
1212
import com.azure.cosmos.benchmark.Configuration;
13+
import com.azure.cosmos.benchmark.linkedin.data.EntityConfiguration;
1314
import com.azure.cosmos.benchmark.linkedin.data.Key;
1415
import com.azure.cosmos.models.FeedResponse;
1516
import com.azure.cosmos.models.PartitionKey;
@@ -19,7 +20,6 @@
1920
import java.util.List;
2021
import java.util.Map;
2122
import java.util.Optional;
22-
import java.util.Set;
2323
import java.util.stream.Collectors;
2424
import org.slf4j.Logger;
2525
import org.slf4j.LoggerFactory;
@@ -29,33 +29,37 @@
2929
public class DataLoader {
3030
private static final Logger LOGGER = LoggerFactory.getLogger(DataLoader.class);
3131

32-
private static final int MAX_BATCH_SIZE = 10000;
32+
private static final int MAX_BATCH_SIZE = 20000;
3333
private static final int BULK_OPERATION_CONCURRENCY = 5;
3434
private static final Duration BULK_LOAD_WAIT_DURATION = Duration.ofSeconds(120);
3535
private static final String COUNT_ALL_QUERY = "SELECT COUNT(1) FROM c";
3636
private static final String COUNT_ALL_QUERY_RESULT_FIELD = "$1";
3737

3838
private final Configuration _configuration;
3939
private final CosmosAsyncClient _client;
40-
private final DataGenerator _dataGenerator;
40+
private final DataGenerationIterator _dataGenerator;
4141

42-
public DataLoader(final Configuration configuration, final CosmosAsyncClient client) {
42+
public DataLoader(final Configuration configuration,
43+
final EntityConfiguration entityConfiguration,
44+
final CosmosAsyncClient client) {
45+
Preconditions.checkNotNull(entityConfiguration, "The test entity configuration can not be null");
4346
_configuration = Preconditions.checkNotNull(configuration,
4447
"The Workload configuration defining the parameters can not be null");
4548
_client = Preconditions.checkNotNull(client,
4649
"The CosmosAsyncClient needed for data loading can not be null");
47-
_dataGenerator = new DataGenerator(_configuration.getNumberOfPreCreatedDocuments());
50+
_dataGenerator = new DataGenerationIterator(entityConfiguration.dataGenerator(),
51+
_configuration.getNumberOfPreCreatedDocuments());
4852
}
4953

5054
public void loadData() {
51-
LOGGER.info("Starting batched data loading, loading {} documents in each iteration", DataGenerator.BATCH_SIZE);
55+
LOGGER.info("Starting batched data loading, loading {} documents in each iteration", DataGenerationIterator.BATCH_SIZE);
5256
while (_dataGenerator.hasNext()) {
5357
final Map<Key, ObjectNode> newDocuments = _dataGenerator.next();
5458
bulkCreateItems(newDocuments);
5559
newDocuments.clear();
5660
}
5761

58-
validateDataCreation(_dataGenerator.getGeneratedKeys().size());
62+
validateDataCreation(_configuration.getNumberOfPreCreatedDocuments());
5963
}
6064

6165
private void bulkCreateItems(final Map<Key, ObjectNode> records) {
@@ -102,6 +106,9 @@ private void validateDataCreation(int expectedSize) {
102106
String.format("Number of documents %d in the container %s is less than the expected threshold %f ",
103107
resultCount, containerName, (expectedSize * 0.90)));
104108
}
109+
110+
LOGGER.info("Validated {} out of the {} expected documents were loaded into [{}:{}]",
111+
resultCount, expectedSize, _configuration.getDatabaseId(), containerName);
105112
}
106113

107114
/**
@@ -120,11 +127,4 @@ private List<CosmosItemOperation> mapToCosmosItemOperation(final Map<Key, Object
120127
})
121128
.collect(Collectors.toList());
122129
}
123-
124-
/**
125-
* @return Set of Keys representing each document loaded into the test collection
126-
*/
127-
public Set<Key> getLoadedDataKeys() {
128-
return _dataGenerator.getGeneratedKeys();
129-
}
130130
}

sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/linkedin/GetTestRunner.java

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
import com.azure.cosmos.CosmosAsyncDatabase;
99
import com.azure.cosmos.benchmark.BenchmarkHelper;
1010
import com.azure.cosmos.benchmark.Configuration;
11+
import com.azure.cosmos.benchmark.linkedin.data.EntityConfiguration;
1112
import com.azure.cosmos.benchmark.linkedin.data.Key;
13+
import com.azure.cosmos.benchmark.linkedin.data.KeyGenerator;
1214
import com.azure.cosmos.benchmark.linkedin.impl.Accessor;
1315
import com.azure.cosmos.benchmark.linkedin.impl.CosmosDBDataAccessor;
1416
import com.azure.cosmos.benchmark.linkedin.impl.DocumentTransformer;
@@ -28,9 +30,6 @@
2830
import java.time.Clock;
2931
import java.time.Duration;
3032
import java.time.Instant;
31-
import java.util.ArrayList;
32-
import java.util.Collections;
33-
import java.util.Set;
3433
import java.util.concurrent.ExecutorService;
3534
import java.util.concurrent.Executors;
3635
import java.util.concurrent.Semaphore;
@@ -53,6 +52,7 @@ public class GetTestRunner {
5352
private static final Duration TERMINATION_WAIT_DURATION = Duration.ofSeconds(60);
5453

5554
private final Configuration _configuration;
55+
private final EntityConfiguration _entityConfiguration;
5656
private final Accessor<Key, JsonNode> _accessor;
5757
private final ExecutorService _executorService;
5858
private final AtomicLong _successCount;
@@ -61,30 +61,35 @@ public class GetTestRunner {
6161

6262
GetTestRunner(final Configuration configuration,
6363
final CosmosAsyncClient client,
64-
final MetricRegistry metricsRegistry) {
64+
final MetricRegistry metricsRegistry,
65+
final EntityConfiguration entityConfiguration) {
6566
Preconditions.checkNotNull(configuration,
6667
"The Workload configuration defining the parameters can not be null");
6768
Preconditions.checkNotNull(client,
6869
"Need a non-null client for setting up the Database and containers for the test");
6970
Preconditions.checkNotNull(metricsRegistry,
7071
"The MetricsRegistry can not be null");
72+
Preconditions.checkNotNull(entityConfiguration,
73+
"The Test entity configuration can not be null");
7174

7275
_configuration = configuration;
76+
_entityConfiguration = entityConfiguration;
7377
_accessor = createAccessor(configuration, client, metricsRegistry);
7478
_executorService = Executors.newFixedThreadPool(configuration.getConcurrency());
7579
_successCount = new AtomicLong(0);
7680
_errorCount = new AtomicLong(0);
7781
_semaphore = new Semaphore(configuration.getConcurrency());
7882
}
7983

80-
public void run(final Set<Key> testKeys) {
81-
final ArrayList<Key> keys = new ArrayList<>(testKeys);
82-
Collections.shuffle(keys);
84+
public void run() {
85+
KeyGenerator keyGenerator = getNewKeyGenerator();
8386
final long runStartTime = System.currentTimeMillis();
8487
long i = 0;
8588
for (; BenchmarkHelper.shouldContinue(runStartTime, i, _configuration); i++) {
86-
int index = (int) ((i % keys.size()) % Integer.MAX_VALUE);
87-
final Key key = keys.get(index);
89+
if (i > _configuration.getNumberOfPreCreatedDocuments()) {
90+
keyGenerator = getNewKeyGenerator();
91+
}
92+
final Key key = keyGenerator.key();
8893
try {
8994
_semaphore.acquire();
9095
} catch (InterruptedException e) {
@@ -149,4 +154,8 @@ private StaticDataLocator createDataLocator(Configuration configuration, CosmosA
149154
final CosmosAsyncContainer container = database.getContainer(configuration.getCollectionId());
150155
return new StaticDataLocator(collectionKey, container);
151156
}
157+
158+
private KeyGenerator getNewKeyGenerator() {
159+
return _entityConfiguration.keyGenerator();
160+
}
152161
}

sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/linkedin/LICtlWorkload.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import com.azure.cosmos.CosmosException;
88
import com.azure.cosmos.benchmark.Configuration;
99
import com.azure.cosmos.benchmark.ScheduledReporterFactory;
10+
import com.azure.cosmos.benchmark.linkedin.data.EntityConfiguration;
11+
import com.azure.cosmos.benchmark.linkedin.data.InvitationsEntityConfiguration;
1012
import com.codahale.metrics.MetricRegistry;
1113
import com.codahale.metrics.ScheduledReporter;
1214
import com.google.common.base.Preconditions;
@@ -19,6 +21,7 @@ public class LICtlWorkload {
1921
private static final Logger LOGGER = LoggerFactory.getLogger(LICtlWorkload.class);
2022

2123
private final Configuration _configuration;
24+
private final EntityConfiguration _entityConfiguration;
2225
private final CosmosAsyncClient _client;
2326
private final CosmosAsyncClient _bulkLoadClient;
2427
private final MetricRegistry _metricsRegistry;
@@ -31,15 +34,16 @@ public LICtlWorkload(final Configuration configuration) {
3134
Preconditions.checkNotNull(configuration, "The Workload configuration defining the parameters can not be null");
3235

3336
_configuration = configuration;
37+
_entityConfiguration = new InvitationsEntityConfiguration(configuration);
3438
_client = AsyncClientFactory.buildAsyncClient(configuration);
3539
_bulkLoadClient = AsyncClientFactory.buildBulkLoadAsyncClient(configuration);
3640
_metricsRegistry = new MetricRegistry();
3741
_reporter = ScheduledReporterFactory.create(_configuration, _metricsRegistry);
3842
_resourceManager = _configuration.shouldManageResources()
39-
? new ResourceManagerImpl(_configuration, _client)
43+
? new ResourceManagerImpl(_configuration, _entityConfiguration, _client)
4044
: new NoopResourceManagerImpl();
41-
_dataLoader = new DataLoader(_configuration, _bulkLoadClient);
42-
_getTestRunner = new GetTestRunner(_configuration, _client, _metricsRegistry);
45+
_dataLoader = new DataLoader(_configuration, _entityConfiguration, _bulkLoadClient);
46+
_getTestRunner = new GetTestRunner(_configuration, _client, _metricsRegistry, _entityConfiguration);
4347
}
4448

4549
public void setup() throws CosmosException {
@@ -58,7 +62,7 @@ public void run() {
5862
LOGGER.info("Executing the Get test");
5963
_reporter.start(_configuration.getPrintingInterval(), TimeUnit.SECONDS);
6064

61-
_getTestRunner.run(_dataLoader.getLoadedDataKeys());
65+
_getTestRunner.run();
6266

6367
_reporter.report();
6468
}

sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/linkedin/ResourceManagerImpl.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import com.azure.cosmos.CosmosAsyncDatabase;
99
import com.azure.cosmos.CosmosException;
1010
import com.azure.cosmos.benchmark.Configuration;
11+
import com.azure.cosmos.benchmark.linkedin.data.CollectionAttributes;
12+
import com.azure.cosmos.benchmark.linkedin.data.EntityConfiguration;
1113
import com.azure.cosmos.models.CosmosContainerProperties;
1214
import com.azure.cosmos.models.ThroughputProperties;
1315
import com.google.common.base.Preconditions;
@@ -26,14 +28,20 @@ public class ResourceManagerImpl implements ResourceManager {
2628
private static final Duration RESOURCE_CRUD_WAIT_TIME = Duration.ofSeconds(30);
2729

2830
private final Configuration _configuration;
31+
private final EntityConfiguration _entityConfiguration;
2932
private final CosmosAsyncClient _client;
3033

31-
public ResourceManagerImpl(final Configuration configuration, final CosmosAsyncClient client) {
34+
public ResourceManagerImpl(final Configuration configuration,
35+
final EntityConfiguration entityConfiguration,
36+
final CosmosAsyncClient client) {
3237
Preconditions.checkNotNull(configuration,
3338
"The Workload configuration defining the parameters can not be null");
39+
Preconditions.checkNotNull(entityConfiguration,
40+
"The Test Entity specific configuration can not be null");
3441
Preconditions.checkNotNull(client, "Need a non-null client for "
3542
+ "setting up the Database and containers for the test");
3643
_configuration = configuration;
44+
_entityConfiguration = entityConfiguration;
3745
_client = client;
3846
}
3947

@@ -56,10 +64,12 @@ public void createDatabase() throws CosmosException {
5664
public void createContainer() throws CosmosException {
5765
final String containerName = _configuration.getCollectionId();
5866
final CosmosAsyncDatabase database = _client.getDatabase(_configuration.getDatabaseId());
67+
final CollectionAttributes collectionAttributes = _entityConfiguration.collectionAttributes();
5968
try {
6069
LOGGER.info("Creating container {} in the database {}", containerName, database.getId());
6170
final CosmosContainerProperties containerProperties =
62-
new CosmosContainerProperties(containerName, PARTITION_KEY_PATH);
71+
new CosmosContainerProperties(containerName, PARTITION_KEY_PATH)
72+
.setIndexingPolicy(collectionAttributes.indexingPolicy());
6373
database.createContainerIfNotExists(containerProperties)
6474
.block(RESOURCE_CRUD_WAIT_TIME);
6575
} catch (CosmosException e) {
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
package com.azure.cosmos.benchmark.linkedin.data;
5+
6+
import com.azure.cosmos.models.IndexingPolicy;
7+
8+
9+
public interface CollectionAttributes {
10+
11+
/**
12+
* @return IndexingPolicy definition for a collection used to store a specific entity type
13+
*/
14+
IndexingPolicy indexingPolicy();
15+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
package com.azure.cosmos.benchmark.linkedin.data;
5+
6+
import com.fasterxml.jackson.databind.node.ObjectNode;
7+
import java.util.Map;
8+
9+
10+
public interface DataGenerator {
11+
/**
12+
* Generates the desired batch of records for a specific entity
13+
*
14+
* @param recordCount Number of records we want to create in this invocation
15+
* @return Map containing desired count of record key to value entries
16+
*/
17+
Map<Key, ObjectNode> generate(int recordCount);
18+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
package com.azure.cosmos.benchmark.linkedin.data;
5+
6+
/**
7+
* Interface for modeling the configurations for each entity, allowing the same implementation
8+
* to be leveraged for different use-cases
9+
*/
10+
public interface EntityConfiguration {
11+
12+
/**
13+
* @return KeyGenerator for this entity
14+
*/
15+
KeyGenerator keyGenerator();
16+
17+
/**
18+
* @return Data Generator for this entity, which facilitate generating documents conforming to this
19+
* entities schema
20+
*/
21+
DataGenerator dataGenerator();
22+
23+
/**
24+
* @return The configuration for the underlying collection used to store this entity's data
25+
*/
26+
CollectionAttributes collectionAttributes();
27+
}

0 commit comments

Comments
 (0)