Skip to content

Commit acc476c

Browse files
authored
[EventHubs] Stress test using azure monitor service for metrics/traces/logging (Azure#18837)
Addressing issue: Azure#18792
1 parent cff35f0 commit acc476c

File tree

8 files changed

+241
-52
lines changed

8 files changed

+241
-52
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# --------------------------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for license information.
4+
# --------------------------------------------------------------------------------------------
5+
6+
import os
7+
8+
from opencensus.ext.azure import metrics_exporter
9+
from opencensus.stats import aggregation as aggregation_module
10+
from opencensus.stats import measure as measure_module
11+
from opencensus.stats import stats as stats_module
12+
from opencensus.stats import view as view_module
13+
14+
from logger import get_azure_logger
15+
16+
17+
class AzureMonitorMetric:
    """Report Event Hubs stress-test telemetry to Azure Monitor.

    Tracks four measures via OpenCensus and the Azure metrics exporter:
    a running count of handled events, last-observed CPU and memory usage
    percentages, and a count of errors.  Errors are additionally logged
    (with traceback) through an Azure-backed logger.

    :param test_name: Name of the stress test; embedded in every measure name.
    :param test_description: Optional description; embedded in the measure
     descriptions when provided, otherwise descriptions are ``None``.
    """

    def __init__(self, test_name, test_description=None):
        # The exporter automatically picks up the connection string from the
        # 'APPLICATIONINSIGHTS_CONNECTION_STRING' environment variable.
        self.exporter = metrics_exporter.new_metrics_exporter()
        self.stats = stats_module.stats
        self.view_manager = self.stats.view_manager
        self.stats_recorder = self.stats.stats_recorder
        self.azure_logger = get_azure_logger(test_name)
        self.name = test_name
        self.desc = test_description

        events_measure_name = "The number of events handled by " + self.name
        events_measure_desc = ("The number of events handled by " + self.desc) if self.desc else None
        memory_measure_name = "memory usage percentage for " + self.name
        memory_measure_desc = ("memory usage percentage for " + self.desc) if self.desc else None
        cpu_measure_name = "cpu usage percentage for " + self.name
        cpu_measure_desc = ("cpu usage percentage for " + self.desc) if self.desc else None
        error_measure_name = "error count for " + self.name
        error_measure_desc = ("The number of errors happened while running the test for " + self.desc) if self.desc else None

        self.events_measure = measure_module.MeasureInt(events_measure_name, events_measure_desc, "events")
        self.memory_measure = measure_module.MeasureFloat(memory_measure_name, memory_measure_desc)
        self.cpu_measure = measure_module.MeasureFloat(cpu_measure_name, cpu_measure_desc)
        self.error_measure = measure_module.MeasureInt(error_measure_name, error_measure_desc)

        # Cumulative sum for events, point-in-time values for cpu/memory,
        # occurrence count for errors.
        self.events_measure_view = self._build_view(
            events_measure_name, events_measure_desc, self.events_measure, aggregation_module.SumAggregation()
        )
        self.memory_measure_view = self._build_view(
            memory_measure_name, memory_measure_desc, self.memory_measure, aggregation_module.LastValueAggregation()
        )
        self.cpu_measure_view = self._build_view(
            cpu_measure_name, cpu_measure_desc, self.cpu_measure, aggregation_module.LastValueAggregation()
        )
        self.error_measure_view = self._build_view(
            error_measure_name, error_measure_desc, self.error_measure, aggregation_module.CountAggregation()
        )

        for view in (
            self.events_measure_view,
            self.memory_measure_view,
            self.cpu_measure_view,
            self.error_measure_view,
        ):
            self.view_manager.register_view(view)

        self.mmap = self.stats_recorder.new_measurement_map()

    @staticmethod
    def _build_view(name, description, measure, aggregation):
        # All views use no tag columns ([]); only the aggregation differs.
        return view_module.View(name, description, [], measure, aggregation)

    def record_events_cpu_memory(self, number_of_events, cpu_usage, memory_usage):
        """Record a batch of handled events together with current resource usage.

        :param number_of_events: Number of events processed since the last record.
        :param cpu_usage: Current CPU usage percentage.
        :param memory_usage: Current memory usage percentage.
        """
        self.mmap.measure_int_put(self.events_measure, number_of_events)
        self.mmap.measure_float_put(self.memory_measure, memory_usage)
        self.mmap.measure_float_put(self.cpu_measure, cpu_usage)
        self.mmap.record()

    def record_error(self, error, extra=None):
        """Record one error occurrence and log it with full details.

        :param error: The exception (or error object) that occurred.
        :param extra: Optional extra context to include in the log message.
        """
        self.mmap.measure_int_put(self.error_measure, 1)
        self.mmap.record()
        self.azure_logger.exception(
            "Error happened when running {}: {}. Extra info: {}".format(self.name, repr(error), extra)
        )

sdk/eventhub/azure-eventhub/stress/azure_eventhub_consumer_stress_async.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import os
1111
import logging
1212
from collections import defaultdict
13+
from functools import partial
1314

1415
from azure.identity.aio import ClientSecretCredential
1516
from azure.eventhub.aio import EventHubConsumerClient
@@ -19,6 +20,7 @@
1920

2021
from logger import get_logger
2122
from process_monitor import ProcessMonitor
23+
from app_insights_metric import AzureMonitorMetric
2224

2325

2426
def parse_starting_position(args):
@@ -65,7 +67,7 @@ def parse_starting_position(args):
6567
type=int,
6668
default=0
6769
)
68-
parser.add_argument("--parallel_recv_cnt", help="Number of receive clients doing parallel receiving", type=int)
70+
parser.add_argument("--parallel_recv_cnt", help="Number of receive clients doing parallel receiving", type=int, default=1)
6971
parser.add_argument("--proxy_hostname", type=str)
7072
parser.add_argument("--proxy_port", type=str)
7173
parser.add_argument("--proxy_username", type=str)
@@ -89,6 +91,8 @@ def parse_starting_position(args):
8991
recv_cnt_iteration_map = defaultdict(int)
9092
recv_time_map = dict()
9193

94+
azure_metric_monitor = AzureMonitorMetric("Async EventHubConsumerClient")
95+
9296

9397
class EventHubConsumerClientTest(EventHubConsumerClient):
9498
async def get_partition_ids(self):
@@ -98,7 +102,7 @@ async def get_partition_ids(self):
98102
return await super(EventHubConsumerClientTest, self).get_partition_ids()
99103

100104

101-
async def on_event_received(partition_context, event):
105+
async def on_event_received(process_monitor, partition_context, event):
102106
recv_cnt_map[partition_context.partition_id] += 1 if event else 0
103107
if recv_cnt_map[partition_context.partition_id] % LOG_PER_COUNT == 0:
104108
total_time_elapsed = time.perf_counter() - start_time
@@ -113,10 +117,15 @@ async def on_event_received(partition_context, event):
113117
recv_cnt_map[partition_context.partition_id] / total_time_elapsed,
114118
LOG_PER_COUNT / (partition_current_time - partition_previous_time) if partition_previous_time else None
115119
)
120+
azure_metric_monitor.record_events_cpu_memory(
121+
LOG_PER_COUNT,
122+
process_monitor.cpu_usage_percent,
123+
process_monitor.memory_usage_percent
124+
)
116125
await partition_context.update_checkpoint(event)
117126

118127

119-
async def on_event_batch_received(partition_context, event_batch):
128+
async def on_event_batch_received(process_monitor, partition_context, event_batch):
120129
recv_cnt_map[partition_context.partition_id] += len(event_batch)
121130
recv_cnt_iteration_map[partition_context.partition_id] += len(event_batch)
122131
if recv_cnt_iteration_map[partition_context.partition_id] > LOG_PER_COUNT:
@@ -133,9 +142,18 @@ async def on_event_batch_received(partition_context, event_batch):
133142
recv_cnt_iteration_map[partition_context.partition_id] / (partition_current_time - partition_previous_time) if partition_previous_time else None
134143
)
135144
recv_cnt_iteration_map[partition_context.partition_id] = 0
145+
azure_metric_monitor.record_events_cpu_memory(
146+
LOG_PER_COUNT,
147+
process_monitor.cpu_usage_percent,
148+
process_monitor.memory_usage_percent
149+
)
136150
await partition_context.update_checkpoint()
137151

138152

153+
async def on_error(partition_context, exception):
154+
azure_metric_monitor.record_error(exception, extra="partition: {}".format(partition_context.partition_id))
155+
156+
139157
def create_client(args):
140158

141159
if args.storage_conn_str:
@@ -198,26 +216,31 @@ def create_client(args):
198216

199217
async def run(args):
200218

201-
with ProcessMonitor("monitor_{}".format(args.log_filename), "consumer_stress_async", print_console=args.print_console):
219+
with ProcessMonitor("monitor_{}".format(args.log_filename), "consumer_stress_async", print_console=args.print_console) as process_monitor:
202220
kwargs_dict = {
203221
"prefetch": args.link_credit,
204222
"partition_id": str(args.recv_partition_id) if args.recv_partition_id else None,
205223
"track_last_enqueued_event_properties": args.track_last_enqueued_event_properties,
206-
"starting_position": starting_position
224+
"starting_position": starting_position,
225+
"on_error": on_error
207226
}
208227
if args.max_batch_size:
209228
kwargs_dict["max_batch_size"] = args.max_batch_size
210229
if args.max_wait_time:
211230
kwargs_dict["max_wait_time"] = args.max_wait_time
231+
232+
on_event_received_with_process_monitor = partial(on_event_received, process_monitor)
233+
on_event_batch_received_with_process_monitor = partial(on_event_batch_received, process_monitor)
234+
212235
if args.parallel_recv_cnt and args.parallel_recv_cnt > 1:
213236
clients = [create_client(args) for _ in range(args.parallel_recv_cnt)]
214237
tasks = [
215238
asyncio.ensure_future(
216239
clients[i].receive_batch(
217-
on_event_batch_received,
240+
on_event_batch_received_with_process_monitor,
218241
**kwargs_dict
219242
) if args.max_batch_size else clients[i].receive(
220-
on_event_received,
243+
on_event_received_with_process_monitor,
221244
**kwargs_dict
222245
)
223246
) for i in range(args.parallel_recv_cnt)
@@ -226,10 +249,10 @@ async def run(args):
226249
clients = [create_client(args)]
227250
tasks = [asyncio.ensure_future(
228251
clients[0].receive_batch(
229-
on_event_batch_received,
252+
on_event_batch_received_with_process_monitor,
230253
**kwargs_dict
231254
) if args.max_batch_size else clients[0].receive(
232-
on_event_received,
255+
on_event_received_with_process_monitor,
233256
**kwargs_dict
234257
)
235258
)]

sdk/eventhub/azure-eventhub/stress/azure_eventhub_consumer_stress_sync.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,15 @@
1010
import os
1111
import logging
1212
from collections import defaultdict
13+
from functools import partial
1314

1415
from azure.identity import ClientSecretCredential
1516
from azure.eventhub.extensions.checkpointstoreblob import BlobCheckpointStore
1617
from azure.eventhub import EventHubConsumerClient, TransportType, EventHubSharedKeyCredential
1718

1819
from logger import get_logger
1920
from process_monitor import ProcessMonitor
21+
from app_insights_metric import AzureMonitorMetric
2022

2123

2224
def parse_starting_position(args):
@@ -88,6 +90,8 @@ def parse_starting_position(args):
8890
recv_cnt_iteration_map = defaultdict(int)
8991
recv_time_map = dict()
9092

93+
azure_metric_monitor = AzureMonitorMetric("Sync EventHubConsumerClient")
94+
9195

9296
class EventHubConsumerClientTest(EventHubConsumerClient):
9397
def get_partition_ids(self):
@@ -97,7 +101,7 @@ def get_partition_ids(self):
97101
return super(EventHubConsumerClientTest, self).get_partition_ids()
98102

99103

100-
def on_event_received(partition_context, event):
104+
def on_event_received(process_monitor, partition_context, event):
101105
recv_cnt_map[partition_context.partition_id] += 1 if event else 0
102106
if recv_cnt_map[partition_context.partition_id] % LOG_PER_COUNT == 0:
103107
total_time_elapsed = time.perf_counter() - start_time
@@ -112,10 +116,15 @@ def on_event_received(partition_context, event):
112116
recv_cnt_map[partition_context.partition_id] / total_time_elapsed,
113117
LOG_PER_COUNT / (partition_current_time - partition_previous_time) if partition_previous_time else None
114118
)
119+
azure_metric_monitor.record_events_cpu_memory(
120+
LOG_PER_COUNT,
121+
process_monitor.cpu_usage_percent,
122+
process_monitor.memory_usage_percent
123+
)
115124
partition_context.update_checkpoint(event)
116125

117126

118-
def on_event_batch_received(partition_context, event_batch):
127+
def on_event_batch_received(process_monitor, partition_context, event_batch):
119128
recv_cnt_map[partition_context.partition_id] += len(event_batch)
120129
recv_cnt_iteration_map[partition_context.partition_id] += len(event_batch)
121130
if recv_cnt_iteration_map[partition_context.partition_id] > LOG_PER_COUNT:
@@ -131,9 +140,18 @@ def on_event_batch_received(partition_context, event_batch):
131140
recv_cnt_iteration_map[partition_context.partition_id] / (partition_current_time - partition_previous_time) if partition_previous_time else None
132141
)
133142
recv_cnt_iteration_map[partition_context.partition_id] = 0
143+
azure_metric_monitor.record_events_cpu_memory(
144+
LOG_PER_COUNT,
145+
process_monitor.cpu_usage_percent,
146+
process_monitor.memory_usage_percent
147+
)
134148
partition_context.update_checkpoint()
135149

136150

151+
def on_error(partition_context, exception):
152+
azure_metric_monitor.record_error(exception, extra="partition: {}".format(partition_context.partition_id))
153+
154+
137155
def create_client(args):
138156
if args.storage_conn_str:
139157
checkpoint_store = BlobCheckpointStore.from_connection_string(args.storage_conn_str, args.storage_container_name)
@@ -194,23 +212,29 @@ def create_client(args):
194212

195213

196214
def run(args):
197-
with ProcessMonitor("monitor_{}".format(args.log_filename), "consumer_stress_sync", print_console=args.print_console):
215+
216+
with ProcessMonitor("monitor_{}".format(args.log_filename), "consumer_stress_sync", print_console=args.print_console) as process_monitor:
198217
kwargs_dict = {
199218
"prefetch": args.link_credit,
200219
"partition_id": str(args.recv_partition_id) if args.recv_partition_id else None,
201220
"track_last_enqueued_event_properties": args.track_last_enqueued_event_properties,
202-
"starting_position": starting_position
221+
"starting_position": starting_position,
222+
"on_error": on_error
203223
}
204224
if args.max_batch_size:
205225
kwargs_dict["max_batch_size"] = args.max_batch_size
206226
if args.max_wait_time:
207227
kwargs_dict["max_wait_time"] = args.max_wait_time
228+
229+
on_event_received_with_process_monitor = partial(on_event_received, process_monitor)
230+
on_event_batch_received_with_process_monitor = partial(on_event_batch_received, process_monitor)
231+
208232
if args.parallel_recv_cnt and args.parallel_recv_cnt > 1:
209233
clients = [create_client(args) for _ in range(args.parallel_recv_cnt)]
210234
threads = [
211235
threading.Thread(
212236
target=clients[i].receive_batch if args.max_batch_size else clients[i].receive,
213-
args=(on_event_batch_received if args.max_batch_size else on_event_received,),
237+
args=(on_event_batch_received_with_process_monitor if args.max_batch_size else on_event_received_with_process_monitor,),
214238
kwargs=kwargs_dict,
215239
daemon=True
216240
) for i in range(args.parallel_recv_cnt)
@@ -219,7 +243,7 @@ def run(args):
219243
clients = [create_client(args)]
220244
threads = [threading.Thread(
221245
target=clients[0].receive_batch if args.max_batch_size else clients[0].receive,
222-
args=(on_event_batch_received if args.max_batch_size else on_event_received,),
246+
args=(on_event_batch_received_with_process_monitor if args.max_batch_size else on_event_received_with_process_monitor,),
223247
kwargs=kwargs_dict,
224248
daemon=True
225249
)]

0 commit comments

Comments
 (0)