[ServiceBus] Add additional stress test coverage to ensure parity with cross-language priorities (Azure#14437)

KieranBrantnerMagee · yunhaoling · web-flow · commit a912eecf5b0a · 2020-11-03T00:30:17.000-08:00
* Add additional stress test coverage to ensure parity with cross-language priorities.  (Renew, $management, and re-opening scenarios primarily)
* Add send_session_id and some additional hooks (batch/post receive, etc) to the stress test harness to allow this.
* Fix session_id population in stress test framework (was on send instead of message)
* PR fixes; make session test actually test sessions, adjust naming to normalize with our conventions now that this tool is more formalized, and touch up the autorenew logic to be more robust against delays.

Co-authored-by: Adam Ling (MSFT) <adam_ling@outlook.com>
diff --git a/sdk/servicebus/azure-servicebus/tests/stress_tests/stress_test_base.py b/sdk/servicebus/azure-servicebus/tests/stress_tests/stress_test_base.py
@@ -27,6 +27,7 @@
 class ReceiveType:
     push="push"
     pull="pull"
+    none=None
 
 
 class StressTestResults(object):
@@ -55,7 +56,7 @@ def __init__(self):
     def __repr__(self):
         return str(vars(self))
 
-    def PopulateProcessStats(self):
+    def populate_process_stats(self):
         self.timestamp = datetime.utcnow()
         try:
             self.cpu_percent = psutil.cpu_percent()
@@ -79,6 +80,7 @@ def __init__(self,
                  receive_delay = 0,
                  should_complete_messages = True,
                  max_message_count = 1,
+                 send_session_id = None,
                  fail_on_exception = True):
         self.senders = senders
         self.receivers = receivers
@@ -92,6 +94,7 @@ def __init__(self,
         self.should_complete_messages = should_complete_messages
         self.max_message_count = max_message_count
         self.fail_on_exception = fail_on_exception
+        self.send_session_id = send_session_id
 
         # Because of pickle we need to create a state object and not just pass around ourselves.
-        # If we ever require multiple runs of this one after another, just make Run() reset this.
+        # If we ever require multiple runs of this one after another, just make run() reset this.
@@ -106,72 +109,82 @@ def __init__(self,
 
 
     # Plugin functions the caller can override to further tailor the test.
-    def OnSend(self, state, sent_message):
-        '''Called on every successful send'''
+    def on_send(self, state, sent_message, sender):
+        '''Called on every successful send, per message'''
         pass
 
-    def OnReceive(self, state, received_message):
-        '''Called on every successful receive'''
+    def on_receive(self, state, received_message, receiver):
+        '''Called on every successful receive, per message'''
         pass
 
+    def on_receive_batch(self, state, batch, receiver):
+        '''Called on every successful receive, at the batch or iterator level rather than per-message'''
+        pass
+
+    def post_receive(self, state, receiver):
+        '''Called after completion of every successful receive'''
+        pass
 
-    def OnComplete(self, send_results=[], receive_results=[]):
+    def on_complete(self, send_results=[], receive_results=[]):
         '''Called on stress test run completion'''
         pass
 
 
-    def PreProcessMessage(self, message):
+    def pre_process_message(self, message):
         '''Allows user to transform the message before batching or sending it.'''
         pass
 
 
-    def PreProcessMessageBatch(self, message):
+    def pre_process_message_batch(self, message):
         '''Allows user to transform the batch before sending it.'''
         pass
 
 
-    def PreProcessMessageBody(self, payload):
+    def pre_process_message_body(self, payload):
         '''Allows user to transform message payload before sending it.'''
         return payload
 
 
-    def _ScheduleIntervalLogger(self, end_time, description="", interval_seconds=30):
-        def _doIntervalLogging():
+    def _schedule_interval_logger(self, end_time, description="", interval_seconds=30):
+        def _do_interval_logging():
             if end_time > datetime.utcnow() and not self._should_stop:
-                self._state.PopulateProcessStats()
+                self._state.populate_process_stats()
                 _logger.critical("{} RECURRENT STATUS:".format(description))
                 _logger.critical(self._state)
-                self._ScheduleIntervalLogger(end_time, description, interval_seconds)
+                self._schedule_interval_logger(end_time, description, interval_seconds)
 
-        t = threading.Timer(interval_seconds, _doIntervalLogging)
+        t = threading.Timer(interval_seconds, _do_interval_logging)
         t.start()
 
 
-    def _ConstructMessage(self):
+    def _construct_message(self):
         if self.send_batch_size != None:
             batch = ServiceBusMessageBatch()
             for _ in range(self.send_batch_size):
-                message = ServiceBusMessage(self.PreProcessMessageBody("a" * self.message_size))
-                self.PreProcessMessage(message)
+                message = ServiceBusMessage(self.pre_process_message_body("a" * self.message_size))
+                self.pre_process_message(message)
                 batch.add_message(message)
-            self.PreProcessMessageBatch(batch)
+            self.pre_process_message_batch(batch)
             return batch
         else:
-            message = ServiceBusMessage(self.PreProcessMessageBody("a" * self.message_size))
-            self.PreProcessMessage(message)
+            message = ServiceBusMessage(self.pre_process_message_body("a" * self.message_size))
+            self.pre_process_message(message)
             return message
 
-    def _Send(self, sender, end_time):
-        self._ScheduleIntervalLogger(end_time, "Sender " + str(self))
+
+    def _send(self, sender, end_time):
+        self._schedule_interval_logger(end_time, "Sender " + str(self))
         try:
             _logger.info("STARTING SENDER")
             with sender:
                 while end_time > datetime.utcnow() and not self._should_stop:
                     _logger.info("SENDING")
                     try:
-                        message = self._ConstructMessage()
+                        message = self._construct_message()
+                        if self.send_session_id != None:
+                            message.session_id = self.send_session_id
                         sender.send_messages(message)
-                        self.OnSend(self._state, message)
+                        self.on_send(self._state, message, sender)
                     except Exception as e:
                         _logger.exception("Exception during send: {}".format(e))
                         self._state.exceptions.append(e)
@@ -186,8 +199,8 @@ def _Send(self, sender, end_time):
             self._should_stop = True
             raise
 
-    def _Receive(self, receiver, end_time):
-        self._ScheduleIntervalLogger(end_time, "Receiver " + str(self))
+    def _receive(self, receiver, end_time):
+        self._schedule_interval_logger(end_time, "Receiver " + str(self))
         try:
             with receiver:
                 while end_time > datetime.utcnow() and not self._should_stop:
@@ -199,7 +212,7 @@ def _Receive(self, receiver, end_time):
                             batch = receiver.get_streaming_message_iter(max_wait_time=self.max_wait_time)
 
                         for message in batch:
-                            self.OnReceive(self._state, message)
+                            self.on_receive(self._state, message, receiver)
                             try:
                                 if self.should_complete_messages:
                                     receiver.complete_message(message)
@@ -210,6 +223,7 @@ def _Receive(self, receiver, end_time):
                             if end_time <= datetime.utcnow():
                                 break
                             time.sleep(self.receive_delay)
+                        self.post_receive(self._state, receiver)
                     except Exception as e:
                         _logger.exception("Exception during receive: {}".format(e))
                         self._state.exceptions.append(e)
@@ -223,15 +237,15 @@ def _Receive(self, receiver, end_time):
             raise
 
 
-    def Run(self):
+    def run(self):
         start_time = datetime.utcnow()
         end_time = start_time + (self._duration_override or self.duration)
         sent_messages = 0
         received_messages = 0
         with concurrent.futures.ThreadPoolExecutor(max_workers=4) as proc_pool:
             _logger.info("STARTING PROC POOL")
-            senders = [proc_pool.submit(self._Send, sender, end_time) for sender in self.senders]
-            receivers = [proc_pool.submit(self._Receive, receiver, end_time) for receiver in self.receivers]
+            senders = [proc_pool.submit(self._send, sender, end_time) for sender in self.senders]
+            receivers = [proc_pool.submit(self._receive, receiver, end_time) for receiver in self.receivers]
 
             result = StressTestResults()
             for each in concurrent.futures.as_completed(senders + receivers):
diff --git a/sdk/servicebus/azure-servicebus/tests/stress_tests/test_stress_queues.py b/sdk/servicebus/azure-servicebus/tests/stress_tests/test_stress_queues.py
@@ -10,7 +10,7 @@
 import sys
 import time
 
-from azure.servicebus import ServiceBusClient
+from azure.servicebus import ServiceBusClient, AutoLockRenewer
 from azure.servicebus._common.constants import ReceiveMode
 
 from devtools_testutils import AzureMgmtTestCase, CachedResourceGroupPreparer
@@ -35,7 +35,7 @@ def test_stress_queue_send_and_receive(self, servicebus_namespace_connection_str
                                        receivers = [sb_client.get_queue_receiver(servicebus_queue.name)],
                                        duration=timedelta(seconds=60))
 
-        result = stress_test.Run()
+        result = stress_test.run()
         assert(result.total_sent > 0)
         assert(result.total_received > 0)
 
@@ -54,7 +54,7 @@ def test_stress_queue_send_and_pull_receive(self, servicebus_namespace_connectio
                                        receive_type=ReceiveType.pull,
                                        duration=timedelta(seconds=60))
 
-        result = stress_test.Run()
+        result = stress_test.run()
         assert(result.total_sent > 0)
         assert(result.total_received > 0)
 
@@ -73,7 +73,7 @@ def test_stress_queue_batch_send_and_receive(self, servicebus_namespace_connecti
                                        duration=timedelta(seconds=60),
                                        send_batch_size=5)
 
-        result = stress_test.Run()
+        result = stress_test.run()
         assert(result.total_sent > 0)
         assert(result.total_received > 0)
 
@@ -92,7 +92,7 @@ def test_stress_queue_slow_send_and_receive(self, servicebus_namespace_connectio
                                        duration=timedelta(seconds=3501*3),
                                        send_delay=3500)
 
-        result = stress_test.Run()
+        result = stress_test.run()
         assert(result.total_sent > 0)
         assert(result.total_received > 0)
 
@@ -110,7 +110,7 @@ def test_stress_queue_receive_and_delete(self, servicebus_namespace_connection_s
                                        receivers = [sb_client.get_queue_receiver(servicebus_queue.name, receive_mode=ReceiveMode.ReceiveAndDelete)],
                                        duration=timedelta(seconds=60))
 
-        result = stress_test.Run()
+        result = stress_test.run()
         assert(result.total_sent > 0)
         assert(result.total_received > 0)
 
@@ -129,7 +129,7 @@ def test_stress_queue_unsettled_messages(self, servicebus_namespace_connection_s
                                        duration = timedelta(seconds=350),
                                        should_complete_messages = False)
 
-        result = stress_test.Run()
+        result = stress_test.run()
         # This test is prompted by reports of an issue where enough unsettled messages saturate a service-side cache
         # and prevent further receipt.
         assert(result.total_sent > 2500)
@@ -150,15 +150,16 @@ def test_stress_queue_receive_large_batch_size(self, servicebus_namespace_connec
                                        duration = timedelta(seconds=60),
                                        max_message_count = 50)
 
-        result = stress_test.Run()
+        result = stress_test.run()
         assert(result.total_sent > 0)
         assert(result.total_received > 0)
 
     # Cannot be defined at local scope due to pickling into multiproc runner.
     class ReceiverTimeoutStressTestRunner(StressTestRunner):
-        def OnSend(self, state, sent_message):
+        def on_send(self, state, sent_message, sender):
             '''Called on every successful send'''
             if state.total_sent % 10 == 0:
+                # To make receive time out, in push mode this delay would trigger receiver reconnection
                 time.sleep(self.max_wait_time + 5)
 
     @pytest.mark.liveTest
@@ -177,6 +178,124 @@ def test_stress_queue_pull_receive_timeout(self, servicebus_namespace_connection
             receive_type=ReceiveType.pull,
             duration=timedelta(seconds=600))
 
-        result = stress_test.Run()
+        result = stress_test.run()
         assert(result.total_sent > 0)
-        assert(result.total_received > 0)
+        assert(result.total_received > 0)
+
+
+    class LongRenewStressTestRunner(StressTestRunner):
+        def on_receive(self, state, received_message, receiver):
+            '''Called on every successful receive; registers the message for lock auto-renewal (300s) and holds it for 300s'''
+            renewer = AutoLockRenewer()
+            renewer.register(received_message, timeout=300)
+            time.sleep(300)
+
+    @pytest.mark.liveTest
+    @pytest.mark.live_test_only
+    @CachedResourceGroupPreparer(name_prefix='servicebustest')
+    @ServiceBusNamespacePreparer(name_prefix='servicebustest')
+    @ServiceBusQueuePreparer(name_prefix='servicebustest')
+    def test_stress_queue_long_renew_send_and_receive(self, servicebus_namespace_connection_string, servicebus_queue):
+        sb_client = ServiceBusClient.from_connection_string(
+            servicebus_namespace_connection_string, debug=False)
+
+        stress_test = ServiceBusQueueStressTests.LongRenewStressTestRunner(
+                                       senders = [sb_client.get_queue_sender(servicebus_queue.name)],
+                                       receivers = [sb_client.get_queue_receiver(servicebus_queue.name)],
+                                       duration=timedelta(seconds=3000),
+                                       send_delay=300)
+
+        result = stress_test.run()
+        assert(result.total_sent > 0)
+        assert(result.total_received > 0)
+
+
+    class LongSessionRenewStressTestRunner(StressTestRunner):
+        def on_receive(self, state, received_message, receiver):
+            '''Called on every successful receive'''
+            renewer = AutoLockRenewer()
+            def on_fail(renewable, error):
+                print("FAILED AUTOLOCKRENEW: " + str(error))
+            renewer.register(receiver.session, timeout=600, on_lock_renew_failure=on_fail)
+
+    @pytest.mark.liveTest
+    @pytest.mark.live_test_only
+    @CachedResourceGroupPreparer(name_prefix='servicebustest')
+    @ServiceBusNamespacePreparer(name_prefix='servicebustest')
+    @ServiceBusQueuePreparer(name_prefix='servicebustest', requires_session=True)
+    def test_stress_queue_long_renew_session_send_and_receive(self, servicebus_namespace_connection_string, servicebus_queue):
+        sb_client = ServiceBusClient.from_connection_string(
+            servicebus_namespace_connection_string, debug=False)
+
+        session_id = 'test_stress_queue_long_renew_send_and_receive'
+
+        stress_test = ServiceBusQueueStressTests.LongSessionRenewStressTestRunner(
+                                       senders = [sb_client.get_queue_sender(servicebus_queue.name)],
+                                       receivers = [sb_client.get_queue_receiver(servicebus_queue.name, session_id=session_id)],
+                                       duration=timedelta(seconds=3000),
+                                       send_delay=300,
+                                       send_session_id=session_id)
+
+        result = stress_test.run()
+        assert(result.total_sent > 0)
+        assert(result.total_received > 0)
+
+
+    class Peekon_receiveStressTestRunner(StressTestRunner):
+        def on_receive_batch(self, state, received_message, receiver):
+            '''Called on every successful receive, at the batch or iterator level; peeks via the receiver and asserts the first peeked message is present'''
+            assert receiver.peek_messages()[0]
+
+    @pytest.mark.liveTest
+    @pytest.mark.live_test_only
+    @CachedResourceGroupPreparer(name_prefix='servicebustest')
+    @ServiceBusNamespacePreparer(name_prefix='servicebustest')
+    @ServiceBusQueuePreparer(name_prefix='servicebustest')
+    def test_stress_queue_peek_messages(self, servicebus_namespace_connection_string, servicebus_queue):
+        sb_client = ServiceBusClient.from_connection_string(
+            servicebus_namespace_connection_string, debug=False)
+
+        stress_test = ServiceBusQueueStressTests.Peekon_receiveStressTestRunner(
+                                       senders = [sb_client.get_queue_sender(servicebus_queue.name)],
+                                       receivers = [sb_client.get_queue_receiver(servicebus_queue.name)],
+                                       duration = timedelta(seconds=300),
+                                       receive_delay = 30,
+                                       receive_type = ReceiveType.none)
+
+        result = stress_test.run()
+        assert(result.total_sent > 0)
+        # TODO: This merits better validation, to be implemented alongside full metric spread.
+
+
+    class RestartHandlerStressTestRunner(StressTestRunner):
+        def post_receive(self, state, receiver):
+            '''Called after completion of every successful receive; closes and re-opens the receiver whenever total_received is a multiple of 3'''
+            if state.total_received % 3 == 0:
+                receiver.__exit__()
+                receiver.__enter__()
+
+        def on_send(self, state, sent_message, sender):
+            '''Called on every successful send, per message; closes and re-opens the sender whenever total_sent is a multiple of 3'''
+            if state.total_sent % 3 == 0:
+                sender.__exit__()
+                sender.__enter__()
+
+    @pytest.mark.liveTest
+    @pytest.mark.live_test_only
+    @CachedResourceGroupPreparer(name_prefix='servicebustest')
+    @ServiceBusNamespacePreparer(name_prefix='servicebustest')
+    @ServiceBusQueuePreparer(name_prefix='servicebustest')
+    def test_stress_queue_close_and_reopen(self, servicebus_namespace_connection_string, servicebus_queue):
+        sb_client = ServiceBusClient.from_connection_string(
+            servicebus_namespace_connection_string, debug=False)
+
+        stress_test = ServiceBusQueueStressTests.RestartHandlerStressTestRunner(
+                                       senders = [sb_client.get_queue_sender(servicebus_queue.name)],
+                                       receivers = [sb_client.get_queue_receiver(servicebus_queue.name)],
+                                       duration = timedelta(seconds=300),
+                                       receive_delay = 30,
+                                       send_delay = 10)
+
+        result = stress_test.run()
+        assert(result.total_sent > 0)
+        assert(result.total_received > 0)