Skip to content

Commit fedcf66

Browse files
authored
Externalise a few timeouts & fix timeout for hostSupportsUefi in libvirt ready command wrapper (apache#8547)
This PR fixes bug introduced in apache#8502. Timeout for script execution was set to 60 ms instead of 60s which resulted in host not getting UEFI enabled. This is a blocker for 4.19 release. We do this by introducing a new agent parameter `agent.script.timeout` (default - 60 seconds) to use as a timeout for the script checking host's UEFI status. We also externalize the timeout for the ReadyCommand by introducing a new global setting `ready.command.wait` (default - 60 seconds). For ModifyStoragePoolCommand, we don't externalize the timeout to avoid confusion for the user. Since, the required timeout can vary depending on the provider in use and we are only setting the wait for default host listener for now. Instead, we reuse the global `wait` setting by dividing it by `5` making the default value of 6 minutes (1800/5 = 360s) for ModifyStoragePoolCommand. Note: the actual time, the MS waits is twice the wait set for a Command. Check reference code below. https://github.com/apache/cloudstack/blob/19250403e645c76f60b17aa4aeb4dc915f5ca206/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java#L406-L442
1 parent 1925040 commit fedcf66

File tree

7 files changed

+46
-8
lines changed

7 files changed

+46
-8
lines changed

agent/conf/agent.properties

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
# to you under the Apache License, Version 2.0 (the
66
# "License"); you may not use this file except in compliance
77
# with the License. You may obtain a copy of the License at
8-
#
8+
#
99
# http://www.apache.org/licenses/LICENSE-2.0
10-
#
10+
#
1111
# Unless required by applicable law or agreed to in writing,
1212
# software distributed under the License is distributed on an
1313
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -378,6 +378,10 @@ iscsi.session.cleanup.enabled=false
378378
# If the time is exceeded shutdown will be forced.
379379
#stop.script.timeout=120
380380

381+
# Time (in seconds) to wait for scripts to complete.
382+
# This is currently used only while checking if the host supports UEFI.
383+
#agent.script.timeout=60
384+
381385
# Definition of VMs video model type.
382386
#vm.video.hardware=
383387

agent/src/main/java/com/cloud/agent/properties/AgentProperties.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,14 @@ public class AgentProperties{
659659
*/
660660
public static final Property<Integer> STOP_SCRIPT_TIMEOUT = new Property<>("stop.script.timeout", 120);
661661

662+
/**
663+
* Time (in seconds) to wait for scripts to complete.<br>
664+
* This is currently used only while checking if the host supports UEFI.<br>
665+
* Data type: Integer.<br>
666+
* Default value: <code>60</code>
667+
*/
668+
public static final Property<Integer> AGENT_SCRIPT_TIMEOUT = new Property<>("agent.script.timeout", 60);
669+
662670
/**
663671
* Definition of VMs video model type.<br>
664672
* Data type: String.<br>

engine/components-api/src/main/java/com/cloud/agent/AgentManager.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ public interface AgentManager {
4747
"according to the hosts health check results",
4848
true, ConfigKey.Scope.Cluster, null);
4949

50+
ConfigKey<Integer> ReadyCommandWait = new ConfigKey<Integer>("Advanced", Integer.class, "ready.command.wait",
51+
"60", "Time in seconds to wait for Ready command to return", true);
52+
5053
public enum TapAgentsAction {
5154
Add, Del, Contains,
5255
}

engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ protected AgentAttache notifyMonitorsOfConnection(final AgentAttache attache, fi
596596

597597
final Long dcId = host.getDataCenterId();
598598
final ReadyCommand ready = new ReadyCommand(dcId, host.getId(), NumbersUtil.enableHumanReadableSizes);
599-
ready.setWait(60);
599+
ready.setWait(ReadyCommandWait.value());
600600
final Answer answer = easySend(hostId, ready);
601601
if (answer == null || !answer.getResult()) {
602602
// this is tricky part for secondary storage
@@ -1838,7 +1838,7 @@ public String getConfigComponentName() {
18381838
@Override
18391839
public ConfigKey<?>[] getConfigKeys() {
18401840
return new ConfigKey<?>[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize,
1841-
DirectAgentPoolSize, DirectAgentThreadCap, EnableKVMAutoEnableDisable };
1841+
DirectAgentPoolSize, DirectAgentThreadCap, EnableKVMAutoEnableDisable, ReadyCommandWait };
18421842
}
18431843

18441844
protected class SetHostParamsListener implements Listener {

engine/storage/volume/src/main/java/org/apache/cloudstack/storage/datastore/provider/DefaultHostListener.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@
5757

5858
public class DefaultHostListener implements HypervisorHostListener {
5959
private static final Logger s_logger = Logger.getLogger(DefaultHostListener.class);
60+
61+
/**
62+
* Wait time for modify storage pool command to complete. We should wait for 5 minutes for the command to complete.
63+
* This should ideally be externalised as a global configuration parameter in the future (See #8506).
64+
**/
65+
private final int modifyStoragePoolCommandWait = 300; // 5 minutes
6066
@Inject
6167
AgentManager agentMgr;
6268
@Inject
@@ -84,7 +90,6 @@ public class DefaultHostListener implements HypervisorHostListener {
8490
@Inject
8591
NetworkDao networkDao;
8692

87-
8893
@Override
8994
public boolean hostAdded(long hostId) {
9095
return true;
@@ -121,7 +126,9 @@ private NicTO createNicTOFromNetworkAndOffering(NetworkVO networkVO, NetworkOffe
121126
public boolean hostConnect(long hostId, long poolId) throws StorageConflictException {
122127
StoragePool pool = (StoragePool) this.dataStoreMgr.getDataStore(poolId, DataStoreRole.Primary);
123128
ModifyStoragePoolCommand cmd = new ModifyStoragePoolCommand(true, pool);
124-
cmd.setWait(60);
129+
cmd.setWait(modifyStoragePoolCommandWait);
130+
s_logger.debug(String.format("Sending modify storage pool command to agent: %d for storage pool: %d with timeout %d seconds",
131+
hostId, poolId, cmd.getWait()));
125132
final Answer answer = agentMgr.easySend(hostId, cmd);
126133

127134
if (answer == null) {

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtReadyCommandWrapper.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import com.cloud.agent.api.Answer;
2626
import com.cloud.agent.api.ReadyAnswer;
2727
import com.cloud.agent.api.ReadyCommand;
28+
import com.cloud.agent.properties.AgentProperties;
29+
import com.cloud.agent.properties.AgentPropertiesFileHandler;
2830
import com.cloud.host.Host;
2931
import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource;
3032
import com.cloud.resource.CommandWrapper;
@@ -51,11 +53,12 @@ public Answer execute(final ReadyCommand command, final LibvirtComputingResource
5153

5254
private boolean hostSupportsUefi(boolean isUbuntuHost) {
5355
String cmd = "rpm -qa | grep -i ovmf";
56+
int timeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.AGENT_SCRIPT_TIMEOUT) * 1000; // Get property value & convert to milliseconds
5457
if (isUbuntuHost) {
5558
cmd = "dpkg -l ovmf";
5659
}
57-
s_logger.debug("Running command : " + cmd);
58-
int result = Script.runSimpleBashScriptForExitValue(cmd, 60, false);
60+
s_logger.debug("Running command : [" + cmd + "] with timeout : " + timeout + " ms");
61+
int result = Script.runSimpleBashScriptForExitValue(cmd, timeout, false);
5962
s_logger.debug("Got result : " + result);
6063
return result == 0;
6164
}

utils/src/main/java/com/cloud/utils/script/Script.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,19 @@ public static int runSimpleBashScriptForExitValueAvoidLogging(String command) {
531531
return runSimpleBashScriptForExitValue(command, 0, true);
532532
}
533533

534+
/**
535+
* Executes a bash script and returns the exit value of the script.
536+
*
537+
* @param command
538+
* The bash command to be executed.
539+
* @param timeout
540+
* The maximum time (in milliseconds) that the script is allowed to run before it is forcibly terminated.
541+
* @param avoidLogging
542+
* If set to true, some logging is avoided.
543+
*
544+
* @return The exit value of the script. Returns -1 if the result is null or empty, or if it cannot be parsed into
545+
* an integer which can happen in case of a timeout.
546+
*/
534547
public static int runSimpleBashScriptForExitValue(String command, int timeout, boolean avoidLogging) {
535548

536549
Script s = new Script("/bin/bash", timeout);

0 commit comments

Comments
 (0)