Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions agent/src/main/java/com/cloud/agent/properties/AgentProperty.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright 2021 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloud.agent.properties;

/**
* Class of constant agent's properties available to configure on
* "agent.properties".
*<br><br>
* Not all available agent properties are defined here, but we should work to
* migrate them on demand to this class.
*
* @param <T> type of the default value.
*/
public class AgentProperty<T>{

/**
* Heartbeat update timeout. <br>
* Data type: int. <br>
* Default value: 60000 (ms).
*/
public static final AgentProperty<Integer> HEARTBEAT_UPDATE_TIMEOUT = new AgentProperty<Integer>("hearbeat.update.timeout", 60000);

private final String name;
private final T defaultValue;

private AgentProperty(String name, T value) {
this.name = name;
this.defaultValue = value;
}

public String getName() {
return name;
}

public T getDefaultValue() {
return defaultValue;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright 2021 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloud.agent.properties;

import com.cloud.utils.PropertiesUtil;
import java.io.File;
import java.io.IOException;
import org.apache.cloudstack.utils.security.KeyStoreUtils;
import org.apache.commons.beanutils.ConvertUtils;
import org.apache.commons.beanutils.converters.IntegerConverter;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;

/**
* This class provides a facility to read the agent's properties file and get
* its properties, according to the {@link AgentProperty} properties constants.
*
*/
public class AgentPropertyFile {

private static final Logger logger = Logger.getLogger(AgentPropertyFile.class);

/**
* This method reads the property in the agent.properties file.
*
* @param property the property to retrieve.
* @return The value of the property. If the property is not available, the
* default defined value will be used.
*/
public static <T> T getProperty(AgentProperty<T> property) {
final T defaultValue = property.getDefaultValue();
File agentPropertiesFile = PropertiesUtil.findConfigFile(KeyStoreUtils.AGENT_PROPSFILE);

if (agentPropertiesFile != null) {
try {
String configValue = PropertiesUtil.loadFromFile(agentPropertiesFile).getProperty(property.getName());
if (StringUtils.isNotBlank(configValue)) {
ConvertUtils.register(new IntegerConverter(defaultValue), Integer.class);
return (T)ConvertUtils.convert(configValue, defaultValue.getClass());
} else {
logger.debug(String.format("Property [%s] has empty or null value. Using default value [%s].", property.getName(), property.getDefaultValue()));
}
} catch (IOException ex) {
logger.debug(String.format("Failed to get property [%s]. Using default value [%s].", property.getName(), property.getDefaultValue()), ex);
}
} else {
logger.debug(String.format("File [%s] was not found, we will use default defined values. Property [%s]: [%s].", KeyStoreUtils.AGENT_PROPSFILE, property.getName(), property.getDefaultValue()));
}

return defaultValue;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
// under the License.
package com.cloud.hypervisor.kvm.resource;

import com.cloud.agent.properties.AgentProperty;
import com.cloud.agent.properties.AgentPropertyFile;
import com.cloud.utils.script.Script;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.log4j.Logger;
import org.libvirt.Connect;
import org.libvirt.LibvirtException;
Expand All @@ -32,17 +33,21 @@
import java.util.concurrent.ConcurrentHashMap;

public class KVMHAMonitor extends KVMHABase implements Runnable {

private static final Logger s_logger = Logger.getLogger(KVMHAMonitor.class);
private final Map<String, NfsStoragePool> _storagePool = new ConcurrentHashMap<String, NfsStoragePool>();
private final Map<String, NfsStoragePool> _storagePool = new ConcurrentHashMap<>();

private final String _hostIP; /* private ip address */
/* private ip address */
private final String _hostIP;

public KVMHAMonitor(NfsStoragePool pool, String host, String scriptPath) {
if (pool != null) {
_storagePool.put(pool._poolUUID, pool);
}
_hostIP = host;
configureHeartBeatPath(scriptPath);

_heartBeatUpdateTimeout = AgentPropertyFile.getProperty(AgentProperty.HEARTBEAT_UPDATE_TIMEOUT);
}

private static synchronized void configureHeartBeatPath(String scriptPath) {
Expand All @@ -67,7 +72,7 @@ public void removeStoragePool(String uuid) {

public List<NfsStoragePool> getStoragePools() {
synchronized (_storagePool) {
return new ArrayList<NfsStoragePool>(_storagePool.values());
return new ArrayList<>(_storagePool.values());
}
}

Expand All @@ -77,108 +82,93 @@ public NfsStoragePool getStoragePool(String uuid) {
}
}

private class Monitor extends ManagedContextRunnable {
protected void runHearbeat() {
synchronized (_storagePool) {
Set<String> removedPools = new HashSet<>();
for (String uuid : _storagePool.keySet()) {
NfsStoragePool primaryStoragePool = _storagePool.get(uuid);
StoragePool storage;
try {
Connect conn = LibvirtConnection.getConnection();
storage = conn.storagePoolLookupByUUIDString(uuid);
if (storage == null || storage.getInfo().state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) {
if (storage == null) {
s_logger.debug(String.format("Libvirt storage pool [%s] not found, removing from HA list.", uuid));
} else {
s_logger.debug(String.format("Libvirt storage pool [%s] found, but not running, removing from HA list.", uuid));
}

@Override
protected void runInContext() {
synchronized (_storagePool) {
Set<String> removedPools = new HashSet<String>();
for (String uuid : _storagePool.keySet()) {
NfsStoragePool primaryStoragePool = _storagePool.get(uuid);
removedPools.add(uuid);
continue;
}

// check for any that have been deregistered with libvirt and
// skip,remove them
s_logger.debug(String.format("Found NFS storage pool [%s] in libvirt, continuing.", uuid));

StoragePool storage = null;
try {
Connect conn = LibvirtConnection.getConnection();
storage = conn.storagePoolLookupByUUIDString(uuid);
if (storage == null) {
s_logger.debug("Libvirt storage pool " + uuid + " not found, removing from HA list");
removedPools.add(uuid);
continue;
} catch (LibvirtException e) {
s_logger.debug(String.format("Failed to lookup libvirt storage pool [%s].", uuid), e);

} else if (storage.getInfo().state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) {
s_logger.debug("Libvirt storage pool " + uuid + " found, but not running, removing from HA list");
if (e.toString().contains("pool not found")) {
s_logger.debug(String.format("Removing pool [%s] from HA monitor since it was deleted.", uuid));
removedPools.add(uuid);
continue;
}

removedPools.add(uuid);
continue;
}
s_logger.debug("Found NFS storage pool " + uuid + " in libvirt, continuing");
}

} catch (LibvirtException e) {
s_logger.debug("Failed to lookup libvirt storage pool " + uuid + " due to: " + e);
String result = null;
for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) {
Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
cmd.add("-i", primaryStoragePool._poolIp);
cmd.add("-p", primaryStoragePool._poolMountSourcePath);
cmd.add("-m", primaryStoragePool._mountDestPath);
cmd.add("-h", _hostIP);
result = cmd.execute();

// we only want to remove pool if it's not found, not if libvirt
// connection fails
if (e.toString().contains("pool not found")) {
s_logger.debug("removing pool from HA monitor since it was deleted");
removedPools.add(uuid);
continue;
}
}
s_logger.debug(String.format("The command (%s), to the pool [%s], has the result [%s].", cmd.toString(), uuid, result));

String result = null;
// Try multiple times, but sleep in between tries to ensure it isn't a short lived transient error
for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) {
Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
cmd.add("-i", primaryStoragePool._poolIp);
cmd.add("-p", primaryStoragePool._poolMountSourcePath);
cmd.add("-m", primaryStoragePool._mountDestPath);
cmd.add("-h", _hostIP);
result = cmd.execute();
if (result != null) {
s_logger.warn("write heartbeat failed: " + result + ", try: " + i + " of " + _heartBeatUpdateMaxTries);
try {
Thread.sleep(_heartBeatUpdateRetrySleep);
} catch (InterruptedException e) {
s_logger.debug("[ignored] interupted between heartbeat retries.");
}
} else {
break;
if (result != null) {
s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; try: %s of %s.", uuid, result, i, _heartBeatUpdateMaxTries));
try {
Thread.sleep(_heartBeatUpdateRetrySleep);
} catch (InterruptedException e) {
s_logger.debug("[IGNORED] Interrupted between heartbeat retries.", e);
}
} else {
break;
}

if (result != null) {
// Stop cloudstack-agent if can't write to heartbeat file.
// This will raise an alert on the mgmt server
s_logger.warn("write heartbeat failed: " + result + "; stopping cloudstack-agent");
Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
cmd.add("-i", primaryStoragePool._poolIp);
cmd.add("-p", primaryStoragePool._poolMountSourcePath);
cmd.add("-m", primaryStoragePool._mountDestPath);
cmd.add("-c");
result = cmd.execute();
}
}

if (!removedPools.isEmpty()) {
for (String uuid : removedPools) {
removeStoragePool(uuid);
}
if (result != null) {
s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; stopping cloudstack-agent.", uuid, result));
Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
cmd.add("-i", primaryStoragePool._poolIp);
cmd.add("-p", primaryStoragePool._poolMountSourcePath);
cmd.add("-m", primaryStoragePool._mountDestPath);
Comment thread
GabrielBrascher marked this conversation as resolved.
Outdated
cmd.add("-c");
result = cmd.execute();
}
}

if (!removedPools.isEmpty()) {
for (String uuid : removedPools) {
removeStoragePool(uuid);
}
}
}

}

@Override
public void run() {
// s_logger.addAppender(new org.apache.log4j.ConsoleAppender(new
// org.apache.log4j.PatternLayout(), "System.out"));
while (true) {
Thread monitorThread = new Thread(new Monitor());
monitorThread.start();
try {
monitorThread.join();
} catch (InterruptedException e) {
s_logger.debug("[ignored] interupted joining monitor.");
}

runHearbeat();

try {
Thread.sleep(_heartBeatUpdateFreq);
} catch (InterruptedException e) {
s_logger.debug("[ignored] interupted between heartbeats.");
s_logger.debug("[IGNORED] Interrupted between heartbeats.", e);
}
}
}
Expand Down