use of com.emc.storageos.coordinator.client.model.SiteNetworkState in project coprhd-controller by CoprHD.
the class DrSiteNetworkMonitor method checkPing.
/**
 * Probes every remote site's VIP endpoint and records the resulting latency and
 * network health for each site through the coordinator client.
 *
 * The local site is always recorded as GOOD with zero latency. For each remote site
 * the measured latency is classified as SLOW (above NETWORK_SLOW_THRESHOLD), BROKEN
 * (negative, i.e. the probe failed) or GOOD. When running on the active site and a
 * remote site transitions into BROKEN, a system event is logged and a mail alert is
 * sent, except while that site is being added or resumed.
 */
private void checkPing() {
    Site localSite = drUtil.getLocalSite();
    SiteNetworkState localNetworkState = drUtil.getSiteNetworkState(localSite.getUuid());
    // Only write the local state back when it differs from the expected GOOD / 0 ms.
    if (!NetworkHealth.GOOD.equals(localNetworkState.getNetworkHealth()) || localNetworkState.getNetworkLatencyInMs() != 0) {
        localNetworkState.setNetworkLatencyInMs(0);
        localNetworkState.setNetworkHealth(NetworkHealth.GOOD);
        coordinatorClient.setTargetInfo(localSite.getUuid(), localNetworkState);
    }
    for (Site site : drUtil.listSites()) {
        if (drUtil.isLocalSite(site)) {
            // skip local site
            continue;
        }
        SiteNetworkState siteNetworkState = drUtil.getSiteNetworkState(site.getUuid());
        // Remember the health before this probe so a transition into BROKEN can be detected below.
        NetworkHealth previousState = siteNetworkState.getNetworkHealth();
        String host = site.getVipEndPoint();
        double ping = drUtil.testPing(host, SOCKET_TEST_PORT, NETWORK_TIMEOUT);
        // if ping successful get an average, format to 3 decimal places
        // NOTE(review): a failed follow-up probe (-1) is folded into the average as-is;
        // confirm whether that case should be treated as BROKEN instead.
        if (ping != -1) {
            ping = (ping + drUtil.testPing(host, SOCKET_TEST_PORT, NETWORK_TIMEOUT) + drUtil.testPing(host, SOCKET_TEST_PORT, NETWORK_TIMEOUT)) / 3;
            DecimalFormat df = new DecimalFormat("#.###");
            ping = Double.parseDouble(df.format(ping));
        }
        _log.info("Ping: " + ping);
        siteNetworkState.setNetworkLatencyInMs(ping);
        if (ping > NETWORK_SLOW_THRESHOLD) {
            siteNetworkState.setNetworkHealth(NetworkHealth.SLOW);
            _log.warn("Network for standby {} is slow", site.getName());
            // The alert text must describe the SLOW condition, not BROKEN.
            AlertsLogger.getAlertsLogger().warn(String.format("Network for standby %s is Slow:" + "Latency was reported as %f ms", site.getName(), ping));
        } else if (ping < 0) {
            siteNetworkState.setNetworkHealth(NetworkHealth.BROKEN);
            _log.error("Network for standby {} is broken", site.getName());
            AlertsLogger.getAlertsLogger().error(String.format("Network for standby %s is Broken:" + "Latency was reported as %s ms", site.getName(), ping));
        } else {
            siteNetworkState.setNetworkHealth(NetworkHealth.GOOD);
        }
        coordinatorClient.setTargetInfo(site.getUuid(), siteNetworkState);
        if (drUtil.isActiveSite()) {
            SiteState state = site.getState();
            if (SiteState.STANDBY_ADDING == state || SiteState.STANDBY_RESUMING == state) {
                _log.info("Skip mail alert during add-standby or resume-standby for {}", site.getUuid());
                continue;
            }
            // Alert only on the transition into BROKEN, not on every probe while it stays broken.
            if (!NetworkHealth.BROKEN.equals(previousState) && NetworkHealth.BROKEN.equals(siteNetworkState.getNetworkHealth())) {
                // Add to systemevent log
                // MessageFormat substitutes {0}-style placeholders; "%s" would be emitted literally.
                _alertLog.error(MessageFormat.format("Network connection to site {0} has been broken.", site.getName()));
                // send email alert
                mailHandler.sendSiteNetworkBrokenMail(site);
            }
        }
    }
}
use of com.emc.storageos.coordinator.client.model.SiteNetworkState in project coprhd-controller by CoprHD.
the class DisasterRecoveryServiceTest method setUp.
/**
 * Builds the fixture shared by the DisasterRecoveryService tests: sample site objects,
 * mocked collaborators (DbClient, CoordinatorClient, IPsecConfig, DrUtil, signature key
 * generator, inter-process lock) and a spied DisasterRecoveryService wired to them.
 *
 * @throws Exception if reflective construction of ProductName fails
 */
@Before
public void setUp() throws Exception {
// ProductName is instantiated reflectively through its first declared constructor,
// which is made accessible first.
Constructor constructor = ProductName.class.getDeclaredConstructors()[0];
constructor.setAccessible(true);
ProductName productName = (ProductName) constructor.newInstance();
productName.setName("vipr");
// Repository info with a single available version equal to the current one.
SoftwareVersion version = new SoftwareVersion("vipr-2.4.0.0.100");
LinkedList<SoftwareVersion> available = new LinkedList<SoftwareVersion>();
available.add(version);
RepositoryInfo repositoryInfo = new RepositoryInfo(new SoftwareVersion("vipr-2.4.0.0.100"), available);
// Site configuration representation for the standby under test.
standby = new SiteConfigRestRep();
standby.setClusterStable(true);
standby.setFreshInstallation(true);
standby.setDbSchemaVersion("2.4");
standby.setSoftwareVersion("vipr-2.4.0.0.150");
standby.setHostIPv4AddressMap(new HashMap<String, String>());
standby.getHostIPv4AddressMap().put("vipr1", "10.247.101.100");
// setup standby site
standbySite1 = new Site();
standbySite1.setUuid("site-uuid-1");
standbySite1.setVip("10.247.101.110");
standbySite1.getHostIPv4AddressMap().put("vipr1", "10.247.101.111");
standbySite1.getHostIPv4AddressMap().put("vipr2", "10.247.101.112");
standbySite1.getHostIPv4AddressMap().put("vipr3", "10.247.101.113");
standbySite1.setState(SiteState.STANDBY_PAUSED);
standbySite1.setVdcShortId("vdc1");
standbySite1.setNodeCount(1);
standbySite2 = new Site();
standbySite2.setUuid("site-uuid-2");
standbySite2.setState(SiteState.STANDBY_SYNCED);
standbySite2.setVdcShortId("vdc1");
standbySite2.setVip("10.247.101.158");
standbySite2.setNodeCount(1);
standbySite3 = new Site();
standbySite3.setUuid("site-uuid-3");
// NOTE(review): vdcShortId is set twice on standbySite3; presumably the later
// "vdc1" assignment is the value in effect - confirm the first call is intended.
standbySite3.setVdcShortId("fake-vdc-id");
standbySite3.setState(SiteState.ACTIVE);
standbySite3.setVdcShortId("vdc1");
standbySite3.setNodeCount(1);
// Primary (active) site; it reuses standbySite1's host address maps.
primarySite = new Site();
primarySite.setUuid("primary-site-uuid");
primarySite.setVip("127.0.0.1");
primarySite.setHostIPv4AddressMap(standbySite1.getHostIPv4AddressMap());
primarySite.setHostIPv6AddressMap(standbySite1.getHostIPv6AddressMap());
primarySite.setVdcShortId("vdc1");
primarySite.setState(SiteState.ACTIVE);
primarySite.setNodeCount(3);
// mock DBClient
dbClientMock = mock(DbClientImpl.class);
// mock coordinator client
coordinator = mock(CoordinatorClient.class);
// mock ipsecconfig
IPsecConfig ipsecConfig = mock(IPsecConfig.class);
doReturn("ipsec-preshared-key").when(ipsecConfig).getPreSharedKey();
drUtil = mock(DrUtil.class);
natCheckParam = new DRNatCheckParam();
apiSignatureGeneratorMock = mock(InternalApiSignatureKeyGenerator.class);
// Generate an HmacSHA256 key for the tests; the setup fails if the algorithm is unavailable.
try {
KeyGenerator keyGenerator = null;
keyGenerator = KeyGenerator.getInstance("HmacSHA256");
secretKey = keyGenerator.generateKey();
} catch (NoSuchAlgorithmException e) {
fail("generate key fail");
}
// Service under test is a spy so individual methods can be stubbed (see the audit stub below).
drService = spy(new DisasterRecoveryService());
drService.setDbClient(dbClientMock);
drService.setCoordinator(coordinator);
drService.setDrUtil(drUtil);
drService.setSiteMapper(new SiteMapper());
drService.setSysUtils(new SysUtils());
drService.setIpsecConfig(ipsecConfig);
drService.setApiSignatureGenerator(apiSignatureGeneratorMock);
// Standby-side site object sharing standbySite1's VIP and host address maps.
standbyConfig = new Site();
standbyConfig.setUuid("standby-site-uuid-1");
standbyConfig.setVip(standbySite1.getVip());
standbyConfig.setHostIPv4AddressMap(standbySite1.getHostIPv4AddressMap());
standbyConfig.setHostIPv6AddressMap(standbySite1.getHostIPv6AddressMap());
standbyConfig.setNodeCount(3);
// NOTE(review): getSiteId() is stubbed again further down with the primary site UUID;
// with Mockito the later stubbing presumably replaces this one - confirm this call is still needed.
doReturn(standbyConfig.getUuid()).when(coordinator).getSiteId();
// Coordinator configuration that names the primary site as the active site.
Configuration config = new ConfigurationImpl();
config.setConfig(Constants.CONFIG_DR_ACTIVE_SITEID, primarySite.getUuid());
doReturn(config).when(coordinator).queryConfiguration(Constants.CONFIG_DR_ACTIVE_KIND, Constants.CONFIG_DR_ACTIVE_ID);
doReturn("2.4").when(coordinator).getCurrentDbSchemaVersion();
doReturn(primarySite.getUuid()).when(coordinator).getSiteId();
doReturn(ClusterInfo.ClusterState.STABLE).when(coordinator).getControlNodesState();
// Don't need to record audit log in UT
doNothing().when(drService).auditDisasterRecoveryOps(any(OperationTypeEnum.class), anyString(), anyString(), anyVararg());
doReturn(repositoryInfo).when(coordinator).getTargetInfo(RepositoryInfo.class);
// Site lookups by UUID: known sites resolve, NONEXISTENT_ID raises a retryable CoordinatorException.
doReturn(standbySite1).when(drUtil).getSiteFromLocalVdc(standbySite1.getUuid());
doReturn(standbySite2).when(drUtil).getSiteFromLocalVdc(standbySite2.getUuid());
doThrow(CoordinatorException.retryables.cannotFindSite(NONEXISTENT_ID)).when(drUtil).getSiteFromLocalVdc(NONEXISTENT_ID);
doReturn(primarySite).when(drUtil).getSiteFromLocalVdc(primarySite.getUuid());
// Every site reports GOOD network health regardless of the UUID queried.
SiteNetworkState siteNetworkState = new SiteNetworkState();
siteNetworkState.setNetworkHealth(SiteNetworkState.NetworkHealth.GOOD);
doReturn(siteNetworkState).when(drUtil).getSiteNetworkState(any(String.class));
// Address lookup map with an IPv4 address only (the second address argument is empty).
CoordinatorClientInetAddressMap addressMap = new CoordinatorClientInetAddressMap();
addressMap.setDualInetAddress(DualInetAddress.fromAddresses("10.247.101.110", ""));
doReturn(addressMap).when(coordinator).getInetAddessLookupMap();
// Any named lock is a mock that is always acquired successfully and releases silently.
InterProcessLock lock = mock(InterProcessLock.class);
doReturn(lock).when(coordinator).getLock(anyString());
doReturn(true).when(lock).acquire(anyInt(), any(TimeUnit.class));
doNothing().when(lock).release();
}
use of com.emc.storageos.coordinator.client.model.SiteNetworkState in project coprhd-controller by CoprHD.
the class DisasterRecoveryService method resumeStandby.
/**
 * Resume data replication for a paused standby site
 *
 * The site must be in STANDBY_PAUSED or ACTIVE_DEGRADED state and its recorded network
 * health must not be BROKEN. After the local and remote prechecks pass, the target
 * standby is re-initialized and marked STANDBY_RESUMING, and the VDC target version is
 * updated for every standby site and finally for the local site, all inside one
 * coordinator transaction. On any failure the transaction is discarded and a failure
 * audit record is written. The DR operation lock is always released on exit.
 *
 * @param uuid site UUID
 * @brief Resume data replication for a paused standby site
 * @return updated standby site representation
 */
@POST
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN, Role.SYSTEM_ADMIN, Role.RESTRICTED_SYSTEM_ADMIN }, blockProxies = true)
@Path("/{uuid}/resume")
public SiteRestRep resumeStandby(@PathParam("uuid") String uuid) {
    log.info("Begin to resume data sync to standby site identified by uuid: {}", uuid);
    Site standby = validateSiteConfig(uuid);
    SiteState state = standby.getState();
    if (!state.equals(SiteState.STANDBY_PAUSED) && !state.equals(SiteState.ACTIVE_DEGRADED)) {
        log.error("site {} is in state {}, should be STANDBY_PAUSED or ACTIVE_DEGRADED", uuid, standby.getState());
        throw APIException.badRequests.operationOnlyAllowedOnPausedSite(standby.getName(), standby.getState().toString());
    }
    // Refuse to resume a site that is recorded as unreachable.
    SiteNetworkState networkState = drUtil.getSiteNetworkState(uuid);
    if (networkState.getNetworkHealth() == NetworkHealth.BROKEN) {
        throw APIException.internalServerErrors.siteConnectionBroken(standby.getName(), "Network health state is broken.");
    }
    // Run the local precheck and the precheck on the standby itself.
    try (InternalSiteServiceClient client = createInternalSiteServiceClient(standby)) {
        commonPrecheck();
        client.setCoordinatorClient(coordinator);
        client.setKeyGenerator(apiSignatureGenerator);
        client.resumePrecheck();
    } catch (APIException e) {
        // API exceptions already carry the right error for the caller; pass them through unchanged.
        throw e;
    } catch (Exception e) {
        throw APIException.internalServerErrors.resumeStandbyPrecheckFailed(standby.getName(), e.getMessage());
    }
    // Do this before tx get started which might write key to zk.
    SecretKey secretKey = apiSignatureGenerator.getSignatureKey(SignatureKeyType.INTERVDC_API);
    InterProcessLock lock = drUtil.getDROperationLock();
    long vdcTargetVersion = DrUtil.newVdcConfigVersion();
    try {
        coordinator.startTransaction();
        for (Site site : drUtil.listStandbySites()) {
            if (site.getUuid().equals(uuid)) {
                // Normal-path progress message: log at info and include the uuid via a placeholder.
                log.info("Re-init the target standby {}", uuid);
                // init the to-be resumed standby site
                long dataRevision = vdcTargetVersion;
                List<Site> standbySites = drUtil.listStandbySites();
                SiteConfigParam configParam = prepareSiteConfigParam(standbySites, ipsecConfig.getPreSharedKey(), uuid, dataRevision, vdcTargetVersion, secretKey);
                try (InternalSiteServiceClient internalSiteServiceClient = new InternalSiteServiceClient()) {
                    internalSiteServiceClient.setCoordinatorClient(coordinator);
                    internalSiteServiceClient.setServer(site.getVipEndPoint());
                    internalSiteServiceClient.initStandby(configParam);
                }
                site.setState(SiteState.STANDBY_RESUMING);
                coordinator.persistServiceConfiguration(site.toConfiguration());
                drUtil.recordDrOperationStatus(site.getUuid(), InterState.RESUMING_STANDBY);
                drUtil.updateVdcTargetVersion(uuid, SiteInfo.DR_OP_CHANGE_DATA_REVISION, vdcTargetVersion, dataRevision);
            } else {
                drUtil.updateVdcTargetVersion(site.getUuid(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
            }
        }
        // update the local(active) site last
        drUtil.updateVdcTargetVersion(coordinator.getSiteId(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
        coordinator.commitTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN, standby.toBriefString());
        return siteMapper.map(standby);
    } catch (Exception e) {
        log.error("Error resuming site {}", uuid, e);
        coordinator.discardTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_FAILURE, null, standby.toBriefString());
        throw APIException.internalServerErrors.resumeStandbyFailed(standby.getName(), e.getMessage());
    } finally {
        try {
            lock.release();
        } catch (Exception e) {
            // Best effort: a failed release must not mask the outcome of the operation,
            // but keep the cause in the log so it can be diagnosed.
            log.error(String.format("Lock release failed when resuming standby site: %s", uuid), e);
        }
    }
}
use of com.emc.storageos.coordinator.client.model.SiteNetworkState in project coprhd-controller by CoprHD.
the class DisasterRecoveryService method precheckForFailoverLocally.
/**
 * Verifies, on the local site, that a failover to it may proceed.
 *
 * Rejects the request when it is not addressed to the local site, when the active
 * site's recorded network health is anything other than BROKEN, or when the local
 * site is not in STANDBY_PAUSED state; otherwise delegates to precheckForFailover().
 *
 * @param standbyUuid UUID of the site the failover request targets
 */
private void precheckForFailoverLocally(String standbyUuid) {
    Site localSite = drUtil.getLocalSite();

    // The failover request is only valid when it targets the site that receives it.
    boolean targetsLocalSite = localSite.getUuid().equals(standbyUuid);
    if (!targetsLocalSite) {
        throw APIException.internalServerErrors.failoverPrecheckFailed(
                localSite.getName(),
                String.format("Failover can only be executed in local site. Local site uuid %s is not matched with uuid %s", localSite.getUuid(), standbyUuid));
    }

    // When an active site is known, it must be recorded as unreachable before failing over.
    String activeSiteUuid = drUtil.getActiveSite().getUuid();
    boolean activeSiteKnown = !StringUtils.isEmpty(activeSiteUuid);
    if (activeSiteKnown && drUtil.getSiteNetworkState(activeSiteUuid).getNetworkHealth() != NetworkHealth.BROKEN) {
        throw APIException.internalServerErrors.failoverPrecheckFailed(localSite.getName(), "Active site is still available");
    }

    // Don't allow failover to site of ACTIVE_DEGRADED state in X-wing
    if (SiteState.STANDBY_PAUSED != localSite.getState()) {
        throw APIException.internalServerErrors.failoverPrecheckFailed(
                localSite.getName(),
                "Please wait for this site to recognize the Active site is down and automatically switch to a Paused state before failing over.");
    }

    precheckForFailover();
}
Aggregations