Search in sources :

Example 1 with SiteNetworkState

use of com.emc.storageos.coordinator.client.model.SiteNetworkState in project coprhd-controller by CoprHD.

The method checkPing of the class DrSiteNetworkMonitor.

/**
 * Refreshes the stored network state of every site as observed from the local site.
 *
 * <p>The local site's own record is reset to zero latency and {@code GOOD} health whenever it
 * holds anything else. Every other site is probed with {@code drUtil.testPing}; the measured
 * latency and the derived health (GOOD / SLOW / BROKEN) are written back through
 * {@code coordinatorClient.setTargetInfo}.
 *
 * <p>When this node belongs to the active site and a remote site has just transitioned into
 * {@code BROKEN}, an alert-log entry is written and a mail is sent. Sites in
 * {@code STANDBY_ADDING} or {@code STANDBY_RESUMING} state are exempt from that alert.
 */
private void checkPing() {
    Site localSite = drUtil.getLocalSite();
    SiteNetworkState localNetworkState = drUtil.getSiteNetworkState(localSite.getUuid());
    // Only write the local record when it actually differs from the expected "perfect" state.
    if (!NetworkHealth.GOOD.equals(localNetworkState.getNetworkHealth()) || localNetworkState.getNetworkLatencyInMs() != 0) {
        localNetworkState.setNetworkLatencyInMs(0);
        localNetworkState.setNetworkHealth(NetworkHealth.GOOD);
        coordinatorClient.setTargetInfo(localSite.getUuid(), localNetworkState);
    }
    for (Site site : drUtil.listSites()) {
        if (drUtil.isLocalSite(site)) {
            // skip local site
            continue;
        }
        SiteNetworkState siteNetworkState = drUtil.getSiteNetworkState(site.getUuid());
        // Remember the health before this probe so a transition into BROKEN can be detected below.
        NetworkHealth previousState = siteNetworkState.getNetworkHealth();
        String host = site.getVipEndPoint();
        double ping = drUtil.testPing(host, SOCKET_TEST_PORT, NETWORK_TIMEOUT);
        // if ping successful get an average, rounded to 3 decimal places
        if (ping != -1) {
            // NOTE(review): the two follow-up probes are averaged in even if they report -1 - confirm
            // whether a failed follow-up should instead mark the site as broken.
            ping = (ping + drUtil.testPing(host, SOCKET_TEST_PORT, NETWORK_TIMEOUT) + drUtil.testPing(host, SOCKET_TEST_PORT, NETWORK_TIMEOUT)) / 3;
            // Round arithmetically: formatting with DecimalFormat and parsing the text back depends
            // on the default locale and fails where the decimal separator is a comma.
            ping = Math.round(ping * 1000.0) / 1000.0;
        }
        _log.info("Ping: " + ping);
        siteNetworkState.setNetworkLatencyInMs(ping);
        if (ping > NETWORK_SLOW_THRESHOLD) {
            siteNetworkState.setNetworkHealth(NetworkHealth.SLOW);
            _log.warn("Network for standby {} is slow", site.getName());
            AlertsLogger.getAlertsLogger().warn(String.format("Network for standby %s is slow:" + "Latency was reported as %f ms", site.getName(), ping));
        } else if (ping < 0) {
            siteNetworkState.setNetworkHealth(NetworkHealth.BROKEN);
            _log.error("Network for standby {} is broken", site.getName());
            AlertsLogger.getAlertsLogger().error(String.format("Network for standby %s is Broken:" + "Latency was reported as %s ms", site.getName(), ping));
        } else {
            siteNetworkState.setNetworkHealth(NetworkHealth.GOOD);
        }
        coordinatorClient.setTargetInfo(site.getUuid(), siteNetworkState);
        if (drUtil.isActiveSite()) {
            SiteState state = site.getState();
            if (SiteState.STANDBY_ADDING == state || SiteState.STANDBY_RESUMING == state) {
                _log.info("Skip mail alert during add-standby or resume-standby for {}", site.getUuid());
                continue;
            }
            // Alert only on the transition into BROKEN, not on every cycle while it stays broken.
            if (!NetworkHealth.BROKEN.equals(previousState) && NetworkHealth.BROKEN.equals(siteNetworkState.getNetworkHealth())) {
                // Add to systemevent log. String.format is required here: the pattern uses %s,
                // which MessageFormat would emit literally instead of substituting the site name.
                _alertLog.error(String.format("Network connection to site %s has been broken.", site.getName()));
                // send email alert
                mailHandler.sendSiteNetworkBrokenMail(site);
            }
        }
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteState(com.emc.storageos.coordinator.client.model.SiteState) NetworkHealth(com.emc.storageos.coordinator.client.model.SiteNetworkState.NetworkHealth) DecimalFormat(java.text.DecimalFormat) SiteNetworkState(com.emc.storageos.coordinator.client.model.SiteNetworkState)

Example 2 with SiteNetworkState

use of com.emc.storageos.coordinator.client.model.SiteNetworkState in project coprhd-controller by CoprHD.

The method setUp of the class DisasterRecoveryServiceTest.

/**
 * Builds the fixture shared by the tests: a repository/version description, a standby
 * configuration REST representation, three "standby" {@link Site} objects plus a primary site,
 * Mockito mocks for the DB client, coordinator client, IPsec config and {@code DrUtil}, and a
 * spied {@code DisasterRecoveryService} wired to those mocks.
 *
 * <p>Statement order matters here: several stubbings target the same mock call, and site
 * objects share address maps obtained from {@code standbySite1}.
 *
 * @throws Exception if the reflective construction of {@code ProductName} fails
 */
@Before
public void setUp() throws Exception {
    // ProductName is instantiated reflectively through its first declared constructor,
    // which is made accessible first.
    Constructor constructor = ProductName.class.getDeclaredConstructors()[0];
    constructor.setAccessible(true);
    ProductName productName = (ProductName) constructor.newInstance();
    // NOTE(review): productName is not referenced again in this method; presumably setName has a
    // side effect that SoftwareVersion parsing below depends on - confirm.
    productName.setName("vipr");
    SoftwareVersion version = new SoftwareVersion("vipr-2.4.0.0.100");
    LinkedList<SoftwareVersion> available = new LinkedList<SoftwareVersion>();
    available.add(version);
    RepositoryInfo repositoryInfo = new RepositoryInfo(new SoftwareVersion("vipr-2.4.0.0.100"), available);
    // REST representation of a standby's configuration
    standby = new SiteConfigRestRep();
    standby.setClusterStable(true);
    standby.setFreshInstallation(true);
    standby.setDbSchemaVersion("2.4");
    standby.setSoftwareVersion("vipr-2.4.0.0.150");
    standby.setHostIPv4AddressMap(new HashMap<String, String>());
    standby.getHostIPv4AddressMap().put("vipr1", "10.247.101.100");
    // setup standby site
    standbySite1 = new Site();
    standbySite1.setUuid("site-uuid-1");
    standbySite1.setVip("10.247.101.110");
    standbySite1.getHostIPv4AddressMap().put("vipr1", "10.247.101.111");
    standbySite1.getHostIPv4AddressMap().put("vipr2", "10.247.101.112");
    standbySite1.getHostIPv4AddressMap().put("vipr3", "10.247.101.113");
    standbySite1.setState(SiteState.STANDBY_PAUSED);
    standbySite1.setVdcShortId("vdc1");
    standbySite1.setNodeCount(1);
    standbySite2 = new Site();
    standbySite2.setUuid("site-uuid-2");
    standbySite2.setState(SiteState.STANDBY_SYNCED);
    standbySite2.setVdcShortId("vdc1");
    standbySite2.setVip("10.247.101.158");
    standbySite2.setNodeCount(1);
    standbySite3 = new Site();
    standbySite3.setUuid("site-uuid-3");
    // NOTE(review): this "fake-vdc-id" value is replaced by the setVdcShortId("vdc1") call two
    // lines below - confirm which value the tests intend.
    standbySite3.setVdcShortId("fake-vdc-id");
    standbySite3.setState(SiteState.ACTIVE);
    standbySite3.setVdcShortId("vdc1");
    standbySite3.setNodeCount(1);
    primarySite = new Site();
    primarySite.setUuid("primary-site-uuid");
    primarySite.setVip("127.0.0.1");
    // The primary site is given the address maps returned by standbySite1's getters.
    primarySite.setHostIPv4AddressMap(standbySite1.getHostIPv4AddressMap());
    primarySite.setHostIPv6AddressMap(standbySite1.getHostIPv6AddressMap());
    primarySite.setVdcShortId("vdc1");
    primarySite.setState(SiteState.ACTIVE);
    primarySite.setNodeCount(3);
    // mock DBClient
    dbClientMock = mock(DbClientImpl.class);
    // mock coordinator client
    coordinator = mock(CoordinatorClient.class);
    // mock ipsecconfig
    IPsecConfig ipsecConfig = mock(IPsecConfig.class);
    doReturn("ipsec-preshared-key").when(ipsecConfig).getPreSharedKey();
    drUtil = mock(DrUtil.class);
    natCheckParam = new DRNatCheckParam();
    apiSignatureGeneratorMock = mock(InternalApiSignatureKeyGenerator.class);
    // Generate an HmacSHA256 key for the tests; a missing algorithm fails the test outright.
    try {
        KeyGenerator keyGenerator = null;
        keyGenerator = KeyGenerator.getInstance("HmacSHA256");
        secretKey = keyGenerator.generateKey();
    } catch (NoSuchAlgorithmException e) {
        fail("generate key fail");
    }
    // Service under test: a spy, so individual methods can be stubbed (see auditDisasterRecoveryOps below).
    drService = spy(new DisasterRecoveryService());
    drService.setDbClient(dbClientMock);
    drService.setCoordinator(coordinator);
    drService.setDrUtil(drUtil);
    drService.setSiteMapper(new SiteMapper());
    drService.setSysUtils(new SysUtils());
    drService.setIpsecConfig(ipsecConfig);
    drService.setApiSignatureGenerator(apiSignatureGeneratorMock);
    // Site object reusing standbySite1's VIP and address maps, with a 3-node count
    standbyConfig = new Site();
    standbyConfig.setUuid("standby-site-uuid-1");
    standbyConfig.setVip(standbySite1.getVip());
    standbyConfig.setHostIPv4AddressMap(standbySite1.getHostIPv4AddressMap());
    standbyConfig.setHostIPv6AddressMap(standbySite1.getHostIPv6AddressMap());
    standbyConfig.setNodeCount(3);
    // NOTE(review): getSiteId() is stubbed again further down with the primary site's UUID; with
    // Mockito the most recent stubbing normally wins - confirm this first stubbing is still needed.
    doReturn(standbyConfig.getUuid()).when(coordinator).getSiteId();
    // Configuration entry naming the primary site as the active site
    Configuration config = new ConfigurationImpl();
    config.setConfig(Constants.CONFIG_DR_ACTIVE_SITEID, primarySite.getUuid());
    doReturn(config).when(coordinator).queryConfiguration(Constants.CONFIG_DR_ACTIVE_KIND, Constants.CONFIG_DR_ACTIVE_ID);
    doReturn("2.4").when(coordinator).getCurrentDbSchemaVersion();
    doReturn(primarySite.getUuid()).when(coordinator).getSiteId();
    doReturn(ClusterInfo.ClusterState.STABLE).when(coordinator).getControlNodesState();
    // Don't need to record audit log in UT
    doNothing().when(drService).auditDisasterRecoveryOps(any(OperationTypeEnum.class), anyString(), anyString(), anyVararg());
    doReturn(repositoryInfo).when(coordinator).getTargetInfo(RepositoryInfo.class);
    // Site lookups by UUID; the non-existent id is stubbed to throw a "cannot find site" exception.
    doReturn(standbySite1).when(drUtil).getSiteFromLocalVdc(standbySite1.getUuid());
    doReturn(standbySite2).when(drUtil).getSiteFromLocalVdc(standbySite2.getUuid());
    doThrow(CoordinatorException.retryables.cannotFindSite(NONEXISTENT_ID)).when(drUtil).getSiteFromLocalVdc(NONEXISTENT_ID);
    doReturn(primarySite).when(drUtil).getSiteFromLocalVdc(primarySite.getUuid());
    // Every site reports GOOD network health unless a test re-stubs this.
    SiteNetworkState siteNetworkState = new SiteNetworkState();
    siteNetworkState.setNetworkHealth(SiteNetworkState.NetworkHealth.GOOD);
    doReturn(siteNetworkState).when(drUtil).getSiteNetworkState(any(String.class));
    CoordinatorClientInetAddressMap addressMap = new CoordinatorClientInetAddressMap();
    addressMap.setDualInetAddress(DualInetAddress.fromAddresses("10.247.101.110", ""));
    doReturn(addressMap).when(coordinator).getInetAddessLookupMap();
    // Any named lock request yields a mock lock whose acquire succeeds and whose release is a no-op.
    InterProcessLock lock = mock(InterProcessLock.class);
    doReturn(lock).when(coordinator).getLock(anyString());
    doReturn(true).when(lock).acquire(anyInt(), any(TimeUnit.class));
    doNothing().when(lock).release();
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) IPsecConfig(com.emc.storageos.security.ipsec.IPsecConfig) ProductName(com.emc.storageos.coordinator.client.model.ProductName) Configuration(com.emc.storageos.coordinator.common.Configuration) OperationTypeEnum(com.emc.storageos.services.OperationTypeEnum) Matchers.anyString(org.mockito.Matchers.anyString) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) CoordinatorClient(com.emc.storageos.coordinator.client.service.CoordinatorClient) TimeUnit(java.util.concurrent.TimeUnit) SiteConfigRestRep(com.emc.storageos.model.dr.SiteConfigRestRep) KeyGenerator(javax.crypto.KeyGenerator) InternalApiSignatureKeyGenerator(com.emc.storageos.security.authentication.InternalApiSignatureKeyGenerator) ConfigurationImpl(com.emc.storageos.coordinator.common.impl.ConfigurationImpl) InternalApiSignatureKeyGenerator(com.emc.storageos.security.authentication.InternalApiSignatureKeyGenerator) RepositoryInfo(com.emc.storageos.coordinator.client.model.RepositoryInfo) Constructor(java.lang.reflect.Constructor) SiteMapper(com.emc.storageos.api.mapper.SiteMapper) DrUtil(com.emc.storageos.coordinator.client.service.DrUtil) LinkedList(java.util.LinkedList) SysUtils(com.emc.storageos.services.util.SysUtils) SoftwareVersion(com.emc.storageos.coordinator.client.model.SoftwareVersion) DbClientImpl(com.emc.storageos.db.client.impl.DbClientImpl) DRNatCheckParam(com.emc.storageos.model.dr.DRNatCheckParam) CoordinatorClientInetAddressMap(com.emc.storageos.coordinator.client.service.impl.CoordinatorClientInetAddressMap) SiteNetworkState(com.emc.storageos.coordinator.client.model.SiteNetworkState) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) Before(org.junit.Before)

Example 3 with SiteNetworkState

use of com.emc.storageos.coordinator.client.model.SiteNetworkState in project coprhd-controller by CoprHD.

The method resumeStandby of the class DisasterRecoveryService.

/**
 * Resume data replication for a paused standby site
 *
 * <p>The site must be in {@code STANDBY_PAUSED} or {@code ACTIVE_DEGRADED} state and its recorded
 * network health must not be {@code BROKEN}; otherwise an {@code APIException} is thrown before
 * any change is made. After a remote pre-check, the target site is re-initialized and moved to
 * {@code STANDBY_RESUMING}, and the VDC target version of every standby site and of the local
 * site is updated inside one coordinator transaction. Any failure inside that transaction
 * discards it and is reported as a "resume standby failed" internal server error.
 *
 * @param uuid site UUID
 * @brief Resume data replication for a paused standby site
 * @return updated standby site representation
 */
@POST
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN, Role.SYSTEM_ADMIN, Role.RESTRICTED_SYSTEM_ADMIN }, blockProxies = true)
@Path("/{uuid}/resume")
public SiteRestRep resumeStandby(@PathParam("uuid") String uuid) {
    log.info("Begin to resume data sync to standby site identified by uuid: {}", uuid);
    Site standby = validateSiteConfig(uuid);
    SiteState state = standby.getState();
    if (!state.equals(SiteState.STANDBY_PAUSED) && !state.equals(SiteState.ACTIVE_DEGRADED)) {
        log.error("site {} is in state {}, should be STANDBY_PAUSED or ACTIVE_DEGRADED", uuid, standby.getState());
        throw APIException.badRequests.operationOnlyAllowedOnPausedSite(standby.getName(), standby.getState().toString());
    }
    SiteNetworkState networkState = drUtil.getSiteNetworkState(uuid);
    if (networkState.getNetworkHealth() == NetworkHealth.BROKEN) {
        throw APIException.internalServerErrors.siteConnectionBroken(standby.getName(), "Network health state is broken.");
    }
    // Remote pre-check; API exceptions pass through unchanged, anything else is wrapped.
    try (InternalSiteServiceClient client = createInternalSiteServiceClient(standby)) {
        commonPrecheck();
        client.setCoordinatorClient(coordinator);
        client.setKeyGenerator(apiSignatureGenerator);
        client.resumePrecheck();
    } catch (APIException e) {
        throw e;
    } catch (Exception e) {
        throw APIException.internalServerErrors.resumeStandbyPrecheckFailed(standby.getName(), e.getMessage());
    }
    // Do this before tx get started which might write key to zk.
    SecretKey secretKey = apiSignatureGenerator.getSignatureKey(SignatureKeyType.INTERVDC_API);
    // Computed before the DR lock is obtained so that nothing which may throw sits between
    // obtaining the lock and entering the try/finally that releases it.
    long vdcTargetVersion = DrUtil.newVdcConfigVersion();
    // NOTE(review): presumably getDROperationLock() returns an already-acquired lock (it is
    // only released, never acquired, in this method) - confirm.
    InterProcessLock lock = drUtil.getDROperationLock();
    try {
        coordinator.startTransaction();
        for (Site site : drUtil.listStandbySites()) {
            if (site.getUuid().equals(uuid)) {
                log.info("Re-init the target standby {}", uuid);
                // init the to-be resumed standby site
                long dataRevision = vdcTargetVersion;
                List<Site> standbySites = drUtil.listStandbySites();
                SiteConfigParam configParam = prepareSiteConfigParam(standbySites, ipsecConfig.getPreSharedKey(), uuid, dataRevision, vdcTargetVersion, secretKey);
                try (InternalSiteServiceClient internalSiteServiceClient = new InternalSiteServiceClient()) {
                    internalSiteServiceClient.setCoordinatorClient(coordinator);
                    internalSiteServiceClient.setServer(site.getVipEndPoint());
                    internalSiteServiceClient.initStandby(configParam);
                }
                site.setState(SiteState.STANDBY_RESUMING);
                coordinator.persistServiceConfiguration(site.toConfiguration());
                drUtil.recordDrOperationStatus(site.getUuid(), InterState.RESUMING_STANDBY);
                drUtil.updateVdcTargetVersion(uuid, SiteInfo.DR_OP_CHANGE_DATA_REVISION, vdcTargetVersion, dataRevision);
            } else {
                drUtil.updateVdcTargetVersion(site.getUuid(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
            }
        }
        // update the local(active) site last
        drUtil.updateVdcTargetVersion(coordinator.getSiteId(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
        coordinator.commitTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN, standby.toBriefString());
        return siteMapper.map(standby);
    } catch (Exception e) {
        log.error("Error resuming site {}", uuid, e);
        coordinator.discardTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_FAILURE, null, standby.toBriefString());
        InternalServerErrorException resumeStandbyFailedException = APIException.internalServerErrors.resumeStandbyFailed(standby.getName(), e.getMessage());
        throw resumeStandbyFailedException;
    } finally {
        try {
            lock.release();
        } catch (Exception e) {
            // Best effort: a failed release must not mask the outcome of the operation, but the
            // cause is logged so that a stuck lock can be diagnosed.
            log.error("Lock release failed when resuming standby site: {}", uuid, e);
        }
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SecretKey(javax.crypto.SecretKey) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) SiteState(com.emc.storageos.coordinator.client.model.SiteState) InternalSiteServiceClient(com.emc.storageos.api.service.impl.resource.utils.InternalSiteServiceClient) SiteNetworkState(com.emc.storageos.coordinator.client.model.SiteNetworkState) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) SiteConfigParam(com.emc.storageos.model.dr.SiteConfigParam) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces) CheckPermission(com.emc.storageos.security.authorization.CheckPermission)

Example 4 with SiteNetworkState

use of com.emc.storageos.coordinator.client.model.SiteNetworkState in project coprhd-controller by CoprHD.

The method precheckForFailoverLocally of the class DisasterRecoveryService.

/*
 * Local-side guard that decides whether a failover to this standby may proceed.
 *
 * Rejects the request with a "failover precheck failed" error when
 *   - the requested uuid is not the local site's uuid,
 *   - an active site uuid is known and that site's network health is not BROKEN, or
 *   - the local site is not in STANDBY_PAUSED state.
 * If none of these apply, the common precheckForFailover() is run.
 */
private void precheckForFailoverLocally(String standbyUuid) {
    final Site localSite = drUtil.getLocalSite();

    // The request must be addressed to the site that received it.
    final boolean addressedToLocalSite = localSite.getUuid().equals(standbyUuid);
    if (!addressedToLocalSite) {
        throw APIException.internalServerErrors.failoverPrecheckFailed(
                localSite.getName(),
                String.format("Failover can only be executed in local site. Local site uuid %s is not matched with uuid %s", localSite.getUuid(), standbyUuid));
    }

    // The active site must be unreachable from here, as far as its uuid is known at all.
    final String activeSiteUuid = drUtil.getActiveSite().getUuid();
    final boolean activeSiteKnown = !StringUtils.isEmpty(activeSiteUuid);
    if (activeSiteKnown) {
        final NetworkHealth activeSiteHealth = drUtil.getSiteNetworkState(activeSiteUuid).getNetworkHealth();
        if (NetworkHealth.BROKEN != activeSiteHealth) {
            throw APIException.internalServerErrors.failoverPrecheckFailed(localSite.getName(), "Active site is still available");
        }
    }

    // Don't allow failover to site of ACTIVE_DEGRADED state in X-wing
    if (SiteState.STANDBY_PAUSED != localSite.getState()) {
        throw APIException.internalServerErrors.failoverPrecheckFailed(
                localSite.getName(),
                "Please wait for this site to recognize the Active site is down and automatically switch to a Paused state before failing over.");
    }

    precheckForFailover();
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteNetworkState(com.emc.storageos.coordinator.client.model.SiteNetworkState)

Aggregations

Site (com.emc.storageos.coordinator.client.model.Site)4 SiteNetworkState (com.emc.storageos.coordinator.client.model.SiteNetworkState)4 SiteState (com.emc.storageos.coordinator.client.model.SiteState)2 InterProcessLock (org.apache.curator.framework.recipes.locks.InterProcessLock)2 SiteMapper (com.emc.storageos.api.mapper.SiteMapper)1 InternalSiteServiceClient (com.emc.storageos.api.service.impl.resource.utils.InternalSiteServiceClient)1 ProductName (com.emc.storageos.coordinator.client.model.ProductName)1 RepositoryInfo (com.emc.storageos.coordinator.client.model.RepositoryInfo)1 NetworkHealth (com.emc.storageos.coordinator.client.model.SiteNetworkState.NetworkHealth)1 SoftwareVersion (com.emc.storageos.coordinator.client.model.SoftwareVersion)1 CoordinatorClient (com.emc.storageos.coordinator.client.service.CoordinatorClient)1 DrUtil (com.emc.storageos.coordinator.client.service.DrUtil)1 CoordinatorClientInetAddressMap (com.emc.storageos.coordinator.client.service.impl.CoordinatorClientInetAddressMap)1 Configuration (com.emc.storageos.coordinator.common.Configuration)1 ConfigurationImpl (com.emc.storageos.coordinator.common.impl.ConfigurationImpl)1 ZkPath (com.emc.storageos.coordinator.common.impl.ZkPath)1 CoordinatorException (com.emc.storageos.coordinator.exceptions.CoordinatorException)1 RetryableCoordinatorException (com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException)1 DbClientImpl (com.emc.storageos.db.client.impl.DbClientImpl)1 DRNatCheckParam (com.emc.storageos.model.dr.DRNatCheckParam)1