Search in sources :

Example 21 with CoordinatorException

use of com.emc.storageos.coordinator.exceptions.CoordinatorException in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method retryOperation.

/**
 * Query the latest error message & Retry the Last operation for specific standby site
 *
 * The site must currently be in STANDBY_ERROR and its last state must be one of
 * STANDBY_PAUSING, STANDBY_RESUMING or STANDBY_FAILING_OVER; otherwise a bad-request
 * error is raised. The retry is also rejected when the action currently required on
 * the local site differs from the DR action of the standby's last state.
 *
 * @param uuid site UUID
 * @brief Query the latest error message & Retry the Last operation for specific standby site
 * @return updated standby site representation
 */
@POST
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN, Role.SYSTEM_ADMIN, Role.RESTRICTED_SYSTEM_ADMIN })
@Path("/{uuid}/retry")
public SiteRestRep retryOperation(@PathParam("uuid") String uuid) {
    log.info("Begin to get site error by uuid {}", uuid);
    Site standby;
    try {
        standby = drUtil.getSiteFromLocalVdc(uuid);
    } catch (CoordinatorException e) {
        // Keep the cause in the log; the API error returned to the caller carries no detail.
        log.error("Can't find site {} from ZK", uuid, e);
        throw APIException.badRequests.siteIdNotFound();
    }
    if (!standby.getState().equals(SiteState.STANDBY_ERROR)) {
        log.error("site {} is in state {}, should be STANDBY_ERROR", uuid, standby.getState());
        throw APIException.badRequests.operationOnlyAllowedOnErrorSite(standby.getName(), standby.getState().toString());
    }
    if (!standby.getLastState().equals(SiteState.STANDBY_PAUSING) && !standby.getLastState().equals(SiteState.STANDBY_RESUMING) && !standby.getLastState().equals(SiteState.STANDBY_FAILING_OVER)) {
        log.error("site {} lastState was {}, retry is only supported for Pause, Resume and Failover", uuid, standby.getLastState());
        throw APIException.badRequests.operationRetryOnlyAllowedOnLastState(standby.getName(), standby.getLastState().toString());
    }
    // Reuse the current action required
    Site localSite = drUtil.getLocalSite();
    SiteInfo siteInfo = coordinator.getTargetInfo(localSite.getUuid(), SiteInfo.class);
    String drOperation = siteInfo.getActionRequired();
    // Check that last action matches retry action
    if (!drOperation.equals(standby.getLastState().getDRAction())) {
        log.error("Active site last operation was {}, retry is only supported if no other operations have been performed", drOperation);
        throw APIException.internalServerErrors.retryStandbyPrecheckFailed(standby.getName(), standby.getLastState().toString(), String.format("Another DR operation %s has been run on Active site. Only the latest operation can be retried. " + "This is an unrecoverable Error, please remove site and deploy a new one.", drOperation));
    }
    // NOTE(review): the lock is presumably returned already acquired, since it is only released below - confirm in DrUtil.
    InterProcessLock lock = drUtil.getDROperationLock();
    try {
        coordinator.startTransaction();
        // Put the standby back into the state it was in before it went to error.
        standby.setState(standby.getLastState());
        // Failover requires setting old active site to last state as well.
        if (standby.getState() == SiteState.STANDBY_FAILING_OVER) {
            for (Site site : drUtil.listSites()) {
                if (site.getLastState() == SiteState.ACTIVE_FAILING_OVER) {
                    site.setState(SiteState.ACTIVE_FAILING_OVER);
                    coordinator.persistServiceConfiguration(site.toConfiguration());
                }
            }
        }
        coordinator.persistServiceConfiguration(standby.toConfiguration());
        log.info("Notify all sites for reconfig");
        long vdcTargetVersion = DrUtil.newVdcConfigVersion();
        for (Site site : drUtil.listSites()) {
            String siteUuid = site.getUuid();
            if (site.getLastState() == SiteState.STANDBY_RESUMING) {
                SiteInfo siteTargetInfo = coordinator.getTargetInfo(siteUuid, SiteInfo.class);
                String resumeSiteOperation = siteTargetInfo.getActionRequired();
                if (resumeSiteOperation.equals(SiteInfo.DR_OP_CHANGE_DATA_REVISION)) {
                    // A resuming site keeps its own pending operation; the new config version doubles as the data revision.
                    long dataRevision = vdcTargetVersion;
                    drUtil.updateVdcTargetVersion(siteUuid, resumeSiteOperation, vdcTargetVersion, dataRevision);
                    continue;
                }
            }
            log.info("Set dr operation {} on site {}", drOperation, siteUuid);
            drUtil.updateVdcTargetVersion(siteUuid, drOperation, vdcTargetVersion);
        }
        coordinator.commitTransaction();
        return siteMapper.map(standby);
    } catch (Exception e) {
        log.error("Error retrying site operation for site {}", uuid, e);
        coordinator.discardTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RETRY_STANDBY_OP, AuditLogManager.AUDITLOG_FAILURE, null, standby);
        throw APIException.internalServerErrors.retryStandbyOpFailed(standby.getName(), e.getMessage());
    } finally {
        try {
            lock.release();
        } catch (Exception releaseError) {
            // Best effort: a failed release must not mask the result or error of the retry itself.
            log.error("Lock release failed when retrying standby site last op: {}", uuid, releaseError);
        }
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteInfo(com.emc.storageos.coordinator.client.model.SiteInfo) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces) CheckPermission(com.emc.storageos.security.authorization.CheckPermission)

Example 22 with CoordinatorException

use of com.emc.storageos.coordinator.exceptions.CoordinatorException in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method precheckForSwitchoverForActiveSite.

/**
 * Validate, from the active site, that a switchover to the given standby may proceed.
 *
 * Every failed check is reported by throwing an exception; the method returns normally
 * only when all checks pass.
 *
 * @param standbyUuid UUID of the standby site that is the switchover target
 */
protected void precheckForSwitchoverForActiveSite(String standbyUuid) {
    if (drUtil.isStandby()) {
        throw APIException.badRequests.operationOnlyAllowedOnActiveSite();
    }
    Site standby;
    try {
        standby = drUtil.getSiteFromLocalVdc(standbyUuid);
    } catch (CoordinatorException e) {
        // The lookup failed, so there is no Site object to read from; report the requested uuid instead.
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standbyUuid, "Standby uuid is not valid, can't find it");
    }
    if (standbyUuid.equals(drUtil.getActiveSite().getUuid())) {
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), "Can't switchover to an active site");
    }
    if (standby.getState() != SiteState.STANDBY_SYNCED) {
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), "Standby site is not fully synced");
    }
    if (!drUtil.isSiteUp(standbyUuid)) {
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), "Standby site is not up");
    }
    if (coordinator.getControlNodesState(standby.getUuid()) != ClusterInfo.ClusterState.STABLE) {
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), "Standby site is not stable");
    }
    if (!isClusterStable()) {
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), "Active site is not stable");
    }
    checkSiteConnectivity(standby);
    // Every standby site, not just the target, must be synced or paused; non-paused ones must also be stable.
    List<Site> existingSites = drUtil.listStandbySites();
    for (Site site : existingSites) {
        if (site.getState() != SiteState.STANDBY_SYNCED && site.getState() != SiteState.STANDBY_PAUSED) {
            throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), String.format("Standby site %s is not synced or paused", site.getName()));
        }
        ClusterInfo.ClusterState state = coordinator.getControlNodesState(site.getUuid());
        if (site.getState() != SiteState.STANDBY_PAUSED && state != ClusterInfo.ClusterState.STABLE) {
            log.info("Site {} is not stable {}", site.getUuid(), state);
            throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), String.format("Site %s is not stable", site.getName()));
        }
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) ClusterInfo(com.emc.vipr.model.sys.ClusterInfo) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException)

Example 23 with CoordinatorException

use of com.emc.storageos.coordinator.exceptions.CoordinatorException in project coprhd-controller by CoprHD.

the class BeaconTest method testBeacon.

/**
 * Starts a dummy service, then checks that it can be located by tag and endpoint key,
 * and that lookups with a non-matching tag or endpoint key raise CoordinatorException.
 */
@Test
public void testBeacon() throws Exception {
    final String tag = "foo";
    final String endpointKey = "key";
    DummyServiceImpl service = new DummyServiceImpl(10099, tag, endpointKey);
    service.startRmiServer();
    service.start(1000 * 5);
    Service si = service.getServiceInfo();
    CoordinatorClient client = connectClient();
    DummyService found = client.locateService(DummyService.class, si.getName(), si.getVersion(), tag, endpointKey);
    Assert.assertNotNull(found);
    found.test();
    List<Service> services = client.locateAllServices(si.getName(), si.getVersion(), tag, endpointKey);
    // JUnit convention is (expected, actual), so failure messages read correctly.
    Assert.assertEquals(1, services.size());
    Assert.assertEquals(si.getName(), services.get(0).getName());
    Assert.assertEquals(si.getVersion(), services.get(0).getVersion());
    Assert.assertEquals(si.getEndpoint(), services.get(0).getEndpoint());
    Assert.assertTrue(services.get(0).isTagged(tag));
    Assert.assertEquals(si.getEndpoint(), services.get(0).getEndpoint(endpointKey));
    try {
        client.locateService(DummyService.class, si.getName(), si.getVersion(), "random", endpointKey);
        // Assert.fail works regardless of the JVM -ea flag, unlike the assert statement.
        Assert.fail("locateService should fail for an unknown tag");
    } catch (CoordinatorException expected) {
        // ignore this exception since it's expected
    }
    try {
        client.locateService(DummyService.class, si.getName(), si.getVersion(), tag, "random");
        Assert.fail("locateService should fail for an unknown endpoint key");
    } catch (CoordinatorException expected) {
        _log.info("CoordinatorException is throwed as we expected", expected);
    }
    service.stopRmiServer();
    service.stop();
}
Also used : FatalCoordinatorException(com.emc.storageos.coordinator.exceptions.FatalCoordinatorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) Service(com.emc.storageos.coordinator.common.Service) Test(org.junit.Test)

Example 24 with CoordinatorException

use of com.emc.storageos.coordinator.exceptions.CoordinatorException in project coprhd-controller by CoprHD.

the class DrUtil method isSiteUp.

/**
 * Report whether the given site appears to be up, based on its syssvc beacons.
 *
 * @param siteId id of the site to probe
 * @return false only when the coordinator reports that no syssvc was found for the site;
 *         true when the lookup succeeds or fails with any other coordinator error
 */
public boolean isSiteUp(String siteId) {
    try {
        // Look up syssvc beacons for the site - assumes syssvc uses the same service name on all sites
        CoordinatorClientImpl client = (CoordinatorClientImpl) coordinator;
        String name = client.getSysSvcName();
        String version = client.getSysSvcVersion();
        List<Service> beacons = coordinator.locateAllServices(siteId, name, version, null, null);
        List<String> activeNodes = new ArrayList<>();
        for (Service beacon : beacons) {
            activeNodes.add(beacon.getNodeId());
        }
        String joinedNodes = StringUtils.join(activeNodes, ",");
        log.info("Site {} is up. active nodes {}", siteId, joinedNodes);
        return true;
    } catch (CoordinatorException ex) {
        boolean beaconMissing = ex.getServiceCode() == ServiceCode.COORDINATOR_SVC_NOT_FOUND;
        if (!beaconMissing) {
            // Any other coordinator failure is logged and the site is still reported as up.
            log.error("Unexpected error when checking site service becons", ex);
            return true;
        }
        // no service beacon found for given site
        return false;
    }
}
Also used : CoordinatorClientImpl(com.emc.storageos.coordinator.client.service.impl.CoordinatorClientImpl) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) ArrayList(java.util.ArrayList) Service(com.emc.storageos.coordinator.common.Service)

Example 25 with CoordinatorException

use of com.emc.storageos.coordinator.exceptions.CoordinatorException in project coprhd-controller by CoprHD.

the class CoordinatorClientImpl method getLock.

/**
 * Build an inter-process mutex for {@code name} under {@code parentPath}, after ensuring
 * the parent path exists.
 *
 * @param parentPath path under which the lock node lives
 * @param name lock name, appended to parentPath to form the lock path
 * @return a new InterProcessMutex for the resulting path (not acquired by this method)
 * @throws CoordinatorException if the parent path cannot be ensured
 */
private InterProcessLock getLock(String parentPath, String name) throws CoordinatorException {
    EnsurePath parent = new EnsurePath(parentPath);
    try {
        parent.ensure(_zkConnection.curator().getZookeeperClient());
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Restore the interrupt flag before translating the failure.
            Thread.currentThread().interrupt();
        }
        throw CoordinatorException.fatals.unableToGetLock(name, e);
    }
    String mutexPath = ZKPaths.makePath(parentPath, name);
    return new InterProcessMutex(_zkConnection.curator(), mutexPath);
}
Also used : EnsurePath(org.apache.curator.utils.EnsurePath) PropertyInfoMapper.decodeFromString(com.emc.storageos.coordinator.mapper.PropertyInfoMapper.decodeFromString) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) InterProcessMutex(org.apache.curator.framework.recipes.locks.InterProcessMutex)

Aggregations

CoordinatorException (com.emc.storageos.coordinator.exceptions.CoordinatorException)29 RetryableCoordinatorException (com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException)19 UnknownHostException (java.net.UnknownHostException)14 KeeperException (org.apache.zookeeper.KeeperException)13 PropertyInfoMapper.decodeFromString (com.emc.storageos.coordinator.mapper.PropertyInfoMapper.decodeFromString)12 IOException (java.io.IOException)12 Site (com.emc.storageos.coordinator.client.model.Site)6 Configuration (com.emc.storageos.coordinator.common.Configuration)6 EnsurePath (org.apache.curator.utils.EnsurePath)6 Service (com.emc.storageos.coordinator.common.Service)5 PropertyInfo (com.emc.storageos.model.property.PropertyInfo)3 ClusterInfo (com.emc.vipr.model.sys.ClusterInfo)3 ArrayList (java.util.ArrayList)3 CuratorTransaction (org.apache.curator.framework.api.transaction.CuratorTransaction)3 CuratorTransactionFinal (org.apache.curator.framework.api.transaction.CuratorTransactionFinal)3 CoordinatorClientImpl (com.emc.storageos.coordinator.client.service.impl.CoordinatorClientImpl)2 ZkPath (com.emc.storageos.coordinator.common.impl.ZkPath)2 CheckPermission (com.emc.storageos.security.authorization.CheckPermission)2 APIException (com.emc.storageos.svcs.errorhandling.resources.APIException)2 InternalServerErrorException (com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException)2