use of com.emc.storageos.coordinator.exceptions.CoordinatorException in project coprhd-controller by CoprHD.
the class DisasterRecoveryService method retryOperation.
/**
* Retry the last DR operation of a standby site that is currently in STANDBY_ERROR.
*
* The request is rejected unless all of the following hold:
* - the site can be looked up in the local VDC,
* - its current state is STANDBY_ERROR,
* - its last state is STANDBY_PAUSING, STANDBY_RESUMING or STANDBY_FAILING_OVER,
* - the action currently required on the local site equals the DR action of that last state
*   (i.e. no other DR operation has been issued since the failed one).
*
* When the checks pass, the site state is put back to its last state inside a coordinator
* transaction and a new VDC config version is set as target on every site. On any failure
* inside the transaction the transaction is discarded, a failure audit record is written and
* an internal server error is thrown.
*
* @param uuid site UUID
* @brief Query the latest error message and retry the last operation for specific standby site
* @return updated standby site representation
*/
@POST
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN, Role.SYSTEM_ADMIN, Role.RESTRICTED_SYSTEM_ADMIN })
@Path("/{uuid}/retry")
public SiteRestRep retryOperation(@PathParam("uuid") String uuid) {
// NOTE(review): the log text says "get site error" although this method retries an operation;
// it looks copied from another endpoint - confirm before changing the message.
log.info("Begin to get site error by uuid {}", uuid);
Site standby;
try {
standby = drUtil.getSiteFromLocalVdc(uuid);
} catch (CoordinatorException e) {
// Lookup failure is reported to the caller as a bad request (unknown site id).
log.error("Can't find site {} from ZK", uuid);
throw APIException.badRequests.siteIdNotFound();
}
// Retry is only meaningful for a site that is currently in the error state.
if (!standby.getState().equals(SiteState.STANDBY_ERROR)) {
log.error("site {} is in state {}, should be STANDBY_ERROR", uuid, standby.getState());
throw APIException.badRequests.operationOnlyAllowedOnErrorSite(standby.getName(), standby.getState().toString());
}
// Only a failed pause, resume or failover can be retried.
if (!standby.getLastState().equals(SiteState.STANDBY_PAUSING) && !standby.getLastState().equals(SiteState.STANDBY_RESUMING) && !standby.getLastState().equals(SiteState.STANDBY_FAILING_OVER)) {
log.error("site {} lastState was {}, retry is only supported for Pause, Resume and Failover", uuid, standby.getLastState());
throw APIException.badRequests.operationRetryOnlyAllowedOnLastState(standby.getName(), standby.getLastState().toString());
}
// Reuse the action currently required on the local site as the operation to retry
Site localSite = drUtil.getLocalSite();
SiteInfo siteInfo = coordinator.getTargetInfo(localSite.getUuid(), SiteInfo.class);
String drOperation = siteInfo.getActionRequired();
// Check that last action matches retry action; a mismatch means another DR operation
// was issued after the failed one, which is reported as unrecoverable.
if (!drOperation.equals(standby.getLastState().getDRAction())) {
log.error("Active site last operation was {}, retry is only supported if no other operations have been performed", drOperation);
throw APIException.internalServerErrors.retryStandbyPrecheckFailed(standby.getName(), standby.getLastState().toString(), String.format("Another DR operation %s has been run on Active site. Only the latest operation can be retried. " + "This is an unrecoverable Error, please remove site and deploy a new one.", drOperation));
}
// NOTE(review): the lock is released in the finally block below but no acquire call is
// visible here - presumably getDROperationLock() returns the lock already held; confirm.
InterProcessLock lock = drUtil.getDROperationLock();
try {
coordinator.startTransaction();
// Put the standby back into the state it had before it went into error.
standby.setState(standby.getLastState());
// Failover requires setting old active site to last state as well.
// (standby.getState() now equals its last state, so this tests the retried operation.)
if (standby.getState() == SiteState.STANDBY_FAILING_OVER) {
for (Site site : drUtil.listSites()) {
if (site.getLastState() == SiteState.ACTIVE_FAILING_OVER) {
site.setState(SiteState.ACTIVE_FAILING_OVER);
coordinator.persistServiceConfiguration(site.toConfiguration());
}
}
}
coordinator.persistServiceConfiguration(standby.toConfiguration());
log.info("Notify all sites for reconfig");
// One new config version is generated and used as the target for every site below.
long vdcTargetVersion = DrUtil.newVdcConfigVersion();
for (Site site : drUtil.listSites()) {
String siteUuid = site.getUuid();
// A site whose last state is STANDBY_RESUMING and whose own required action is a data
// revision change keeps that action; the new version is also passed as its data revision.
if (site.getLastState() == SiteState.STANDBY_RESUMING) {
SiteInfo siteTargetInfo = coordinator.getTargetInfo(siteUuid, SiteInfo.class);
String resumeSiteOperation = siteTargetInfo.getActionRequired();
if (resumeSiteOperation.equals(SiteInfo.DR_OP_CHANGE_DATA_REVISION)) {
long dataRevision = vdcTargetVersion;
drUtil.updateVdcTargetVersion(siteUuid, resumeSiteOperation, vdcTargetVersion, dataRevision);
continue;
}
}
// Every other site gets the retried operation with the new target version.
log.info("Set dr operation {} on site {}", drOperation, siteUuid);
drUtil.updateVdcTargetVersion(siteUuid, drOperation, vdcTargetVersion);
}
coordinator.commitTransaction();
return siteMapper.map(standby);
} catch (Exception e) {
// Any failure inside the transaction: discard it, record a failure audit entry and
// surface the problem as an internal server error carrying the original message.
log.error("Error retrying site operation for site {}", uuid, e);
coordinator.discardTransaction();
auditDisasterRecoveryOps(OperationTypeEnum.RETRY_STANDBY_OP, AuditLogManager.AUDITLOG_FAILURE, null, standby);
InternalServerErrorException retryStandbyOpFailedException = APIException.internalServerErrors.retryStandbyOpFailed(standby.getName(), e.getMessage());
throw retryStandbyOpFailedException;
} finally {
// The lock is always released; a failure to release is only logged, not propagated.
try {
lock.release();
} catch (Exception ignore) {
log.error(String.format("Lock release failed when retrying standby site last op: %s", uuid));
}
}
}
use of com.emc.storageos.coordinator.exceptions.CoordinatorException in project coprhd-controller by CoprHD.
the class DisasterRecoveryService method precheckForSwitchoverForActiveSite.
/**
 * Validates, from the active site, that a switchover to the given standby site may start.
 *
 * The checks run in this order and the first failure aborts with an API exception:
 * the local site must not be a standby; the standby must be found in the local VDC; it must not
 * be the active site; it must be in STANDBY_SYNCED state, up, and have stable control nodes; the
 * local cluster must be stable; the standby must pass the connectivity check; and every standby
 * site must be either STANDBY_SYNCED or STANDBY_PAUSED, with stable control nodes unless paused.
 *
 * @param standbyUuid UUID of the standby site that should become active
 */
protected void precheckForSwitchoverForActiveSite(String standbyUuid) {
    if (drUtil.isStandby()) {
        throw APIException.badRequests.operationOnlyAllowedOnActiveSite();
    }

    Site standby;
    try {
        standby = drUtil.getSiteFromLocalVdc(standbyUuid);
    } catch (CoordinatorException e) {
        // The lookup failed, so there is no Site object to read from; report the requested
        // uuid itself rather than dereferencing the still-unassigned site.
        log.error("Can't find site {} from ZK", standbyUuid, e);
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standbyUuid, "Standby uuid is not valid, can't find it");
    }

    if (standbyUuid.equals(drUtil.getActiveSite().getUuid())) {
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), "Can't switchover to an active site");
    }
    if (standby.getState() != SiteState.STANDBY_SYNCED) {
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), "Standby site is not fully synced");
    }
    if (!drUtil.isSiteUp(standbyUuid)) {
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), "Standby site is not up");
    }
    if (coordinator.getControlNodesState(standby.getUuid()) != ClusterInfo.ClusterState.STABLE) {
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), "Standby site is not stable");
    }
    if (!isClusterStable()) {
        throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), "Active site is not stable");
    }
    checkSiteConnectivity(standby);

    // Every standby site (not only the target) must be synced or paused before switching over.
    List<Site> existingSites = drUtil.listStandbySites();
    for (Site site : existingSites) {
        if (site.getState() != SiteState.STANDBY_SYNCED && site.getState() != SiteState.STANDBY_PAUSED) {
            throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), String.format("Standby site %s is not synced or paused", site.getName()));
        }
        // Paused sites are exempt from the stability requirement.
        ClusterInfo.ClusterState state = coordinator.getControlNodesState(site.getUuid());
        if (site.getState() != SiteState.STANDBY_PAUSED && state != ClusterInfo.ClusterState.STABLE) {
            log.info("Site {} is not stable {}", site.getUuid(), state);
            throw APIException.internalServerErrors.switchoverPrecheckFailed(standby.getName(), String.format("Site %s is not stable", site.getName()));
        }
    }
}
use of com.emc.storageos.coordinator.exceptions.CoordinatorException in project coprhd-controller by CoprHD.
the class BeaconTest method testBeacon.
/**
 * Starts a dummy service beacon and verifies that a coordinator client can locate it by
 * name, version, tag and endpoint key, and that lookups with an unknown tag or an unknown
 * endpoint key fail with a {@link CoordinatorException}.
 */
@Test
public void testBeacon() throws Exception {
    final String tag = "foo";
    final String endpointKey = "key";
    DummyServiceImpl service = new DummyServiceImpl(10099, tag, endpointKey);
    service.startRmiServer();
    service.start(1000 * 5);
    try {
        Service si = service.getServiceInfo();
        CoordinatorClient client = connectClient();
        DummyService found = client.locateService(DummyService.class, si.getName(), si.getVersion(), tag, endpointKey);
        Assert.assertNotNull(found);
        found.test();
        List<Service> services = client.locateAllServices(si.getName(), si.getVersion(), tag, endpointKey);
        Assert.assertEquals(services.size(), 1);
        Assert.assertEquals(services.get(0).getName(), si.getName());
        Assert.assertEquals(services.get(0).getVersion(), si.getVersion());
        Assert.assertEquals(services.get(0).getEndpoint(), si.getEndpoint());
        Assert.assertTrue(services.get(0).isTagged(tag));
        Assert.assertEquals(services.get(0).getEndpoint(endpointKey), si.getEndpoint());
        try {
            client.locateService(DummyService.class, si.getName(), si.getVersion(), "random", endpointKey);
            // Assert.fail is used instead of the assert keyword, which does nothing unless
            // the JVM runs with -ea and would let this negative check pass silently.
            Assert.fail("locateService should fail for an unknown tag");
        } catch (CoordinatorException expected) {
            // ignore this exception since it's expected
        }
        try {
            client.locateService(DummyService.class, si.getName(), si.getVersion(), tag, "random");
            Assert.fail("locateService should fail for an unknown endpoint key");
        } catch (CoordinatorException expected) {
            _log.info("CoordinatorException is throwed as we expected", expected);
        }
    } finally {
        // Shut the dummy service down even when an assertion above fails, so the RMI
        // server does not stay alive after a failed run.
        service.stopRmiServer();
        service.stop();
    }
}
use of com.emc.storageos.coordinator.exceptions.CoordinatorException in project coprhd-controller by CoprHD.
the class DrUtil method isSiteUp.
/**
 * Reports whether a site is considered up, based on its syssvc service beacons.
 *
 * @param siteId id of the site to check
 * @return {@code false} only when the beacon lookup fails with
 *         {@code COORDINATOR_SVC_NOT_FOUND}; {@code true} when the lookup succeeds or fails
 *         with any other coordinator error
 */
public boolean isSiteUp(String siteId) {
    try {
        // Beacons are looked up by syssvc name/version - assumes syssvc on all sites
        // shares the same service name in its beacon.
        CoordinatorClientImpl clientImpl = (CoordinatorClientImpl) coordinator;
        String beaconName = clientImpl.getSysSvcName();
        String beaconVersion = clientImpl.getSysSvcVersion();
        List<Service> beacons = coordinator.locateAllServices(siteId, beaconName, beaconVersion, null, null);

        // Collect the node ids only for the informational log line below.
        List<String> activeNodes = new ArrayList<>(beacons.size());
        for (Service beacon : beacons) {
            activeNodes.add(beacon.getNodeId());
        }
        log.info("Site {} is up. active nodes {}", siteId, StringUtils.join(activeNodes, ","));
        return true;
    } catch (CoordinatorException ex) {
        if (ex.getServiceCode() != ServiceCode.COORDINATOR_SVC_NOT_FOUND) {
            // Any other coordinator failure is logged and the site is still reported as up.
            log.error("Unexpected error when checking site service becons", ex);
            return true;
        }
        // no service beacon found for given site
        return false;
    }
}
use of com.emc.storageos.coordinator.exceptions.CoordinatorException in project coprhd-controller by CoprHD.
the class CoordinatorClientImpl method getLock.
/**
 * Builds an inter-process mutex for {@code name} located under {@code parentPath}, after
 * making sure the parent path exists in ZooKeeper.
 *
 * @param parentPath ZooKeeper path under which the lock node lives
 * @param name lock name, appended to {@code parentPath} to form the lock path
 * @return a mutex bound to the resulting lock path
 * @throws CoordinatorException if the parent path cannot be ensured
 */
private InterProcessLock getLock(String parentPath, String name) throws CoordinatorException {
    EnsurePath parentEnsurer = new EnsurePath(parentPath);
    try {
        parentEnsurer.ensure(_zkConnection.curator().getZookeeperClient());
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Keep the interrupt visible to callers further up the stack.
            Thread.currentThread().interrupt();
        }
        throw CoordinatorException.fatals.unableToGetLock(name, e);
    }
    String mutexPath = ZKPaths.makePath(parentPath, name);
    return new InterProcessMutex(_zkConnection.curator(), mutexPath);
}
Aggregations