use of com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException in project coprhd-controller by CoprHD.
the class VdcConfigUtil method genSiteProperties.
/**
 * Writes the site-level properties of one VDC into {@code vdcConfig}.
 *
 * <p>For every site that is not filtered out, the per-node IPv4/IPv6 addresses, the node count
 * and the two VIPs are written under keys built from {@code vdcShortId} and the site short id.
 * For the local site, {@code SITE_MYID} and {@code SITE_MY_UUID} are written as well. When
 * {@code vdcShortId} is the local VDC, {@code SITE_IDS}, {@code SITE_IS_STANDBY} and
 * {@code SITE_ACTIVE_ID} are also written.
 *
 * <p>Note: {@code sites} is sorted in place by creation time, so the caller's list is reordered.
 *
 * @param vdcConfig property map to populate
 * @param vdcShortId short id of the VDC the sites belong to
 * @param sites sites of that VDC; sorted in place by this method
 */
private void genSiteProperties(Map<String, String> vdcConfig, String vdcShortId, List<Site> sites) {
    String activeSiteId = null;
    try {
        activeSiteId = drUtil.getActiveSite().getUuid();
    } catch (RetryableCoordinatorException e) {
        // Deliberately best-effort: the lookup is retried below via the switchover target.
        log.warn("Failed to find active site id from ZK, go on since it maybe switchover case");
    }
    SiteInfo siteInfo = coordinator.getTargetInfo(SiteInfo.class);
    Site localSite = drUtil.getLocalSite();
    // No active site found: during a switchover, fall back to the switchover target site.
    if (StringUtils.isEmpty(activeSiteId) && SiteInfo.DR_OP_SWITCHOVER.equals(siteInfo.getActionRequired())) {
        activeSiteId = drUtil.getSiteFromLocalVdc(siteInfo.getTargetSiteUUID()).getUuid();
    }

    // Order sites by creation time. Long.compare is used instead of casting the difference
    // to int: the narrowing cast overflows for large differences and yields a wrong sign,
    // which breaks the Comparator contract.
    Collections.sort(sites, new Comparator<Site>() {
        @Override
        public int compare(Site a, Site b) {
            return Long.compare(a.getCreationTime(), b.getCreationTime());
        }
    });

    List<String> shortIds = new ArrayList<>();
    for (Site site : sites) {
        if (shouldExcludeFromConfig(site)) {
            log.info("Ignore site {} of vdc {}", site.getSiteShortId(), site.getVdcShortId());
            continue;
        }
        // Remote sites that are pausing, paused, being removed, failing over or degraded are
        // left out of the generated config; per the original note, this will make it easier
        // to resume the data replication.
        if (!drUtil.isLocalSite(site)
                && (site.getState().equals(SiteState.STANDBY_PAUSING)
                        || site.getState().equals(SiteState.STANDBY_PAUSED)
                        || site.getState().equals(SiteState.STANDBY_REMOVING)
                        || site.getState().equals(SiteState.ACTIVE_FAILING_OVER)
                        || site.getState().equals(SiteState.ACTIVE_DEGRADED))) {
            continue;
        }

        int siteNodeCnt = 0;
        Map<String, String> siteIPv4Addrs = site.getHostIPv4AddressMap();
        Map<String, String> siteIPv6Addrs = site.getHostIPv6AddressMap();
        List<String> siteHosts = getHostsFromIPAddrMap(siteIPv4Addrs, siteIPv6Addrs);
        String siteShortId = site.getSiteShortId();
        // sort the host names as vipr1, vipr2 ...
        Collections.sort(siteHosts);
        for (String hostName : siteHosts) {
            siteNodeCnt++;
            // A host missing from one of the address maps gets an empty string for that family.
            String address = siteIPv4Addrs.get(hostName);
            vdcConfig.put(String.format(VDC_SITE_IPADDR_PTN, vdcShortId, siteShortId, siteNodeCnt), address == null ? "" : address);
            address = siteIPv6Addrs.get(hostName);
            vdcConfig.put(String.format(VDC_SITE_IPADDR6_PTN, vdcShortId, siteShortId, siteNodeCnt), address == null ? "" : address);
        }
        vdcConfig.put(String.format(VDC_SITE_NODE_COUNT_PTN, vdcShortId, siteShortId), String.valueOf(siteNodeCnt));
        vdcConfig.put(String.format(VDC_SITE_VIP_PTN, vdcShortId, siteShortId), site.getVip());
        vdcConfig.put(String.format(VDC_SITE_VIP6_PTN, vdcShortId, siteShortId), site.getVip6());
        if (drUtil.isLocalSite(site)) {
            vdcConfig.put(SITE_MYID, siteShortId);
            vdcConfig.put(SITE_MY_UUID, site.getUuid());
        }
        shortIds.add(siteShortId);
    }
    Collections.sort(shortIds);

    if (drUtil.getLocalVdcShortId().equals(vdcShortId)) {
        // right now we assume that SITE_IDS and SITE_IS_STANDBY only makes sense for local VDC
        // moving forward this may or may not be the case.
        vdcConfig.put(SITE_IDS, StringUtils.join(shortIds, ','));
        vdcConfig.put(SITE_IS_STANDBY, String.valueOf(!localSite.getUuid().equals(activeSiteId)));
        vdcConfig.put(SITE_ACTIVE_ID, StringUtils.isEmpty(activeSiteId) ? DEFAULT_ACTIVE_SITE_ID : drUtil.getSiteFromLocalVdc(activeSiteId).getSiteShortId());
    }
    vdcConfig.put(String.format(VDC_SITE_IDS, vdcShortId), StringUtils.join(shortIds, ','));
}
use of com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException in project coprhd-controller by CoprHD.
the class CoordinatorClientImpl method getSemaphoreLock.
/**
 * Returns a semaphore mutex located under the mutex root path in ZooKeeper.
 *
 * <p>The mutex root path is ensured to exist first; any failure while doing so is rethrown
 * as a {@link RetryableCoordinatorException} carrying the original exception as its cause.
 *
 * @param name lock name, appended to the mutex root path
 * @return a new {@link InterProcessSemaphoreMutex} for the resulting path
 * @throws CoordinatorException if the mutex root path cannot be ensured
 */
@Override
public InterProcessSemaphoreMutex getSemaphoreLock(String name) throws CoordinatorException {
    final String mutexRoot = ZkPath.MUTEX.toString();
    EnsurePath rootGuard = new EnsurePath(mutexRoot);
    try {
        rootGuard.ensure(_zkConnection.curator().getZookeeperClient());
    } catch (Exception cause) {
        Object[] messageArgs = new Object[] { name, cause.getMessage() };
        throw new RetryableCoordinatorException(ServiceCode.COORDINATOR_SVC_NOT_FOUND, cause,
                "Unable to get lock {0}. Caused by: {1}", messageArgs);
    }

    String lockPath = ZKPaths.makePath(mutexRoot, name);
    return new InterProcessSemaphoreMutex(_zkConnection.curator(), lockPath);
}
use of com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException in project coprhd-controller by CoprHD.
the class VdcManager method auditCompletedDrOperation.
/**
 * Audits DR operations that are no longer in progress.
 *
 * <p>Runs only on the active site and only when the site-local audit lock can be acquired
 * within the wait time. For each recorded DR operation the target site is looked up: an
 * operation whose site ended in {@code STANDBY_ERROR} is audited as a failure, one whose site
 * is no longer in an ongoing DR state is audited as a success, and in both cases the operation
 * record is removed. Operations still in progress are left untouched. Any exception is logged
 * and not propagated.
 */
private void auditCompletedDrOperation() {
    if (!drUtil.isActiveSite()) {
        return;
    }

    InterProcessLock auditLock = coordinator.getCoordinatorClient().getSiteLocalLock(AUDIT_DR_OPERATION_LOCK);
    boolean lockHeld = false;
    try {
        lockHeld = auditLock.acquire(AUDIT_LOCK_WAIT_TIME_SEC, TimeUnit.SECONDS);
        if (!lockHeld) {
            return;
        }
        log.info("Local site is active, local node acquired lock, starting audit complete DR operations ...");

        List<Configuration> operationConfigs = coordinator.getCoordinatorClient().queryAllConfiguration(DrOperationStatus.CONFIG_KIND);
        if (operationConfigs == null || operationConfigs.isEmpty()) {
            return;
        }

        for (Configuration operationConfig : operationConfigs) {
            DrOperationStatus operation = new DrOperationStatus(operationConfig);
            String siteId = operation.getSiteUuid();
            InterState interState = operation.getInterState();

            Site site;
            try {
                site = drUtil.getSiteFromLocalVdc(siteId);
            } catch (RetryableCoordinatorException lookupFailure) {
                boolean removedStandbyGone = interState.equals(InterState.REMOVING_STANDBY)
                        && lookupFailure.getServiceCode() == ServiceCode.COORDINATOR_SITE_NOT_FOUND;
                if (!removedStandbyGone) {
                    throw lookupFailure;
                }
                // The standby being removed is already gone: just record the audit log
                // and clear the DR operation status.
                this.auditMgr.recordAuditLog(null, null, EVENT_SERVICE_TYPE, getOperationType(interState),
                        System.currentTimeMillis(), AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_END, siteId);
                coordinator.getCoordinatorClient().removeServiceConfiguration(operationConfig);
                log.info("DR operation status has been cleared: {}", operation);
                continue;
            }

            SiteState currentState = site.getState();
            boolean failed = currentState.equals(SiteState.STANDBY_ERROR);
            if (!failed && currentState.isDROperationOngoing()) {
                // Still ongoing, do nothing
                continue;
            }

            // Finished: failure when the site is in the error state, success otherwise.
            this.auditMgr.recordAuditLog(null, null, EVENT_SERVICE_TYPE, getOperationType(interState),
                    System.currentTimeMillis(),
                    failed ? AuditLogManager.AUDITLOG_FAILURE : AuditLogManager.AUDITLOG_SUCCESS,
                    AuditLogManager.AUDITOP_END, site.toBriefString());
            log.info(String.format("Site %s state has transformed from %s to %s", siteId, interState, currentState));

            // clear this operation status
            coordinator.getCoordinatorClient().removeServiceConfiguration(operationConfig);
            log.info("DR operation status has been cleared: {}", operation);
        }
    } catch (Exception e) {
        log.error("Auditing DR operation failed with exception", e);
    } finally {
        if (lockHeld) {
            try {
                auditLock.release();
            } catch (Exception e) {
                log.error("Failed to release DR operation audit lock", e);
            }
        }
    }
}
use of com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException in project coprhd-controller by CoprHD.
the class DbSvcRunner method isStarted.
/**
 * Checks whether the service is started by looking it up through the coordinator.
 *
 * @return {@code true} if at least one service instance is located; {@code false} if none is
 *         found or the lookup throws (the exception is logged, not propagated)
 */
public boolean isStarted() {
    try {
        List<Service> located = getCoordinator().locateAllServices(serviceName, SVC_VERSION, null, null);
        if (!located.isEmpty()) {
            // Only the first located instance is reported in the log.
            Service first = located.get(0);
            URI endpoint = first.getEndpoint();
            log.info("Found " + first.getName() + "; host = " + endpoint.getHost() + "; port = " + endpoint.getPort());
            return true;
        }
    } catch (RetryableCoordinatorException notRunning) {
        log.warn("no {} instance running. Coordinator exception message: {}", serviceName, notRunning.getMessage());
    } catch (Exception lookupError) {
        log.error("service lookup failure", lookupError);
    }
    return false;
}
use of com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException in project coprhd-controller by CoprHD.
the class DisasterRecoveryService method updateSite.
/**
 * Update site information. Only name and description can be updated.
 *
 * <p>The request is rejected when the site cannot be found, when the new name fails
 * {@code validSiteName}, or when another site already uses the same name. Both the successful
 * update and a failure while persisting are recorded in the audit log.
 *
 * @param uuid target site uuid
 * @param siteParam site information
 * @brief Update Site information
 * @return Response with status ACCEPTED when the update has been persisted
 */
@PUT
@Path("/{uuid}")
@Consumes({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN }, blockProxies = true)
public Response updateSite(@PathParam("uuid") String uuid, SiteUpdateParam siteParam) {
    log.info("Begin to update site information for {}", uuid);
    Site site = null;
    try {
        site = drUtil.getSiteFromLocalVdc(uuid);
    } catch (RetryableCoordinatorException e) {
        log.error("Can't find site with specified site UUID {}", uuid);
        throw APIException.badRequests.siteIdNotFound();
    }

    if (!validSiteName(siteParam.getName())) {
        throw APIException.internalServerErrors.updateSiteFailed(site.getName(), String.format("Site name should not be empty or longer than %d characters.", SITE_NAME_LENGTH_LIMIT));
    }

    // The new name must not collide with the name of any other site.
    for (Site eachSite : drUtil.listSites()) {
        if (eachSite.getUuid().equals(uuid)) {
            continue;
        }
        if (eachSite.getName().equals(siteParam.getName())) {
            throw APIException.internalServerErrors.addStandbyPrecheckFailed("Duplicate site name");
        }
    }

    try {
        site.setName(siteParam.getName());
        site.setDescription(siteParam.getDescription());
        coordinator.persistServiceConfiguration(site.toConfiguration());
        auditDisasterRecoveryOps(OperationTypeEnum.UPDATE_SITE, AuditLogManager.AUDITLOG_SUCCESS, null, site.getName(), site.getVipEndPoint(), site.getUuid());
        return Response.status(Response.Status.ACCEPTED).build();
    } catch (Exception e) {
        // The logger substitutes "{}" placeholders, not printf-style "%s"; with "%s" the uuid
        // was never inserted into the message.
        log.error("Error happened when update site {}", uuid, e);
        auditDisasterRecoveryOps(OperationTypeEnum.UPDATE_SITE, AuditLogManager.AUDITLOG_FAILURE, null, site.getName(), site.getVipEndPoint(), site.getUuid());
        throw APIException.internalServerErrors.updateSiteFailed(site.getName(), e.getMessage());
    }
}
Aggregations