Search in sources :

Example 1 with Configuration

use of com.emc.storageos.coordinator.common.Configuration in project coprhd-controller by CoprHD.

the class DisasterRecoveryServiceTest method testPrecheckForPlannedFailover.

/**
 * Walks {@code drService.precheckForSwitchover} through a sequence of failing
 * conditions and checks that each one is rejected with an
 * {@link InternalServerErrorException} carrying
 * {@code ServiceCode.SYS_DR_OPERATION_PRECHECK_FAILED}.
 *
 * The stubs are installed cumulatively: each stage relaxes the condition that
 * made the previous stage fail, so the order of the stages matters.
 */
@Test
public void testPrecheckForPlannedFailover() {
    String standbyUUID = "a918ebd4-bbf4-378b-8034-b03423f9edfd";
    // test for invalid uuid
    try {
        APIException e = APIException.internalServerErrors.switchoverPrecheckFailed(standby.getUuid(), "Standby uuid is not valid, can't find in ZK");
        doThrow(e).when(drUtil).getSiteFromLocalVdc(standbyUUID);
        drService.precheckForSwitchover(standbyUUID);
        fail("should throw exception when met invalid standby uuid");
    } catch (InternalServerErrorException e) {
        // JUnit convention is assertEquals(expected, actual); keeps failure messages accurate
        assertEquals(ServiceCode.SYS_DR_OPERATION_PRECHECK_FAILED, e.getServiceCode());
    }
    Site site = new Site();
    site.setUuid(standbyUUID);
    Configuration config = site.toConfiguration();
    // test for failover to primary
    try {
        // Mock a standby in coordinator, so it would pass invalid standby checking, go to next check
        doReturn(config).when(coordinator).queryConfiguration(String.format("%s/vdc1", Site.CONFIG_KIND), standbyUUID);
        drService.precheckForSwitchover(standbyUUID);
        fail("should throw exception when trying to failover to a primary site");
    } catch (InternalServerErrorException e) {
        assertEquals(ServiceCode.SYS_DR_OPERATION_PRECHECK_FAILED, e.getServiceCode());
    }
    // test for primary unstable case
    try {
        // Mock a primary site with different uuid with to-be-failover standby, so go to next check
        doReturn(false).when(drService).isClusterStable();
        drService.precheckForSwitchover(standbyUUID);
        fail("should throw exception when primary is not stable");
    } catch (InternalServerErrorException e) {
        assertEquals(ServiceCode.SYS_DR_OPERATION_PRECHECK_FAILED, e.getServiceCode());
    }
    // test for standby unstable case
    try {
        // Mock a stable status for primary, so go to next check
        doReturn(true).when(drService).isClusterStable();
        doReturn(ClusterInfo.ClusterState.DEGRADED).when(coordinator).getControlNodesState(eq(standbyUUID));
        drService.precheckForSwitchover(standbyUUID);
        fail("should throw exception when site to failover to is not stable");
    } catch (InternalServerErrorException e) {
        assertEquals(ServiceCode.SYS_DR_OPERATION_PRECHECK_FAILED, e.getServiceCode());
    }
    // test for standby not in STANDBY_SYNCED state
    try {
        // Mock a stable status for standby, so go to next check
        doReturn(ClusterInfo.ClusterState.STABLE).when(coordinator).getControlNodesState(anyString());
        // not fully synced
        config.setConfig("state", "STANDBY_SYNCING");
        doReturn(config).when(coordinator).queryConfiguration(String.format("%s/vdc1", Site.CONFIG_KIND), standbyUUID);
        drService.precheckForSwitchover(standbyUUID);
        fail("should throw exception when standby site is not fully synced");
    } catch (InternalServerErrorException e) {
        assertEquals(ServiceCode.SYS_DR_OPERATION_PRECHECK_FAILED, e.getServiceCode());
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) Configuration(com.emc.storageos.coordinator.common.Configuration) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) Matchers.anyString(org.mockito.Matchers.anyString) Test(org.junit.Test)

Example 2 with Configuration

use of com.emc.storageos.coordinator.common.Configuration in project coprhd-controller by CoprHD.

the class DisasterRecoveryServiceTest method testPrecheckForStandbyAttach_PrimarySite_EmptyPrimaryID.

/**
 * Runs {@code precheckForStandbyAdd} against a freshly constructed
 * {@link ConfigurationImpl} returned for the DR active-site lookup, with the
 * local site and the active site both stubbed to {@code primarySite}.
 * The test passes when the precheck completes without throwing.
 */
@Test
public void testPrecheckForStandbyAttach_PrimarySite_EmptyPrimaryID() throws Exception {
    // Coordinator side: stable control nodes and a newly created active-site configuration.
    Configuration activeSiteConfig = new ConfigurationImpl();
    doReturn(ClusterInfo.ClusterState.STABLE).when(coordinator).getControlNodesState();
    doReturn(activeSiteConfig).when(coordinator).queryConfiguration(Constants.CONFIG_DR_ACTIVE_KIND, Constants.CONFIG_DR_ACTIVE_ID);

    // DR util side: this node is both the local site and the active site.
    doReturn(primarySite).when(drUtil).getActiveSite();
    doReturn(primarySite).when(drUtil).getLocalSite();

    drService.precheckForStandbyAdd(standby, mockViPRCoreClient(null));
}
Also used : Configuration(com.emc.storageos.coordinator.common.Configuration) ConfigurationImpl(com.emc.storageos.coordinator.common.impl.ConfigurationImpl) Test(org.junit.Test)

Example 3 with Configuration

use of com.emc.storageos.coordinator.common.Configuration in project coprhd-controller by CoprHD.

the class DbRebuildRunnable method run.

/**
 * Rebuilds the local db from the active site's Cassandra data center when the
 * local site is in {@code STANDBY_SYNCING} state and the last recorded data
 * sync is not current.
 *
 * After a successful rebuild the last data sync time is persisted. Once both
 * {@code Constants.DBSVC_NAME} and {@code Constants.GEODBSVC_NAME} report
 * rebuild completion and the site is still {@code STANDBY_SYNCING}, old data
 * revisions are purged, the db heartbeat is reset and the site state is moved
 * to {@code STANDBY_SYNCED}.
 *
 * The {@code isRunning} flag is always cleared on exit, including when the
 * rebuild or a coordinator call throws; the exception itself still propagates
 * to the caller.
 */
@Override
public void run() {
    if (isRunning) {
        log.info("db rebuild in progress, nothing to do");
        return;
    }
    DrUtil drUtil = new DrUtil(coordinator);
    Site localSite = drUtil.getLocalSite();
    if (!localSite.getState().equals(SiteState.STANDBY_SYNCING)) {
        log.info("db in sync, nothing to do");
        return;
    }
    Configuration dbconfig = coordinator.queryConfiguration(coordinator.getSiteId(), coordinator.getVersionedDbConfigPath(service.getName(), service.getVersion()), service.getId());
    if (isLastDataSyncCurrent(dbconfig)) {
        log.info("last data sync time is later than the target site info update, nothing to do");
        return;
    }
    Site primarySite = drUtil.getActiveSite();
    String sourceDc = drUtil.getCassandraDcId(primarySite);
    log.info("starting db rebuild from source dc {}", sourceDc);
    isRunning = true;
    try {
        StorageService.instance.rebuild(sourceDc);
        long currentSyncTime = System.currentTimeMillis();
        log.info("local db rebuild finishes. Updating last data sync time to {}", currentSyncTime);
        dbconfig.setConfig(DbConfigConstants.LAST_DATA_SYNC_TIME, String.valueOf(currentSyncTime));
        coordinator.persistServiceConfiguration(coordinator.getSiteId(), dbconfig);
        if (dbRebuildComplete(Constants.DBSVC_NAME) && dbRebuildComplete(Constants.GEODBSVC_NAME)) {
            // re-read the site: its state may have changed while the rebuild was running
            localSite = drUtil.getLocalSite();
            // do nothing if it gets set to STANDBY_ERROR earlier
            if (localSite.getState().equals(SiteState.STANDBY_SYNCING)) {
                purgeOldDataRevision(drUtil);
                // reset heartbeat for this site
                SiteMonitorResult dbHeartbeat = coordinator.getTargetInfo(localSite.getUuid(), SiteMonitorResult.class);
                if (dbHeartbeat != null) {
                    dbHeartbeat.setDbQuorumLostSince(0);
                    coordinator.setTargetInfo(localSite.getUuid(), dbHeartbeat);
                    log.info("Reset db heartbeat state for {}", localSite.getUuid());
                }
                log.info("all db rebuild finish, updating site state to STANDBY_SYNCED");
                localSite.setState(SiteState.STANDBY_SYNCED);
                coordinator.persistServiceConfiguration(localSite.toConfiguration());
            }
        }
    } finally {
        // Clear the flag even on failure; otherwise one failed rebuild would make
        // every later invocation exit early with "db rebuild in progress".
        isRunning = false;
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) Configuration(com.emc.storageos.coordinator.common.Configuration) SiteMonitorResult(com.emc.storageos.coordinator.client.model.SiteMonitorResult) DrUtil(com.emc.storageos.coordinator.client.service.DrUtil)

Example 4 with Configuration

use of com.emc.storageos.coordinator.common.Configuration in project coprhd-controller by CoprHD.

the class DbServiceImpl method setConfigValue.

/**
 * Sets one key/value pair on this service's db configuration for the current
 * site and persists the updated configuration through the coordinator.
 * Does nothing when the coordinator returns no configuration.
 *
 * @param key   configuration key to set
 * @param value value to store under {@code key}
 */
public void setConfigValue(String key, String value) {
    final String dbConfigKind = _coordinator.getDbConfigPath(_serviceInfo.getName());
    final Configuration stored = _coordinator.queryConfiguration(_coordinator.getSiteId(), dbConfigKind, _serviceInfo.getId());
    if (stored == null) {
        // no configuration found for this service, so there is nothing to update
        return;
    }
    stored.setConfig(key, value);
    _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), stored);
}
Also used : Configuration(com.emc.storageos.coordinator.common.Configuration)

Example 5 with Configuration

use of com.emc.storageos.coordinator.common.Configuration in project coprhd-controller by CoprHD.

the class DbServiceImpl method start.

/**
 * Starts the DB service.
 *
 * The sequence is: set Cassandra system properties; under the schema lock,
 * run the configuration checks, determine the startup mode and boot the
 * Cassandra daemon; then mark the node as joined, wait for all nodes, start
 * the service beacon and db client, and finally run the handler
 * ({@code _handler.run()}) and, if it succeeds, the background tasks.
 *
 * @throws IOException declared by the signature; NOTE(review): no statement
 *         visible in this method throws it directly
 * @throws IllegalStateException wrapping any exception raised during the
 *         locked startup phase
 */
@Override
public void start() throws IOException {
    if (_log.isInfoEnabled()) {
        _log.info("Starting DB service...");
    }
    // Suppress Sonar violation of Lazy initialization of static fields should be synchronized
    // start() method will be only called one time when startup dbsvc, so it's safe to ignore sonar violation
    // NOSONAR ("squid:S2444")
    instance = this;
    if (backCompatPreYoda) {
        _log.info("Pre-yoda back compatible flag detected. Initialize local keystore/truststore for Cassandra native encryption");
        initKeystoreAndTruststore();
        _schemaUtil.setBackCompatPreYoda(true);
    }
    System.setProperty("cassandra.config", _config);
    System.setProperty("cassandra.config.loader", CassandraConfigLoader.class.getName());
    // Set to false to clear all gossip state for the node on restart.
    //
    // We encounter a weird Cassandra gossip issue (COP-19246) - some nodes are missing from gossip
    // when rebooting the entire cluster simultaneously. Critical Gossip fields (ApplicationState.STATUS, ApplicationState.TOKENS)
    // are not synchronized during handshaking. It looks like some problem caused by incorrect gossip version/generation
    // at system local table. So add this option to clean up local gossip state during reboot.
    //
    // Make sure add-vdc/add-standby passes if you remove this option in the future.
    //
    // We need to make sure the majority of local nodes are added as seed nodes. Otherwise cassandra may not see other nodes if it loses
    // connection to other sites
    System.setProperty("cassandra.load_ring_state", "false");
    // See https://docs.datastax.com/en/cassandra/2.0/cassandra/operations/ops_add_dc_to_cluster_t.html
    if (_schemaUtil.isStandby()) {
        System.setProperty("cassandra.auto_bootstrap", "false");
    }
    InterProcessLock lock = null;
    Configuration config = null;
    StartupMode mode = null;
    try {
        // we use this lock to discourage more than one node bootstrapping / joining at the same time
        // Cassandra can handle this but it's generally not recommended to make changes to schema concurrently
        lock = getLock(getSchemaLockName());
        config = checkConfiguration();
        checkGlobalConfiguration();
        checkVersionedConfiguration();
        removeStaleConfiguration();
        mode = checkStartupMode(config);
        _log.info("Current startup mode is {}", mode);
        // Check if service is allowed to get started by querying db offline info to avoid bringing back stale data.
        // Skipping hibernate mode for node recovery procedure to recover the overdue node.
        int nodeCount = ((CoordinatorClientImpl) _coordinator).getNodeCount();
        if (nodeCount != 1 && mode.type != StartupMode.StartupModeType.HIBERNATE_MODE) {
            checkDBOfflineInfo(_coordinator, _serviceInfo.getName(), dbDir, true);
        }
        // this call causes instantiation of a seed provider instance, so the check*Configuration
        // calls must precede it
        removeCassandraSavedCaches();
        mode.onPreStart();
        if (_jmxServer != null) {
            _jmxServer.start();
            System.setProperty("com.sun.management.jmxremote.port", Integer.toString(_jmxServer.getPort()));
        }
        _service = new CassandraDaemon();
        _service.init(null);
        _service.start();
        cassandraInitialized = true;
        mode.onPostStart();
    } catch (Exception e) {
        // mode is still null if the failure happened before checkStartupMode() returned
        if (mode != null && mode.type == StartupMode.StartupModeType.HIBERNATE_MODE) {
            printRecoveryWorkAround(e);
        }
        _log.error("e=", e);
        // any failure in the locked phase aborts startup; nothing below this try block runs
        throw new IllegalStateException(e);
    } finally {
        if (lock != null) {
            try {
                lock.release();
            } catch (Exception ignore) {
                // best effort: a failed release is only logged and does not abort startup
                _log.debug("lock release failed");
            }
        }
    }
    // Reached only when the locked phase completed without an exception.
    // NOTE(review): assumes checkConfiguration() never returns null - confirm.
    if (config.getConfig(DbConfigConstants.JOINED) == null) {
        config.setConfig(DbConfigConstants.JOINED, Boolean.TRUE.toString());
        _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config);
    }
    _statusChecker.waitForAllNodesJoined();
    _svcBeacon.start();
    if (backCompatPreYoda) {
        _log.info("Enable duplicated beacon in global area during pre-yoda upgrade");
        startDupBeacon();
    }
    setDbInitializedFlag();
    setDbConfigInitDone();
    _dbClient.start();
    if (_schemaUtil.isStandby()) {
        String localDataRevision = getLocalDataRevision();
        if (localDataRevision != null) {
            _schemaUtil.checkDataRevision(localDataRevision);
        }
    }
    // Setup the vdc information, so that login enabled before migration
    if (!isGeoDbsvc()) {
        _schemaUtil.checkAndSetupBootStrapInfo(_dbClient);
    }
    dbMgr.init();
    // background tasks are started only when the handler reports success
    if (_handler.run()) {
        // Setup the bootstrap info root tenant, if root tenant migrated from local db, then skip it
        if (isGeoDbsvc()) {
            _schemaUtil.checkAndSetupBootStrapInfo(_dbClient);
        } else {
            _schemaUtil.checkAndInitStorageSystemTypes(_dbClient);
        }
        startBackgroundTasks();
        _log.info("DB service started");
    } else {
        _log.error("DB migration failed. Skipping starting background tasks.");
    }
}
Also used : CoordinatorClientImpl(com.emc.storageos.coordinator.client.service.impl.CoordinatorClientImpl) Configuration(com.emc.storageos.coordinator.common.Configuration) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) IOException(java.io.IOException) CassandraDaemon(org.apache.cassandra.service.CassandraDaemon)

Aggregations

Configuration (com.emc.storageos.coordinator.common.Configuration): 87; ConfigurationImpl (com.emc.storageos.coordinator.common.impl.ConfigurationImpl): 16; InterProcessLock (org.apache.curator.framework.recipes.locks.InterProcessLock): 11; CoordinatorException (com.emc.storageos.coordinator.exceptions.CoordinatorException): 9; IOException (java.io.IOException): 9; CoordinatorClient (com.emc.storageos.coordinator.client.service.CoordinatorClient): 8; RetryableCoordinatorException (com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException): 8; Site (com.emc.storageos.coordinator.client.model.Site): 7; UnknownHostException (java.net.UnknownHostException): 7; KeeperException (org.apache.zookeeper.KeeperException): 7; PropertyInfoMapper.decodeFromString (com.emc.storageos.coordinator.mapper.PropertyInfoMapper.decodeFromString): 6; ArrayList (java.util.ArrayList): 6; HashMap (java.util.HashMap): 6; Test (org.junit.Test): 6; Matchers.anyString (org.mockito.Matchers.anyString): 3; MigrationStatus (com.emc.storageos.coordinator.client.model.MigrationStatus): 2; PropertyInfoExt (com.emc.storageos.coordinator.client.model.PropertyInfoExt): 2; DrUtil (com.emc.storageos.coordinator.client.service.DrUtil): 2; CoordinatorClientInetAddressMap (com.emc.storageos.coordinator.client.service.impl.CoordinatorClientInetAddressMap): 2; SiteConfigRestRep (com.emc.storageos.model.dr.SiteConfigRestRep): 2