Use of org.apache.curator.framework.recipes.locks.InterProcessLock in project coprhd-controller by CoprHD.
The class DbServiceImpl, method start.
@Override
public void start() throws IOException {
    if (_log.isInfoEnabled()) {
        _log.info("Starting DB service...");
    }

    // Suppress the Sonar violation "Lazy initialization of static fields should be synchronized".
    // start() is only called once when dbsvc starts up, so it is safe to ignore the violation.
    // NOSONAR ("squid:S2444")
    instance = this;

    if (backCompatPreYoda) {
        _log.info("Pre-yoda back compatible flag detected. Initialize local keystore/truststore for Cassandra native encryption");
        initKeystoreAndTruststore();
        _schemaUtil.setBackCompatPreYoda(true);
    }
    System.setProperty("cassandra.config", _config);
    System.setProperty("cassandra.config.loader", CassandraConfigLoader.class.getName());

    // Set to false to clear all gossip state for the node on restart.
    //
    // We encountered a weird Cassandra gossip issue (COP-19246): some nodes are missing from gossip
    // when the entire cluster is rebooted simultaneously. Critical gossip fields (ApplicationState.STATUS,
    // ApplicationState.TOKENS) are not synchronized during handshaking. It looks like a problem caused by
    // an incorrect gossip version/generation in the system local table, so we add this option to clean up
    // local gossip state during reboot.
    //
    // If you remove this option in the future, make sure add-vdc/add-standby still pass.
    //
    // We also need to make sure that a majority of the local nodes are added as seed nodes; otherwise
    // Cassandra may not see the other nodes if it loses its connection to other sites.
    System.setProperty("cassandra.load_ring_state", "false");

    // See https://docs.datastax.com/en/cassandra/2.0/cassandra/operations/ops_add_dc_to_cluster_t.html
    if (_schemaUtil.isStandby()) {
        System.setProperty("cassandra.auto_bootstrap", "false");
    }

    InterProcessLock lock = null;
    Configuration config = null;
    StartupMode mode = null;
    try {
        // We use this lock to discourage more than one node bootstrapping / joining at the same time.
        // Cassandra can handle this, but it's generally not recommended to make schema changes concurrently.
        lock = getLock(getSchemaLockName());
        config = checkConfiguration();
        checkGlobalConfiguration();
        checkVersionedConfiguration();
        removeStaleConfiguration();
        mode = checkStartupMode(config);
        _log.info("Current startup mode is {}", mode);

        // Check whether the service is allowed to start by querying the db offline info, to avoid
        // bringing back stale data. Hibernate mode is skipped so that the node recovery procedure
        // can recover the overdue node.
        int nodeCount = ((CoordinatorClientImpl) _coordinator).getNodeCount();
        if (nodeCount != 1 && mode.type != StartupMode.StartupModeType.HIBERNATE_MODE) {
            checkDBOfflineInfo(_coordinator, _serviceInfo.getName(), dbDir, true);
        }

        // This call causes instantiation of a seed provider instance, so the check*Configuration
        // calls above must precede it.
        removeCassandraSavedCaches();

        mode.onPreStart();

        if (_jmxServer != null) {
            _jmxServer.start();
            System.setProperty("com.sun.management.jmxremote.port", Integer.toString(_jmxServer.getPort()));
        }

        _service = new CassandraDaemon();
        _service.init(null);
        _service.start();

        cassandraInitialized = true;
        mode.onPostStart();
    } catch (Exception e) {
        if (mode != null && mode.type == StartupMode.StartupModeType.HIBERNATE_MODE) {
            printRecoveryWorkAround(e);
        }
        _log.error("e=", e);
        throw new IllegalStateException(e);
    } finally {
        if (lock != null) {
            try {
                lock.release();
            } catch (Exception ignore) {
                _log.debug("lock release failed");
            }
        }
    }

    if (config.getConfig(DbConfigConstants.JOINED) == null) {
        config.setConfig(DbConfigConstants.JOINED, Boolean.TRUE.toString());
        _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config);
    }

    _statusChecker.waitForAllNodesJoined();

    _svcBeacon.start();
    if (backCompatPreYoda) {
        _log.info("Enable duplicated beacon in global area during pre-yoda upgrade");
        startDupBeacon();
    }

    setDbInitializedFlag();
    setDbConfigInitDone();

    _dbClient.start();

    if (_schemaUtil.isStandby()) {
        String localDataRevision = getLocalDataRevision();
        if (localDataRevision != null) {
            _schemaUtil.checkDataRevision(localDataRevision);
        }
    }

    // Set up the vdc information so that login is enabled before migration.
    if (!isGeoDbsvc()) {
        _schemaUtil.checkAndSetupBootStrapInfo(_dbClient);
    }

    dbMgr.init();

    if (_handler.run()) {
        // Set up the bootstrap info root tenant; skip it if the root tenant was migrated from the local db.
        if (isGeoDbsvc()) {
            _schemaUtil.checkAndSetupBootStrapInfo(_dbClient);
        } else {
            _schemaUtil.checkAndInitStorageSystemTypes(_dbClient);
        }
        startBackgroundTasks();
        _log.info("DB service started");
    } else {
        _log.error("DB migration failed. Skipping starting background tasks.");
    }
}
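Note that in this method the lock returned by getLock(getSchemaLockName()) is released in the finally block, but acquire() is never called explicitly, which suggests the helper acquires the lock before returning. CoprHD's getLock implementation is not shown on this page; in stock Curator the standard InterProcessLock implementation is InterProcessMutex, so a minimal sketch of such a helper could look like the following. The /locks/ path prefix and the client field are assumptions for illustration, not CoprHD's actual code.

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.recipes.locks.InterProcessLock;
import org.apache.curator.framework.recipes.locks.InterProcessMutex;

// Hypothetical sketch, not CoprHD code: return an already-acquired named lock,
// matching how start() releases the lock without ever calling acquire() itself.
private InterProcessLock getLock(String name) throws Exception {
    // client is assumed to be a started CuratorFramework instance
    InterProcessLock lock = new InterProcessMutex(client, "/locks/" + name);
    lock.acquire(); // blocks until the ZooKeeper-backed lock is held
    return lock;
}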
Use of org.apache.curator.framework.recipes.locks.InterProcessLock in project coprhd-controller by CoprHD.
The class MigrationHandlerImpl, method run.
/**
 * Run the db schema migration. Returns true if migration succeeded or was already done.
 */
@Override
public boolean run() throws DatabaseException {
    Date startTime = new Date();
    // set state to migration_init and wait for all nodes to reach this state
    setDbConfig(DbConfigConstants.MIGRATION_INIT);
    targetVersion = service.getVersion();
    statusChecker.setVersion(targetVersion);
    statusChecker.setServiceName(service.getName());
    // dbsvc waits for all dbsvc instances, and geodbsvc waits for all geodbsvc instances
    statusChecker.waitForAllNodesMigrationInit();

    if (schemaUtil.isStandby()) {
        String currentSchemaVersion = coordinator.getCurrentDbSchemaVersion();
        if (!StringUtils.equals(currentSchemaVersion, targetVersion)) {
            // no migration on standby site
            log.info("Migration does not run on standby. Change current version to {}", targetVersion);
            schemaUtil.setCurrentVersion(targetVersion);
        }
        return true;
    }

    if (schemaUtil.isGeoDbsvc()) {
        boolean schemaVersionChanged = isDbSchemaVersionChanged();
        // scan and update cassandra schema
        checkGeoDbSchema();
        // no migration procedure for geodbsvc; just wait till migration is done on one of the dbsvcs
        log.warn("Migration is not supported for geodbsvc. Wait till migration is done");
        statusChecker.waitForMigrationDone();
        // update vdc version
        if (schemaVersionChanged) {
            schemaUtil.insertOrUpdateVdcVersion(dbClient, true);
        }
        return true;
    } else {
        // For dbsvc we have to wait till all geodbsvc instances reach migration_init, since we might
        // need to copy geo-replicated resources from the local db to the geo db.
        statusChecker.waitForAllNodesMigrationInit(Constants.GEODBSVC_NAME);
    }

    InterProcessLock lock = null;
    String currentSchemaVersion = null;
    int retryCount = 0;
    while (retryCount < MAX_MIGRATION_RETRY) {
        log.debug("Migration handlers - Start. Trying to grab lock ...");
        try {
            // grab the global lock for migration
            lock = getLock(DB_MIGRATION_LOCK);

            // make sure we haven't already finished the migration on another node
            MigrationStatus status = coordinator.getMigrationStatus();
            if (status != null) {
                if (status == MigrationStatus.DONE) {
                    log.info("DB migration is done already. Skipping...");
                    if (null == getPersistedSchema(targetVersion)) {
                        persistSchema(targetVersion, DbSchemaChecker.marshalSchemas(currentSchema, null));
                    }
                    return true;
                } else if (status == MigrationStatus.FAILED) {
                    log.error("DB migration already finished with status: {}.", status);
                    return false;
                }
            }

            schemaUtil.setMigrationStatus(MigrationStatus.RUNNING);

            // we expect currentSchemaVersion to be set
            currentSchemaVersion = coordinator.getCurrentDbSchemaVersion();
            if (currentSchemaVersion == null) {
                throw new IllegalStateException("Schema version not set");
            }

            // figure out our source and target versions
            DbSchemas persistedSchema = getPersistedSchema(currentSchemaVersion);
            if (isSchemaMissed(persistedSchema, currentSchemaVersion, targetVersion)) {
                throw new IllegalStateException("Schema definition not found for version " + currentSchemaVersion);
            }
            if (isFreshInstall(persistedSchema, currentSchemaVersion, targetVersion)) {
                log.info("Saving schema of version {} to db", currentSchemaVersion);
                persistedSchema = currentSchema;
                persistSchema(currentSchemaVersion, DbSchemaChecker.marshalSchemas(persistedSchema, null));
            }

            // check if we have a schema upgrade to deal with
            if (!currentSchemaVersion.equals(targetVersion)) {
                log.info("Start scanning and creating new column families");
                schemaUtil.checkCf(true);
                log.info("Scanning and creating new column families succeeded");

                DbSchemasDiff diff = new DbSchemasDiff(persistedSchema, currentSchema, ignoredPkgs);
                if (diff.isChanged()) {
                    // log the changes
                    dumpChanges(diff);
                    if (!diff.isUpgradable()) {
                        // We should never get here; if we do, throw an IllegalStateException and stop.
                        // TODO: dump the problematic diffs here
                        log.error("Schema diff details: {}", DbSchemaChecker.marshalSchemasDiff(diff));
                        throw new IllegalStateException("Schema is not upgradable.");
                    }
                }

                log.info("Starting migration callbacks from {} to {}", currentSchemaVersion, targetVersion);
                // We need to checkpoint the progress of these callbacks as they run,
                // so we can resume from where we left off in case of restarts/errors.
                String checkpoint = schemaUtil.getMigrationCheckpoint();
                if (checkpoint != null) {
                    log.info("Migration checkpoint found for {}", checkpoint);
                }
                // run all migration callbacks
                runMigrationCallbacks(diff, checkpoint);
                log.info("Done migration callbacks");

                persistSchema(targetVersion, DbSchemaChecker.marshalSchemas(currentSchema, null));
                schemaUtil.dropUnusedCfsIfExists();
                // set the current version in zk
                schemaUtil.setCurrentVersion(targetVersion);
                log.info("Current schema version is updated to {}", targetVersion);
            }

            schemaUtil.setMigrationStatus(MigrationStatus.DONE);
            // remove the migration checkpoint once we are done
            schemaUtil.removeMigrationCheckpoint();
            removeMigrationFailInfoIfExist();
            log.debug("Migration handler - Done.");
            return true;
        } catch (Exception e) {
            if (e instanceof MigrationCallbackException) {
                markMigrationFailure(startTime, currentSchemaVersion, e);
            } else if (isUnRetryableException(e)) {
                markMigrationFailure(startTime, currentSchemaVersion, e);
                return false;
            } else {
                log.warn("Retryable exception during migration", e);
                retryCount++;
                lastException = e;
            }
        } finally {
            if (lock != null) {
                try {
                    lock.release();
                } catch (Exception ignore) {
                    log.debug("lock release failed");
                }
            }
        }
        sleepBeforeRetry();
    } // while -- not done

    markMigrationFailure(startTime, currentSchemaVersion, lastException);
    return false;
}
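In the loop above, getLock blocks until the migration lock is granted. Curator's standard implementation, InterProcessMutex, also supports a timed acquire that returns false on timeout, which is one way to bound how long a node waits while another node is migrating. A minimal sketch, assuming a started CuratorFramework named client; the lock path and timeout are illustrative, not CoprHD's:

import java.util.concurrent.TimeUnit;
import org.apache.curator.framework.recipes.locks.InterProcessMutex;

// Sketch only: timed acquisition of a migration lock.
InterProcessMutex migrationLock = new InterProcessMutex(client, "/locks/dbmigration");
if (migrationLock.acquire(30, TimeUnit.SECONDS)) { // false means timed out, not an error
    try {
        // run migration steps while holding the lock
    } finally {
        migrationLock.release();
    }
} else {
    // another node holds the lock; back off and retry, as the loop above does
}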
Use of org.apache.curator.framework.recipes.locks.InterProcessLock in project coprhd-controller by CoprHD.
The class MigrationHandlerImpl, method checkGeoDbSchema.
private void checkGeoDbSchema() {
    String targetVersion = service.getVersion();
    if (isDbSchemaVersionChanged() && !VdcUtil.checkGeoCompatibleOfOtherVdcs(targetVersion)) {
        log.info("Not all vdcs are upgraded. Skip geodb schema change until all vdcs are upgraded");
        return;
    }

    log.info("Start scanning and creating new column families");
    InterProcessLock lock = null;
    try {
        String lockName = DbConfigConstants.GEODB_SCHEMA_LOCK;
        // grab the global lock for the geodb schema change
        lock = getLock(lockName);
        schemaUtil.checkCf();
        log.info("Scanning and creating new column families succeeded");
    } catch (Exception ex) {
        log.warn("Unexpected error when scanning db schema", ex);
    } finally {
        if (lock != null) {
            try {
                lock.release();
            } catch (Exception ignore) {
                log.debug("lock release failed");
            }
        }
    }
}
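This method repeats the same acquire/try/finally-release boilerplate as the two examples above. One way to factor it out is a small helper that runs a task under a named lock; the following is an illustrative sketch (withLock is not a CoprHD method, and getLock is assumed to return an already-acquired lock, as sketched earlier):

// Illustrative helper, not CoprHD code: run a task while holding a named
// InterProcessLock and guarantee release even if the task throws.
private void withLock(String name, Runnable task) throws Exception {
    InterProcessLock lock = getLock(name);
    try {
        task.run();
    } finally {
        try {
            lock.release();
        } catch (Exception ignore) {
            log.debug("lock release failed", ignore);
        }
    }
}

A Callable-based variant would be needed for tasks that throw checked exceptions, since Runnable cannot.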
Use of org.apache.curator.framework.recipes.locks.InterProcessLock in project coprhd-controller by CoprHD.
The class SchemaUtil, method checkAndSetupBootStrapInfo.
/**
 * Initialize the bootstrap info: check and set up the root tenant or my vdc info if it doesn't exist.
 */
public void checkAndSetupBootStrapInfo(DbClient dbClient) {
    // a standby site does not need to do the bootstrap
    if (onStandby) {
        _log.info("Skip bootstrap info initialization on standby site");
        return;
    }

    // only the first vdc needs to check the root tenant
    if (_vdcList != null && _vdcList.size() > 1) {
        _log.info("Skip root tenant check for more than one vdc. Current number of vdcs: {}", _vdcList.size());
        return;
    }

    int retryIntervalSecs = DBINIT_RETRY_INTERVAL;
    boolean done = false;
    boolean wait;
    while (!done) {
        wait = false;
        InterProcessLock lock = null;
        try {
            lock = _coordinator.getLock(getBootstrapLockName());
            _log.info("bootstrap info check - waiting for bootstrap lock");
            lock.acquire();
            if (isGeoDbsvc()) {
                // insert the root tenant into the geodb if it does not exist
                insertDefaultRootTenant(dbClient);
            } else {
                // insert the default vdc info into the local db if it does not exist
                insertMyVdcInfo(dbClient);
                // insert the VdcVersion if it does not exist; done here rather than in the geodb
                // to avoid a race condition
                insertVdcVersion(dbClient);
                // insert the local user's password history into the local db if it does not exist
                insertPasswordHistory(dbClient);
            }
            done = true;
        } catch (Exception e) {
            if (e instanceof IllegalStateException) {
                throw (IllegalStateException) e;
            } else {
                _log.warn("Exception while checking for bootstrap info, will retry in {} secs", retryIntervalSecs, e);
                wait = true;
            }
        } finally {
            if (lock != null) {
                try {
                    lock.release();
                } catch (Exception e) {
                    _log.error("Failed to release lock", e);
                }
            }
        }
        if (wait) {
            try {
                Thread.sleep(retryIntervalSecs * 1000);
            } catch (InterruptedException ex) {
                _log.warn("Thread is interrupted during wait for retry", ex);
            }
        }
    }
}
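One subtlety in the finally block above: if lock.acquire() throws, release() is still attempted on a lock that was never held. Curator's InterProcessMutex rejects that with an IllegalMonitorStateException; here it is caught and logged as an error, so it is harmless but noisy. A minimal sketch of an idiom that releases only after a successful acquire, reusing the names from the method above:

// Sketch: track whether acquire() succeeded so finally never releases an unheld lock.
InterProcessLock lock = _coordinator.getLock(getBootstrapLockName());
boolean acquired = false;
try {
    lock.acquire();
    acquired = true;
    // ... bootstrap work ...
} finally {
    if (acquired) {
        try {
            lock.release();
        } catch (Exception e) {
            _log.error("Failed to release lock", e);
        }
    }
}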
Use of org.apache.curator.framework.recipes.locks.InterProcessLock in project coprhd-controller by CoprHD.
The class CoordinatorClientImpl, method checkAndCreateSiteSpecificSection.
/**
 * Check and initialize the site specific section for the current site. If the site specific
 * section is empty, we always assume the current site is the active site.
 *
 * @throws Exception
 */
private void checkAndCreateSiteSpecificSection() throws Exception {
    if (isSiteSpecificSectionInited()) {
        log.info("Site specific section for {} is initialized", getSiteId());
        return;
    }

    log.info("The site specific section has NOT been initialized");
    InterProcessLock lock = getLock(ZkPath.SITES.name());
    try {
        lock.acquire();
        if (!isSiteSpecificSectionInited()) {
            createSiteSpecificSection();
        }
    } catch (Exception e) {
        log.error("Failed to initialize site specific area for {}.", ZkPath.SITES, e);
        throw e;
    } finally {
        try {
            lock.release();
        } catch (Exception e) {
            log.error("Failed to release the lock for {}. Error {}", ZkPath.SITES, e);
        }
    }
}
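This method is a double-checked initialization under a distributed lock: check, acquire, check again, then create. The second isSiteSpecificSectionInited() call is essential, because another node may have created the section while this node was blocked in acquire(). The same pattern in condensed, generic form (initialized and initialize are placeholders, not CoprHD methods):

// Generic sketch of double-checked initialization with an InterProcessLock.
if (!initialized()) {
    InterProcessLock lock = getLock(path);
    lock.acquire();
    try {
        if (!initialized()) { // re-check while holding the lock
            initialize();
        }
    } finally {
        lock.release();
    }
}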