use of org.apache.hadoop.hdds.utils.db.RocksDBCheckpoint in project ozone by apache.
the class OzoneManagerServiceProviderImpl method getOzoneManagerDBSnapshot.
/**
 * Obtains the current OM DB snapshot.
 *
 * <p>Downloads the checkpoint tarball from the URL returned by
 * {@code getOzoneManagerSnapshotUrl()} (via
 * {@code SecurityUtil.doAsLoginUser}) into {@code omSnapshotDBParentDir},
 * then untars it into a sibling directory named after the snapshot.
 *
 * <p>The tarball is removed in all cases, including when the download or the
 * untar step fails, so aborted attempts do not leave partial archives on
 * disk.
 *
 * @return DBCheckpoint instance backed by the untarred directory, or
 *         {@code null} if an {@link IOException} occurred (it is logged).
 */
@VisibleForTesting
DBCheckpoint getOzoneManagerDBSnapshot() {
  String snapshotFileName = RECON_OM_SNAPSHOT_DB + "_" + System.currentTimeMillis();
  File targetFile = new File(omSnapshotDBParentDir, snapshotFileName + ".tar.gz");
  try {
    SecurityUtil.doAsLoginUser(() -> {
      try (InputStream inputStream = reconUtils.makeHttpCall(connectionFactory,
          getOzoneManagerSnapshotUrl(), isOmSpnegoEnabled()).getInputStream()) {
        FileUtils.copyInputStreamToFile(inputStream, targetFile);
      }
      return null;
    });
    // Untar the checkpoint file next to the tarball.
    Path untarredDbDir = Paths.get(omSnapshotDBParentDir.getAbsolutePath(), snapshotFileName);
    reconUtils.untarCheckpointFile(targetFile, untarredDbDir);
    return new RocksDBCheckpoint(untarredDbDir);
  } catch (IOException e) {
    LOG.error("Unable to obtain Ozone Manager DB Snapshot. ", e);
    return null;
  } finally {
    // deleteQuietly never throws and is a no-op when the file does not exist,
    // so it is safe on both the success and the failure path.
    FileUtils.deleteQuietly(targetFile);
  }
}
use of org.apache.hadoop.hdds.utils.db.RocksDBCheckpoint in project ozone by apache.
the class StorageContainerServiceProviderImpl method getSCMDBSnapshot.
/**
 * Obtains the current SCM DB snapshot.
 *
 * <p>When SCM HA is disabled the checkpoint tarball is fetched over HTTP from
 * {@code getScmDBSnapshotUrl()}. When SCM HA is enabled the Ratis peer roles
 * are scanned for the LEADER entry and the tarball is downloaded from that
 * host over gRPC. The tarball is then untarred under
 * {@code scmSnapshotDBParentDir} and always deleted afterwards, including on
 * failure.
 *
 * @return DBCheckpoint backed by the untarred directory, or {@code null} if
 *         an {@link IOException} occurred (it is logged).
 */
public DBCheckpoint getSCMDBSnapshot() {
  String snapshotFileName = RECON_SCM_SNAPSHOT_DB + "_" + System.currentTimeMillis();
  File targetFile = new File(scmSnapshotDBParentDir, snapshotFileName + ".tar.gz");
  try {
    if (!SCMHAUtils.isSCMHAEnabled(configuration)) {
      // NOTE(review): the SCM download reuses isOmSpnegoEnabled(); confirm
      // that this is the intended SPNEGO switch for SCM.
      SecurityUtil.doAsLoginUser(() -> {
        try (InputStream inputStream = reconUtils.makeHttpCall(connectionFactory,
            getScmDBSnapshotUrl(), isOmSpnegoEnabled()).getInputStream()) {
          FileUtils.copyInputStreamToFile(inputStream, targetFile);
        }
        return null;
      });
      LOG.info("Downloaded SCM Snapshot from SCM");
    } else {
      List<String> ratisRoles = scmClient.getScmInfo().getRatisPeerRoles();
      for (String ratisRole : ratisRoles) {
        // Index 2 is compared against the Raft role and index 4 is used as
        // the host address, so an entry needs at least five ':' fields.
        String[] role = ratisRole.split(":");
        if (role.length < 5) {
          LOG.warn("Skipping malformed Ratis peer role entry: {}", ratisRole);
          continue;
        }
        if (role[2].equals(RaftProtos.RaftPeerRole.LEADER.toString())) {
          String hostAddress = role[4].trim();
          int grpcPort = configuration.getInt(ScmConfigKeys.OZONE_SCM_GRPC_PORT_KEY,
              ScmConfigKeys.OZONE_SCM_GRPC_PORT_DEFAULT);
          try (SCMSnapshotDownloader downloadClient = new InterSCMGrpcClient(hostAddress,
              grpcPort, configuration,
              new ReconCertificateClient(new SecurityConfig(configuration),
                  reconStorage.getReconCertSerialId()))) {
            downloadClient.download(targetFile.toPath()).get();
          } catch (InterruptedException e) {
            // Restore the interrupt flag so callers up the stack can still
            // observe the interruption after it is wrapped in an IOException.
            Thread.currentThread().interrupt();
            LOG.error("Rocks DB checkpoint downloading failed", e);
            throw new IOException(e);
          } catch (ExecutionException e) {
            LOG.error("Rocks DB checkpoint downloading failed", e);
            throw new IOException(e);
          }
          LOG.info("Downloaded SCM Snapshot from Leader SCM");
          break;
        }
      }
    }
    Path untarredDbDir = Paths.get(scmSnapshotDBParentDir.getAbsolutePath(), snapshotFileName);
    reconUtils.untarCheckpointFile(targetFile, untarredDbDir);
    return new RocksDBCheckpoint(untarredDbDir);
  } catch (IOException e) {
    LOG.error("Unable to obtain SCM DB Snapshot. ", e);
    return null;
  } finally {
    // Remove the tarball on every path; deleteQuietly is a no-op if the
    // download never created the file.
    FileUtils.deleteQuietly(targetFile);
  }
}
use of org.apache.hadoop.hdds.utils.db.RocksDBCheckpoint in project ozone by apache.
the class TestSCMInstallSnapshotWithHA method testInstallCorruptedCheckpointFailure.
/**
 * Installs a deliberately corrupted leader checkpoint on an inactive
 * follower SCM and verifies that the install fails while the state machine
 * stays paused, then verifies that reinitializing from an intact backup of
 * the same checkpoint brings the follower to the checkpoint's term index.
 */
@Test
public void testInstallCorruptedCheckpointFailure() throws Exception {
  StorageContainerManager leaderSCM = getLeader(cluster);
  // Find the inactive SCM
  String followerId = getInactiveSCM(cluster).getSCMNodeId();
  StorageContainerManager followerSCM = cluster.getSCM(followerId);
  // Do some transactions so that the log index increases
  writeToIncreaseLogIndex(leaderSCM, 100);
  File oldDBLocation = followerSCM.getScmMetadataStore().getStore().getDbLocation();
  SCMStateMachine followerSM =
      followerSCM.getScmHAManager().getRatisServer().getSCMStateMachine();
  TermIndex termIndex = followerSM.getLastAppliedTermIndex();
  DBCheckpoint leaderDbCheckpoint =
      leaderSCM.getScmMetadataStore().getStore().getCheckpoint(false);
  Path leaderCheckpointLocation = leaderDbCheckpoint.getCheckpointLocation();
  // Validate the location before it is handed to any consumer.
  Assert.assertNotNull(leaderCheckpointLocation);
  TransactionInfo leaderCheckpointTrxnInfo = HAUtils.getTrxnInfoFromCheckpoint(
      conf, leaderCheckpointLocation, new SCMDBDefinition());
  // Take a backup of the current DB
  String dbBackupName = "SCM_CHECKPOINT_BACKUP" + termIndex.getIndex() + "_"
      + System.currentTimeMillis();
  File dbDir = oldDBLocation.getParentFile();
  File checkpointBackup = new File(dbDir, dbBackupName);
  // Take a backup of the leader checkpoint
  FileUtils.copyDirectory(leaderCheckpointLocation.toFile(), checkpointBackup, false);
  // Corrupt the leader checkpoint and install that on the follower. The
  // operation should fail and should shutdown.
  File[] checkpointFiles = leaderCheckpointLocation.toFile().listFiles();
  // listFiles() returns null on an I/O error or if the path is not a directory.
  Assert.assertNotNull(checkpointFiles);
  // Delete every other .sst file, starting with the first one encountered.
  boolean delete = true;
  for (File file : checkpointFiles) {
    if (file.getName().contains(".sst")) {
      if (delete) {
        // A silently failed delete would leave the checkpoint intact and
        // invalidate the premise of the test.
        Assert.assertTrue("Could not delete " + file, file.delete());
        delete = false;
      } else {
        delete = true;
      }
    }
  }
  SCMHAManagerImpl scmhaManager = (SCMHAManagerImpl) (followerSCM.getScmHAManager());
  GenericTestUtils.setLogLevel(SCMHAManagerImpl.getLogger(), Level.ERROR);
  GenericTestUtils.LogCapturer logCapture =
      GenericTestUtils.LogCapturer.captureLogs(SCMHAManagerImpl.getLogger());
  scmhaManager.setExitManagerForTesting(new DummyExitManager());
  followerSM.pause();
  scmhaManager.installCheckpoint(leaderCheckpointLocation, leaderCheckpointTrxnInfo);
  Assert.assertTrue(logCapture.getOutput()
      .contains("Failed to reload SCM state and instantiate services."));
  Assert.assertTrue(followerSM.getLifeCycleState().isPausingOrPaused());
  // Verify correct reloading
  followerSM.setInstallingDBCheckpoint(new RocksDBCheckpoint(checkpointBackup.toPath()));
  followerSM.reinitialize();
  // JUnit's assertEquals takes (expected, actual).
  Assert.assertEquals(leaderCheckpointTrxnInfo.getTermIndex(),
      followerSM.getLastAppliedTermIndex());
}
use of org.apache.hadoop.hdds.utils.db.RocksDBCheckpoint in project ozone by apache.
the class OzoneManagerSnapshotProvider method getOzoneManagerDBSnapshot.
/**
 * Download the latest checkpoint from OM Leader via HTTP.
 *
 * <p>The checkpoint tarball is downloaded into {@code omSnapshotDir} and
 * untarred into a sibling directory named
 * {@code <OM_DB_NAME>-<leaderOMNodeID>-<currentTimeMillis>}. The tarball is
 * always deleted afterwards, whether or not the download and untar succeed.
 *
 * @param leaderOMNodeID leader OM Node ID.
 * @return the DB checkpoint (including the ratis snapshot index)
 * @throws IOException if the leader answers with a status other than
 *         HTTP_OK / HTTP_CREATED, or if the download or untar fails.
 */
public DBCheckpoint getOzoneManagerDBSnapshot(String leaderOMNodeID) throws IOException {
  String snapshotTime = Long.toString(System.currentTimeMillis());
  String snapshotFileName = OM_DB_NAME + "-" + leaderOMNodeID + "-" + snapshotTime;
  String snapshotFilePath =
      Paths.get(omSnapshotDir.getAbsolutePath(), snapshotFileName).toFile().getAbsolutePath();
  // Build the tarball path from the absolute snapshot path: using the bare
  // file name would resolve against the process working directory instead
  // of omSnapshotDir.
  File targetFile = new File(snapshotFilePath + ".tar.gz");
  String omCheckpointUrl = peerNodesMap.get(leaderOMNodeID)
      .getOMDBCheckpointEnpointUrl(httpPolicy.isHttpEnabled());
  LOG.info("Downloading latest checkpoint from Leader OM {}. Checkpoint URL: {}",
      leaderOMNodeID, omCheckpointUrl);
  try {
    SecurityUtil.doAsCurrentUser(() -> {
      HttpURLConnection httpURLConnection = (HttpURLConnection)
          connectionFactory.openConnection(new URL(omCheckpointUrl), spnegoEnabled);
      try {
        httpURLConnection.connect();
        int errorCode = httpURLConnection.getResponseCode();
        if ((errorCode != HTTP_OK) && (errorCode != HTTP_CREATED)) {
          throw new IOException("Unexpected exception when trying to reach "
              + "OM to download latest checkpoint. Checkpoint URL: "
              + omCheckpointUrl + ". ErrorCode: " + errorCode);
        }
        try (InputStream inputStream = httpURLConnection.getInputStream()) {
          FileUtils.copyInputStreamToFile(inputStream, targetFile);
        }
      } finally {
        // Release the connection on both the success and the error path.
        httpURLConnection.disconnect();
      }
      return null;
    });
    // Untar the checkpoint file.
    Path untarredDbDir = Paths.get(snapshotFilePath);
    FileUtil.unTar(targetFile, untarredDbDir.toFile());
    LOG.info("Successfully downloaded latest checkpoint from leader OM: {}", leaderOMNodeID);
    return new RocksDBCheckpoint(untarredDbDir);
  } finally {
    // Never leave a (possibly partial) tarball behind; deleteQuietly is a
    // no-op when the file was never created.
    FileUtils.deleteQuietly(targetFile);
  }
}
use of org.apache.hadoop.hdds.utils.db.RocksDBCheckpoint in project ozone by apache.
the class SCMSnapshotProvider method getSCMDBSnapshot.
/**
 * Download the latest checkpoint from SCM Leader.
 *
 * <p>The checkpoint tarball is downloaded over gRPC into
 * {@code scmSnapshotDir} and untarred into a sibling directory named
 * {@code <SCM_DB_NAME>-<leaderSCMNodeID>-<currentTimeMillis>}. The tarball is
 * always deleted afterwards, whether or not the download and untar succeed.
 *
 * @param leaderSCMNodeID leader SCM Node ID.
 * @return the DB checkpoint (including the ratis snapshot index)
 * @throws IOException if the download is interrupted or fails, or if the
 *         tarball cannot be untarred. On interruption the thread's interrupt
 *         flag is restored before the exception is thrown.
 */
public DBCheckpoint getSCMDBSnapshot(String leaderSCMNodeID) throws IOException {
  String snapshotTime = Long.toString(System.currentTimeMillis());
  String snapshotFileName =
      OzoneConsts.SCM_DB_NAME + "-" + leaderSCMNodeID + "-" + snapshotTime;
  String snapshotFilePath =
      Paths.get(scmSnapshotDir.getAbsolutePath(), snapshotFileName).toFile().getAbsolutePath();
  File targetFile = new File(snapshotFilePath + ".tar.gz");
  // The downloadClient instance is created as and when an install snapshot
  // request is received. The client is not cached, as this should be a very
  // rare operation.
  int port = peerNodesMap.get(leaderSCMNodeID).getGrpcPort();
  String host = peerNodesMap.get(leaderSCMNodeID).getInetAddress().getHostAddress();
  try {
    try (SCMSnapshotDownloader downloadClient =
        new InterSCMGrpcClient(host, port, conf, scmCertificateClient)) {
      downloadClient.download(targetFile.toPath()).get();
    } catch (InterruptedException e) {
      LOG.error("Rocks DB checkpoint downloading failed", e);
      // Only a real interruption should set the interrupt flag.
      Thread.currentThread().interrupt();
      throw new IOException(e);
    } catch (ExecutionException e) {
      // The download task itself failed; the current thread was not
      // interrupted, so its interrupt status must not be touched.
      LOG.error("Rocks DB checkpoint downloading failed", e);
      throw new IOException(e);
    }
    // Untar the checkpoint file.
    Path untarredDbDir = Paths.get(snapshotFilePath);
    FileUtil.unTar(targetFile, untarredDbDir.toFile());
    LOG.info("Successfully downloaded latest checkpoint from leader SCM: {} path {}",
        leaderSCMNodeID, untarredDbDir.toAbsolutePath());
    return new RocksDBCheckpoint(untarredDbDir);
  } finally {
    // Never leave a (possibly partial) tarball behind; deleteQuietly is a
    // no-op when the file was never created.
    FileUtils.deleteQuietly(targetFile);
  }
}
Aggregations