Use of org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest in project hadoop by apache.
The class Checkpointer, method doCheckpoint.
/**
 * Create a new checkpoint.
 */
void doCheckpoint() throws IOException {
  BackupImage bnImage = getFSImage();
  NNStorage bnStorage = bnImage.getStorage();
  long startTime = monotonicNow();
  bnImage.freezeNamespaceAtNextRoll();
  NamenodeCommand cmd =
      getRemoteNamenodeProxy().startCheckpoint(backupNode.getRegistration());
  CheckpointCommand cpCmd = null;
  switch (cmd.getAction()) {
  case NamenodeProtocol.ACT_SHUTDOWN:
    shutdown();
    throw new IOException("Name-node " + backupNode.nnRpcAddress
        + " requested shutdown.");
  case NamenodeProtocol.ACT_CHECKPOINT:
    cpCmd = (CheckpointCommand) cmd;
    break;
  default:
    throw new IOException("Unsupported NamenodeCommand: " + cmd.getAction());
  }
  bnImage.waitUntilNamespaceFrozen();
  CheckpointSignature sig = cpCmd.getSignature();
  // Make sure we're talking to the same NN!
  sig.validateStorageInfo(bnImage);
  long lastApplied = bnImage.getLastAppliedTxId();
  LOG.debug("Doing checkpoint. Last applied: " + lastApplied);
  RemoteEditLogManifest manifest = getRemoteNamenodeProxy()
      .getEditLogManifest(bnImage.getLastAppliedTxId() + 1);
  boolean needReloadImage = false;
  if (!manifest.getLogs().isEmpty()) {
    RemoteEditLog firstRemoteLog = manifest.getLogs().get(0);
    // If we don't have enough logs to roll forward using only logs, we need
    // to download and load the image.
    if (firstRemoteLog.getStartTxId() > lastApplied + 1) {
      LOG.info("Unable to roll forward using only logs. Downloading image"
          + " with txid " + sig.mostRecentCheckpointTxId);
      MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
          backupNode.nnHttpAddress, sig.mostRecentCheckpointTxId, bnStorage,
          true, false);
      bnImage.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE,
          sig.mostRecentCheckpointTxId, downloadedHash);
      lastApplied = sig.mostRecentCheckpointTxId;
      needReloadImage = true;
    }
    if (firstRemoteLog.getStartTxId() > lastApplied + 1) {
      throw new IOException("No logs to roll forward from " + lastApplied);
    }
    // get edits files
    for (RemoteEditLog log : manifest.getLogs()) {
      TransferFsImage.downloadEditsToStorage(backupNode.nnHttpAddress, log,
          bnStorage);
    }
    if (needReloadImage) {
      LOG.info("Loading image with txid " + sig.mostRecentCheckpointTxId);
      File file = bnStorage.findImageFile(NameNodeFile.IMAGE,
          sig.mostRecentCheckpointTxId);
      bnImage.reloadFromImageFile(file, backupNode.getNamesystem());
    }
    rollForwardByApplyingLogs(manifest, bnImage, backupNode.getNamesystem());
  }
  long txid = bnImage.getLastAppliedTxId();
  backupNode.namesystem.writeLock();
  try {
    backupNode.namesystem.setImageLoaded();
    if (backupNode.namesystem.getBlocksTotal() > 0) {
      long completeBlocksTotal =
          backupNode.namesystem.getCompleteBlocksTotal();
      backupNode.namesystem.getBlockManager()
          .setBlockTotal(completeBlocksTotal);
    }
    bnImage.saveFSImageInAllDirs(backupNode.getNamesystem(), txid);
    if (!backupNode.namenode.isRollingUpgrade()) {
      bnImage.updateStorageVersion();
    }
  } finally {
    backupNode.namesystem.writeUnlock("doCheckpoint");
  }
  if (cpCmd.needToReturnImage()) {
    TransferFsImage.uploadImageFromStorage(backupNode.nnHttpAddress, conf,
        bnStorage, NameNodeFile.IMAGE, txid);
  }
  getRemoteNamenodeProxy().endCheckpoint(backupNode.getRegistration(), sig);
  if (backupNode.getRole() == NamenodeRole.BACKUP) {
    bnImage.convergeJournalSpool();
  }
  // keep registration up to date
  backupNode.setRegistration();
  long imageSize = bnImage.getStorage().getFsImageName(txid).length();
  LOG.info("Checkpoint completed in " + (monotonicNow() - startTime) / 1000
      + " seconds. New Image Size: " + imageSize);
}
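
The two firstRemoteLog.getStartTxId() checks above encode the roll-forward rule: edits alone suffice only if the manifest's first segment begins at or before lastApplied + 1; any gap forces an image download first. Below is a minimal standalone sketch of just that rule, using only the RemoteEditLogManifest and RemoteEditLog constructors exercised in the TestPBHelper snippet later in this page (the class and helper names are illustrative, not Hadoop API):

import java.util.Arrays;

import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;

public class RollForwardRule {

  /** True if the manifest's segments connect to lastApplied with no txid gap. */
  static boolean canRollForwardWithLogsOnly(RemoteEditLogManifest manifest,
      long lastApplied) {
    if (manifest.getLogs().isEmpty()) {
      return true; // nothing to apply; already up to date
    }
    // A first segment starting past lastApplied + 1 leaves a gap that only
    // a fresh image download can bridge.
    return manifest.getLogs().get(0).getStartTxId() <= lastApplied + 1;
  }

  public static void main(String[] args) {
    RemoteEditLogManifest m = new RemoteEditLogManifest(
        Arrays.asList(new RemoteEditLog(11, 20)));
    System.out.println(canRollForwardWithLogsOnly(m, 10)); // true: 11 follows 10
    System.out.println(canRollForwardWithLogsOnly(m, 5));  // false: txids 6-10 missing
  }
}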
Use of org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest in project hadoop by apache.
The class QuorumJournalManager, method selectInputStreams.
@Override
public void selectInputStreams(Collection<EditLogInputStream> streams,
    long fromTxnId, boolean inProgressOk, boolean onlyDurableTxns)
    throws IOException {
  QuorumCall<AsyncLogger, RemoteEditLogManifest> q =
      loggers.getEditLogManifest(fromTxnId, inProgressOk);
  Map<AsyncLogger, RemoteEditLogManifest> resps = loggers.waitForWriteQuorum(
      q, selectInputStreamsTimeoutMs, "selectInputStreams");
  LOG.debug("selectInputStream manifests:\n"
      + Joiner.on("\n").withKeyValueSeparator(": ").join(resps));
  final PriorityQueue<EditLogInputStream> allStreams =
      new PriorityQueue<EditLogInputStream>(64,
          JournalSet.EDIT_LOG_INPUT_STREAM_COMPARATOR);
  for (Map.Entry<AsyncLogger, RemoteEditLogManifest> e : resps.entrySet()) {
    AsyncLogger logger = e.getKey();
    RemoteEditLogManifest manifest = e.getValue();
    long committedTxnId = manifest.getCommittedTxnId();
    for (RemoteEditLog remoteLog : manifest.getLogs()) {
      URL url = logger.buildURLToFetchLogs(remoteLog.getStartTxId());
      long endTxId = remoteLog.getEndTxId();
      // If only durable transactions may be served, endTxId must not be
      // larger than committedTxnId. This ensures consistency.
      if (onlyDurableTxns && inProgressOk) {
        endTxId = Math.min(endTxId, committedTxnId);
      }
      EditLogInputStream elis = EditLogFileInputStream.fromUrl(
          connectionFactory, url, remoteLog.getStartTxId(), endTxId,
          remoteLog.isInProgress());
      allStreams.add(elis);
    }
  }
  JournalSet.chainAndMakeRedundantStreams(streams, allStreams, fromTxnId);
}
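
Note how the inner loop clamps endTxId: when onlyDurableTxns is set and in-progress segments are allowed, a reader must never be handed transactions past the quorum commit point. That clamping rule in isolation, as a sketch (the method name is mine, not Hadoop's):

/**
 * Sketch of the endTxId adjustment in selectInputStreams above: a segment's
 * stream may not extend beyond what the quorum has acknowledged as durable.
 */
static long effectiveEndTxId(long segmentEndTxId, long committedTxnId,
    boolean inProgressOk, boolean onlyDurableTxns) {
  if (onlyDurableTxns && inProgressOk) {
    // e.g. an in-progress segment reaching txid 150 while the quorum has
    // committed only through 120: serve up to 120.
    return Math.min(segmentEndTxId, committedTxnId);
  }
  return segmentEndTxId;
}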
Use of org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest in project hadoop by apache.
The class SecondaryNameNode, method doCheckpoint.
/**
 * Create a new checkpoint.
 * @return whether the image was fetched from the primary NameNode
 */
@VisibleForTesting
@SuppressWarnings("deprecation")
public boolean doCheckpoint() throws IOException {
  checkpointImage.ensureCurrentDirExists();
  NNStorage dstStorage = checkpointImage.getStorage();
  // Tell the namenode to start logging transactions in a new edit file.
  // Returns a token that would be used to upload the merged image.
  CheckpointSignature sig = namenode.rollEditLog();
  boolean loadImage = false;
  boolean isFreshCheckpointer = (checkpointImage.getNamespaceID() == 0);
  boolean isSameCluster =
      (dstStorage.versionSupportsFederation(NameNodeLayoutVersion.FEATURES)
          && sig.isSameCluster(checkpointImage))
      || (!dstStorage.versionSupportsFederation(NameNodeLayoutVersion.FEATURES)
          && sig.namespaceIdMatches(checkpointImage));
  if (isFreshCheckpointer || (isSameCluster
      && !sig.storageVersionMatches(checkpointImage.getStorage()))) {
    // if we're a fresh 2NN, or if we're on the same cluster and our storage
    // needs an upgrade, just take the storage info from the server.
    dstStorage.setStorageInfo(sig);
    dstStorage.setClusterID(sig.getClusterID());
    dstStorage.setBlockPoolID(sig.getBlockpoolID());
    loadImage = true;
  }
  sig.validateStorageInfo(checkpointImage);
  // error simulation code for junit test
  CheckpointFaultInjector.getInstance().afterSecondaryCallsRollEditLog();
  RemoteEditLogManifest manifest =
      namenode.getEditLogManifest(sig.mostRecentCheckpointTxId + 1);
  // Fetch fsimage and edits. Reload the image if the previous merge failed.
  // (Non-short-circuiting '|' so both operands are always evaluated.)
  loadImage |= downloadCheckpointFiles(fsName, checkpointImage, sig, manifest)
      | checkpointImage.hasMergeError();
  try {
    doMerge(sig, manifest, loadImage, checkpointImage, namesystem);
  } catch (IOException ioe) {
    // A merge error occurred. The in-memory file system state may be
    // inconsistent, so the image and edits need to be reloaded.
    checkpointImage.setMergeError();
    throw ioe;
  }
  // Clear any error since the merge was successful.
  checkpointImage.clearMergeError();
  //
  // Upload the new image to the NameNode, then tell the NameNode
  // to make this newly uploaded image the most current one.
  //
  long txid = checkpointImage.getLastAppliedTxId();
  TransferFsImage.uploadImageFromStorage(fsName, conf, dstStorage,
      NameNodeFile.IMAGE, txid);
  // error simulation code for junit test
  CheckpointFaultInjector.getInstance().afterSecondaryUploadsNewImage();
  LOG.warn("Checkpoint done. New Image Size: "
      + dstStorage.getFsImageName(txid).length());
  if (legacyOivImageDir != null && !legacyOivImageDir.isEmpty()) {
    try {
      checkpointImage.saveLegacyOIVImage(namesystem, legacyOivImageDir,
          new Canceler());
    } catch (IOException e) {
      LOG.warn("Failed to write legacy OIV image: ", e);
    }
  }
  return loadImage;
}
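
After a successful merge, getLastAppliedTxId() should land on the end of the last segment in the manifest, or stay at the signature's checkpoint txid when there were no new edits. A hedged helper capturing that expectation, built only on the manifest getters shown above (the method itself is illustrative and not part of Hadoop):

/**
 * Illustrative only: the txid a merged checkpoint image is expected to
 * reach, given the RemoteEditLogManifest that drove the merge.
 */
static long expectedMergedTxId(long mostRecentCheckpointTxId,
    RemoteEditLogManifest manifest) {
  if (manifest.getLogs().isEmpty()) {
    // No edits since the last checkpoint: the image stays where it was.
    return mostRecentCheckpointTxId;
  }
  // Otherwise the merge replays every segment, ending at the last one's end.
  int last = manifest.getLogs().size() - 1;
  return manifest.getLogs().get(last).getEndTxId();
}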
Use of org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest in project hadoop by apache.
The class TestPBHelper, method testConvertRemoteEditLogManifest.
@Test
public void testConvertRemoteEditLogManifest() {
  List<RemoteEditLog> logs = new ArrayList<RemoteEditLog>();
  logs.add(new RemoteEditLog(1, 10));
  logs.add(new RemoteEditLog(11, 20));
  convertAndCheckRemoteEditLogManifest(new RemoteEditLogManifest(logs, 20),
      logs, 20);
  convertAndCheckRemoteEditLogManifest(new RemoteEditLogManifest(logs),
      logs, HdfsServerConstants.INVALID_TXID);
}
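
The helper presumably round-trips each manifest through its protobuf wire form and asserts that both the segment list and the committed txid survive. A plausible shape for it, assuming the PBHelper.convert overloads between RemoteEditLogManifest and its proto (treat the exact calls, and the use of assertEquals on the log list, as assumptions rather than the test's literal body):

private void convertAndCheckRemoteEditLogManifest(RemoteEditLogManifest m,
    List<RemoteEditLog> logs, long committedTxnId) {
  // Convert to the protobuf representation and back again.
  RemoteEditLogManifest roundTripped = PBHelper.convert(PBHelper.convert(m));
  // Both the segment list and the committed txid must survive the trip.
  assertEquals(logs, roundTripped.getLogs());
  assertEquals(committedTxnId, roundTripped.getCommittedTxnId());
}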
Use of org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest in project hadoop by apache.
The class TestCheckpoint, method testNamespaceVerifiedOnFileTransfer.
/**
 * Test that the primary NN will not serve any files to a 2NN who doesn't
 * share its namespace ID, and also will not accept any files from one.
 */
@Test
public void testNamespaceVerifiedOnFileTransfer() throws IOException {
  MiniDFSCluster cluster = null;
  Configuration conf = new HdfsConfiguration();
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
        .format(true).build();
    NamenodeProtocols nn = cluster.getNameNodeRpc();
    URL fsName = DFSUtil.getInfoServer(
        cluster.getNameNode().getServiceRpcAddress(), conf,
        DFSUtil.getHttpClientScheme(conf)).toURL();
    // Make a finalized log on the server side.
    nn.rollEditLog();
    RemoteEditLogManifest manifest = nn.getEditLogManifest(1);
    RemoteEditLog log = manifest.getLogs().get(0);
    NNStorage dstImage = Mockito.mock(NNStorage.class);
    Mockito.doReturn(Lists.newArrayList(new File("/wont-be-written")))
        .when(dstImage).getFiles(Mockito.<NameNodeDirType>anyObject(),
            Mockito.anyString());
    File mockImageFile = File.createTempFile("image", "");
    FileOutputStream imageFile = new FileOutputStream(mockImageFile);
    imageFile.write("data".getBytes());
    imageFile.close();
    Mockito.doReturn(mockImageFile).when(dstImage)
        .findImageFile(Mockito.any(NameNodeFile.class), Mockito.anyLong());
    Mockito.doReturn(new StorageInfo(1, 1, "X", 1, NodeType.NAME_NODE)
        .toColonSeparatedString()).when(dstImage).toColonSeparatedString();
    try {
      TransferFsImage.downloadImageToStorage(fsName, 0, dstImage, false,
          false);
      fail("Storage info was not verified");
    } catch (IOException ioe) {
      String msg = StringUtils.stringifyException(ioe);
      assertTrue(msg, msg.contains("but the secondary expected"));
    }
    try {
      TransferFsImage.downloadEditsToStorage(fsName, log, dstImage);
      fail("Storage info was not verified");
    } catch (IOException ioe) {
      String msg = StringUtils.stringifyException(ioe);
      assertTrue(msg, msg.contains("but the secondary expected"));
    }
    try {
      TransferFsImage.uploadImageFromStorage(fsName, conf, dstImage,
          NameNodeFile.IMAGE, 0);
      fail("Storage info was not verified");
    } catch (IOException ioe) {
      String msg = StringUtils.stringifyException(ioe);
      assertTrue(msg, msg.contains("but the secondary expected"));
    }
  } finally {
    cleanup(cluster);
    cluster = null;
  }
}
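
All three expected failures hinge on the same guard: before moving any file, the transfer code compares the peer's storage info against the mock's colon-separated string, and the mocked namespace ID (1) cannot match a freshly formatted cluster's. A sketch of that verification pattern (names and message text modeled on the assertions above, not the literal Hadoop implementation):

/**
 * Sketch of the storage-info guard the test exercises: refuse a transfer
 * whenever the peer's colon-separated storage string differs from ours.
 */
static void verifyStorageInfo(String expected, String actual)
    throws IOException {
  if (!expected.equals(actual)) {
    throw new IOException("Image transfer came from storage " + actual
        + " but the secondary expected " + expected);
  }
}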