Use of org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile in project hadoop by apache.
The class StandbyCheckpointer, method doCheckpoint.
private void doCheckpoint(boolean sendCheckpoint) throws InterruptedException, IOException {
  assert canceler != null;
  final long txid;
  final NameNodeFile imageType;
  // Acquire cpLock to make sure no one is modifying the name system.
  // It does not need the full namesystem write lock, since the only thing
  // that modifies namesystem on standby node is edit log replaying.
  namesystem.cpLockInterruptibly();
  try {
    assert namesystem.getEditLog().isOpenForRead() :
        "Standby Checkpointer should only attempt a checkpoint when "
        + "NN is in standby mode, but the edit logs are in an unexpected state";
    FSImage img = namesystem.getFSImage();
    long prevCheckpointTxId = img.getStorage().getMostRecentCheckpointTxId();
    long thisCheckpointTxId = img.getCorrectLastAppliedOrWrittenTxId();
    assert thisCheckpointTxId >= prevCheckpointTxId;
    if (thisCheckpointTxId == prevCheckpointTxId) {
      LOG.info("A checkpoint was triggered but the Standby Node has not "
          + "received any transactions since the last checkpoint at txid "
          + thisCheckpointTxId + ". Skipping...");
      return;
    }
    if (namesystem.isRollingUpgrade() && !namesystem.getFSImage().hasRollbackFSImage()) {
      // if we will do rolling upgrade but have not created the rollback image
      // yet, name this checkpoint as fsimage_rollback
      imageType = NameNodeFile.IMAGE_ROLLBACK;
    } else {
      imageType = NameNodeFile.IMAGE;
    }
    img.saveNamespace(namesystem, imageType, canceler);
    txid = img.getStorage().getMostRecentCheckpointTxId();
    assert txid == thisCheckpointTxId : "expected to save checkpoint at txid="
        + thisCheckpointTxId + " but instead saved at txid=" + txid;
    // Save the legacy OIV image, if the output dir is defined.
    String outputDir = checkpointConf.getLegacyOivImageDir();
    if (outputDir != null && !outputDir.isEmpty()) {
      img.saveLegacyOIVImage(namesystem, outputDir, canceler);
    }
  } finally {
    namesystem.cpUnlock();
  }
  // early exit if we shouldn't actually send the checkpoint to the ANN
  if (!sendCheckpoint) {
    return;
  }
  // Upload the saved checkpoint back to the active
  // Do this in a separate thread to avoid blocking transition to active, but don't allow more
  // than the expected number of tasks to run or queue up
  // See HDFS-4816
  ExecutorService executor = new ThreadPoolExecutor(0, activeNNAddresses.size(), 100,
      TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(activeNNAddresses.size()),
      uploadThreadFactory);
  // for right now, just match the upload to the nn address by convention. There is no need to
  // directly tie them together by adding a pair class.
  List<Future<TransferFsImage.TransferResult>> uploads =
      new ArrayList<Future<TransferFsImage.TransferResult>>();
  for (final URL activeNNAddress : activeNNAddresses) {
    Future<TransferFsImage.TransferResult> upload =
        executor.submit(new Callable<TransferFsImage.TransferResult>() {

          @Override
          public TransferFsImage.TransferResult call() throws IOException {
            return TransferFsImage.uploadImageFromStorage(activeNNAddress, conf,
                namesystem.getFSImage().getStorage(), imageType, txid, canceler);
          }
        });
    uploads.add(upload);
  }
  InterruptedException ie = null;
  IOException ioe = null;
  int i = 0;
  boolean success = false;
  for (; i < uploads.size(); i++) {
    Future<TransferFsImage.TransferResult> upload = uploads.get(i);
    try {
      // TODO should there be some smarts here about retries nodes that are not the active NN?
      if (upload.get() == TransferFsImage.TransferResult.SUCCESS) {
        success = true;
        // avoid getting the rest of the results - we don't care since we had a successful upload
        break;
      }
    } catch (ExecutionException e) {
      ioe = new IOException("Exception during image upload: " + e.getMessage(), e.getCause());
      break;
    } catch (InterruptedException e) {
      ie = e;
      break;
    }
  }
  lastUploadTime = monotonicNow();
  // we are primary if we successfully updated the ANN
  this.isPrimaryCheckPointer = success;
  // cleaner than copying code for multiple catch statements and better than catching all
  // exceptions, so we just handle the ones we expect.
  if (ie != null || ioe != null) {
    // cancel the rest of the tasks, and close the pool
    for (; i < uploads.size(); i++) {
      Future<TransferFsImage.TransferResult> upload = uploads.get(i);
      // The background thread may be blocked waiting in the throttler, so
      // interrupt it.
      upload.cancel(true);
    }
    // shutdown so we interrupt anything running and don't start anything new
    executor.shutdownNow();
    // this is a good bit longer than the thread timeout, just to make sure all the threads
    // that are not doing any work also stop
    executor.awaitTermination(500, TimeUnit.MILLISECONDS);
    // re-throw the exception we got, since one of these two must be non-null
    if (ie != null) {
      throw ie;
    } else if (ioe != null) {
      throw ioe;
    }
  }
}
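
The bounded upload pool above (core size 0, at most one thread and one queued task per active NN, per HDFS-4816) paired with a first-success loop is a reusable pattern. Below is a minimal, self-contained sketch of the same idea, not Hadoop code: the target addresses and the upload body are hypothetical stand-ins for TransferFsImage.uploadImageFromStorage.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;

public class FirstSuccessUploads {
  public static void main(String[] args) throws Exception {
    List<String> targets = List.of("nn1:50070", "nn2:50070"); // hypothetical targets
    // Core size 0; capacity for one running and one queued task per target, as above.
    ExecutorService executor = new ThreadPoolExecutor(0, targets.size(), 100,
        TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>(targets.size()));
    List<Future<Boolean>> uploads = new ArrayList<>();
    for (String target : targets) {
      uploads.add(executor.submit(() -> {
        System.out.println("uploading to " + target); // stand-in for the real upload
        Thread.sleep(50);
        return true;
      }));
    }
    int i = 0;
    for (; i < uploads.size(); i++) {
      if (uploads.get(i).get()) {
        break; // first success wins; we don't care about the remaining results
      }
    }
    // Cancel whatever is left (cancelling an already-completed future is a no-op).
    for (; i < uploads.size(); i++) {
      uploads.get(i).cancel(true);
    }
    executor.shutdownNow();
    executor.awaitTermination(500, TimeUnit.MILLISECONDS);
  }
}
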
Use of org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile in project hadoop by apache.
The class FSImage, method saveFSImage.
/**
 * Save the contents of the FS image to the file.
 */
void saveFSImage(SaveNamespaceContext context, StorageDirectory sd,
    NameNodeFile dstType) throws IOException {
  long txid = context.getTxId();
  File newFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE_NEW, txid);
  File dstFile = NNStorage.getStorageFile(sd, dstType, txid);
  FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context);
  FSImageCompression compression = FSImageCompression.createCompression(conf);
  saver.save(newFile, compression);
  MD5FileUtils.saveMD5File(dstFile, saver.getSavedDigest());
  storage.setMostRecentCheckpointInfo(txid, Time.now());
}
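
Note that the image bytes are written to the IMAGE_NEW ("fsimage.ckpt") file first, while the MD5 is recorded against the final destination name; the rename to the final name happens later (see renameImageFileInDir below). As a rough illustration of the file naming in play, here is a simplified, non-authoritative reconstruction of how NNStorage derives checkpoint file names (the %019d padding is an assumption based on the 19-digit txids seen in storage directories):

public class ImageNameSketch {
  static String storageFileName(String prefix, long txid) {
    // NameNodeFile prefix plus a zero-padded transaction id
    return String.format("%s_%019d", prefix, txid);
  }

  public static void main(String[] args) {
    System.out.println(storageFileName("fsimage.ckpt", 42)); // fsimage.ckpt_0000000000000000042
    System.out.println(storageFileName("fsimage", 42));      // fsimage_0000000000000000042
  }
}
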
Use of org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile in project hadoop by apache.
The class FSImage, method renameImageFileInDir.
private void renameImageFileInDir(StorageDirectory sd, NameNodeFile fromNnf,
    NameNodeFile toNnf, long txid, boolean renameMD5) throws IOException {
  final File fromFile = NNStorage.getStorageFile(sd, fromNnf, txid);
  final File toFile = NNStorage.getStorageFile(sd, toNnf, txid);
  // renameTo fails on Windows if the destination file already exists.
  if (LOG.isDebugEnabled()) {
    LOG.debug("renaming " + fromFile.getAbsolutePath()
        + " to " + toFile.getAbsolutePath());
  }
  if (!fromFile.renameTo(toFile)) {
    if (!toFile.delete() || !fromFile.renameTo(toFile)) {
      throw new IOException("renaming " + fromFile.getAbsolutePath() + " to "
          + toFile.getAbsolutePath() + " FAILED");
    }
  }
  if (renameMD5) {
    MD5FileUtils.renameMD5File(fromFile, toFile);
  }
}
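
The delete-then-retry fallback exists because File.renameTo returns false on Windows when the destination already exists. With java.nio the same replace-destination intent can be expressed directly; a minimal sketch (not part of FSImage, file names are hypothetical):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

public class ReplacingRename {
  // Unlike File.renameTo, Files.move reports failures with an exception
  // instead of a boolean, and REPLACE_EXISTING handles an existing target.
  static void renameReplacing(Path from, Path to) throws IOException {
    Files.move(from, to, StandardCopyOption.REPLACE_EXISTING);
  }

  public static void main(String[] args) throws IOException {
    Path from = Files.createTempFile("fsimage", ".ckpt"); // hypothetical files
    Path to = from.resolveSibling("fsimage_42");
    renameReplacing(from, to);
    System.out.println("moved to " + to);
  }
}
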
Use of org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile in project hadoop by apache.
The class FSImage, method saveDigestAndRenameCheckpointImage.
/**
 * This is called by the 2NN after having downloaded an image, and by
 * the NN after having received a new image from the 2NN. It
 * renames the image from fsimage_N.ckpt to fsimage_N and also
 * saves the related .md5 file into place.
 */
public synchronized void saveDigestAndRenameCheckpointImage(NameNodeFile nnf,
    long txid, MD5Hash digest) throws IOException {
  // Write and rename MD5 file
  List<StorageDirectory> badSds = Lists.newArrayList();
  for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.IMAGE)) {
    File imageFile = NNStorage.getImageFile(sd, nnf, txid);
    try {
      MD5FileUtils.saveMD5File(imageFile, digest);
    } catch (IOException ioe) {
      badSds.add(sd);
    }
  }
  storage.reportErrorsOnDirectories(badSds);
  CheckpointFaultInjector.getInstance().afterMD5Rename();
  // Rename image from tmp file
  renameCheckpoint(txid, NameNodeFile.IMAGE_NEW, nnf, false);
  // So long as this is the newest image available,
  // advertise it as such to other checkpointers from now on
  if (txid > storage.getMostRecentCheckpointTxId()) {
    storage.setMostRecentCheckpointInfo(txid, Time.now());
  }
}
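
The .md5 sidecar lets a reader verify the image bytes before trusting them. A rough sketch of the idea, not the real MD5FileUtils (the "digest *filename" md5sum-style line format is an assumption here):

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;

public class Md5Sidecar {
  // Hash the image and store "<hex digest> *<file name>" next to it.
  static void writeMd5Sidecar(Path imageFile) throws Exception {
    byte[] digest = MessageDigest.getInstance("MD5")
        .digest(Files.readAllBytes(imageFile));
    StringBuilder hex = new StringBuilder();
    for (byte b : digest) {
      hex.append(String.format("%02x", b));
    }
    Path md5File = imageFile.resolveSibling(imageFile.getFileName() + ".md5");
    Files.write(md5File, (hex + " *" + imageFile.getFileName() + "\n")
        .getBytes(StandardCharsets.UTF_8));
  }

  public static void main(String[] args) throws Exception {
    Path image = Files.createTempFile("fsimage_", "");
    Files.write(image, "image bytes".getBytes(StandardCharsets.UTF_8));
    writeMd5Sidecar(image); // writes e.g. fsimage_....md5 next to the image
  }
}
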
Use of org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile in project hadoop by apache.
The class ImageServlet, method doPut.
@Override
protected void doPut(final HttpServletRequest request, final HttpServletResponse response)
    throws ServletException, IOException {
  try {
    ServletContext context = getServletContext();
    final FSImage nnImage = NameNodeHttpServer.getFsImageFromContext(context);
    final Configuration conf = (Configuration) getServletContext()
        .getAttribute(JspHelper.CURRENT_CONF);
    final PutImageParams parsedParams = new PutImageParams(request, response, conf);
    final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
    validateRequest(context, conf, request, response, nnImage,
        parsedParams.getStorageInfoString());
    UserGroupInformation.getCurrentUser().doAs(new PrivilegedExceptionAction<Void>() {

      @Override
      public Void run() throws Exception {
        // if it's not the active NN, then we need to notify the caller it was the wrong
        // target (regardless of the fact that we got the image)
        HAServiceProtocol.HAServiceState state =
            NameNodeHttpServer.getNameNodeStateFromContext(getServletContext());
        if (state != HAServiceProtocol.HAServiceState.ACTIVE) {
          // we need a different response type here so the client can differentiate this
          // from the failure to upload due to (1) security, or (2) other checkpoints already
          // present
          response.sendError(HttpServletResponse.SC_EXPECTATION_FAILED,
              "Namenode " + request.getLocalAddr()
              + " is currently not in a state which can "
              + "accept uploads of new fsimages. State: " + state);
          return null;
        }
        final long txid = parsedParams.getTxId();
        String remoteAddr = request.getRemoteAddr();
        ImageUploadRequest imageRequest = new ImageUploadRequest(txid, remoteAddr);
        final NameNodeFile nnf = parsedParams.getNameNodeFile();
        // if the node is attempting to upload an older transaction, we ignore it
        SortedSet<ImageUploadRequest> larger =
            currentlyDownloadingCheckpoints.tailSet(imageRequest);
        if (larger.size() > 0) {
          response.sendError(HttpServletResponse.SC_CONFLICT,
              "Another checkpointer is already in the process of uploading a"
              + " checkpoint made up to transaction ID " + larger.last());
          return null;
        }
        // make sure no one else has started uploading one
        if (!currentlyDownloadingCheckpoints.add(imageRequest)) {
          response.sendError(HttpServletResponse.SC_CONFLICT,
              "Either current namenode is checkpointing or another"
              + " checkpointer is already in the process of "
              + "uploading a checkpoint made at transaction ID " + txid);
          return null;
        }
        try {
          if (nnImage.getStorage().findImageFile(nnf, txid) != null) {
            response.sendError(HttpServletResponse.SC_CONFLICT,
                "Either current namenode has checkpointed or "
                + "another checkpointer already uploaded a "
                + "checkpoint for txid " + txid);
            return null;
          }
          InputStream stream = request.getInputStream();
          try {
            long start = monotonicNow();
            MD5Hash downloadImageDigest = TransferFsImage.handleUploadImageRequest(
                request, txid, nnImage.getStorage(), stream,
                parsedParams.getFileSize(), getThrottler(conf));
            nnImage.saveDigestAndRenameCheckpointImage(nnf, txid, downloadImageDigest);
            // Metrics non-null only when used inside name node
            if (metrics != null) {
              long elapsed = monotonicNow() - start;
              metrics.addPutImage(elapsed);
            }
            // Now that we have a new checkpoint, we might be able to
            // remove some old ones.
            nnImage.purgeOldStorage(nnf);
          } finally {
            // remove the request once we've processed it, or it threw an error, so we
            // aren't using it either
            currentlyDownloadingCheckpoints.remove(imageRequest);
            stream.close();
          }
        } finally {
          nnImage.removeFromCheckpointing(txid);
        }
        return null;
      }
    });
  } catch (Throwable t) {
    String errMsg = "PutImage failed. " + StringUtils.stringifyException(t);
    response.sendError(HttpServletResponse.SC_GONE, errMsg);
    throw new IOException(errMsg);
  }
}
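
The conflict check above keeps a sorted set of in-flight uploads and uses tailSet to reject a request when any in-flight upload is at an equal or newer txid. A simplified sketch of that mechanism (the real code keys on an ImageUploadRequest of txid plus requester address; keying on the bare txid here is a simplification):

import java.util.concurrent.ConcurrentSkipListSet;

public class UploadConflicts {
  static final ConcurrentSkipListSet<Long> inFlight = new ConcurrentSkipListSet<>();

  // Reject when any in-flight upload has an equal or newer txid; tailSet is
  // inclusive of the probe value, so a duplicate txid is caught here too.
  static boolean tryStartUpload(long txid) {
    if (!inFlight.tailSet(txid).isEmpty()) {
      return false;
    }
    // add() returning false means another thread raced us with the same txid
    return inFlight.add(txid);
  }

  static void finishUpload(long txid) {
    inFlight.remove(txid);
  }

  public static void main(String[] args) {
    System.out.println(tryStartUpload(40)); // true
    System.out.println(tryStartUpload(35)); // false: txid 40 is in flight
    finishUpload(40);
    System.out.println(tryStartUpload(35)); // true once 40 completes
  }
}
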