Use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData in project hadoop by apache.
The class Journal, method acceptRecovery.
/**
 * @see QJournalProtocol#acceptRecovery(RequestInfo,
 *      QJournalProtocolProtos.SegmentStateProto, URL)
 */
public synchronized void acceptRecovery(RequestInfo reqInfo,
    SegmentStateProto segment, URL fromUrl) throws IOException {
  checkFormatted();
  checkRequest(reqInfo);

  abortCurSegment();

  long segmentTxId = segment.getStartTxId();

  // Basic sanity checks that the segment is well-formed and contains
  // at least one transaction.
  Preconditions.checkArgument(segment.getEndTxId() > 0 &&
      segment.getEndTxId() >= segmentTxId,
      "bad recovery state for segment %s: %s",
      segmentTxId, TextFormat.shortDebugString(segment));

  PersistedRecoveryPaxosData oldData = getPersistedPaxosData(segmentTxId);
  PersistedRecoveryPaxosData newData = PersistedRecoveryPaxosData.newBuilder()
      .setAcceptedInEpoch(reqInfo.getEpoch())
      .setSegmentState(segment)
      .build();
  // If we previously accepted a recovery proposal in a later epoch, this
  // request is stale. We should never actually trigger this, since the
  // checkRequest() call above should filter non-increasing epoch numbers.
  if (oldData != null) {
    alwaysAssert(oldData.getAcceptedInEpoch() <= reqInfo.getEpoch(),
        "Bad paxos transition, out-of-order epochs.\nOld: %s\nNew: %s\n",
        oldData, newData);
  }
  File syncedFile = null;

  SegmentStateProto currentSegment = getSegmentInfo(segmentTxId);
  if (currentSegment == null ||
      currentSegment.getEndTxId() != segment.getEndTxId()) {
    if (currentSegment == null) {
      LOG.info("Synchronizing log " + TextFormat.shortDebugString(segment) +
          ": no current segment in place");

      // Update the highest txid for lag metrics
      updateHighestWrittenTxId(Math.max(segment.getEndTxId(),
          highestWrittenTxId));
    } else {
      LOG.info("Synchronizing log " + TextFormat.shortDebugString(segment) +
          ": old segment " + TextFormat.shortDebugString(currentSegment) +
          " is not the right length");
      // Paranoid sanity check: replacing the current segment with the new
      // one must not discard any transactions which are already committed.
      if (txnRange(currentSegment).containsLong(committedTxnId.get()) &&
          !txnRange(segment).containsLong(committedTxnId.get())) {
        throw new AssertionError(
            "Cannot replace segment " +
            TextFormat.shortDebugString(currentSegment) +
            " with new segment " +
            TextFormat.shortDebugString(segment) +
            ": would discard already-committed txn " + committedTxnId.get());
      }
      // Another paranoid check: we should not be asked to synchronize a log
      // on top of a finalized segment.
      alwaysAssert(currentSegment.getIsInProgress(),
          "Should never be asked to synchronize a different log on top of an " +
          "already-finalized segment");
      // If the old segment contained the highest written txid, pull it back
      // to the new segment's end, since that value is used for lag metrics.
      if (txnRange(currentSegment).containsLong(highestWrittenTxId)) {
        updateHighestWrittenTxId(segment.getEndTxId());
      }
    }
    syncedFile = syncLog(reqInfo, segment, fromUrl);
  } else {
    LOG.info("Skipping download of log " +
        TextFormat.shortDebugString(segment) +
        ": already have up-to-date logs");
  }
  // This is one of the few places in the protocol where we have a single
  // RPC that results in two distinct actions:
  //
  //  1) Downloads the new log segment data (above)
  //  2) Records the new Paxos data about the synchronized segment (below)
  //
  // These need to be treated as a transaction from the perspective
  // of any external process. We do this by treating the persistPaxosData()
  // success as the "commit" of an atomic transaction. If we fail before
  // this point, the downloaded edit log will only exist at a temporary
  // path, and thus not change any externally visible state. If we fail
  // after this point, then any future prepareRecovery() call will see
  // the Paxos data, and by calling completeHalfDoneAcceptRecovery() will
  // roll forward the rename of the referenced log file.
  //
  // See also: HDFS-3955
  //
  // The fault points here are exercised by the randomized fault-injection
  // test case to ensure that this atomic "transaction" operates correctly.
  JournalFaultInjector.get().beforePersistPaxosData();
  persistPaxosData(segmentTxId, newData);
  JournalFaultInjector.get().afterPersistPaxosData();

  if (syncedFile != null) {
    FileUtil.replaceFile(syncedFile,
        storage.getInProgressEditLog(segmentTxId));
  }

  LOG.info("Accepted recovery for segment " + segmentTxId + ": " +
      TextFormat.shortDebugString(newData));
}
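The "commit point" described in the comment above only works because syncLog() downloads the remote segment to a temporary path and leaves the final rename to the caller. A minimal sketch of that contract, in the context of the Journal class; downloadSegment() is a hypothetical stand-in for the HTTP transfer the real implementation performs against fromUrl, and the temporary-file naming is an assumption:

// Sketch only: download the recovered segment to a temporary file named
// after the segment and epoch. Nothing externally visible changes until
// the caller renames it after persistPaxosData() succeeds.
private File syncLogSketch(RequestInfo reqInfo, SegmentStateProto segment,
    URL fromUrl) throws IOException {
  File tmpFile = storage.getSyncLogTemporaryFile(
      segment.getStartTxId(), reqInfo.getEpoch()); // assumed helper
  downloadSegment(fromUrl, tmpFile); // hypothetical HTTP fetch helper
  return tmpFile;
}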
Use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData in project hadoop by apache.
The class Journal, method prepareRecovery.
/**
 * @see QJournalProtocol#prepareRecovery(RequestInfo, long)
 */
public synchronized PrepareRecoveryResponseProto prepareRecovery(
    RequestInfo reqInfo, long segmentTxId) throws IOException {
  checkFormatted();
  checkRequest(reqInfo);

  abortCurSegment();

  PrepareRecoveryResponseProto.Builder builder =
      PrepareRecoveryResponseProto.newBuilder();

  PersistedRecoveryPaxosData previouslyAccepted =
      getPersistedPaxosData(segmentTxId);
  completeHalfDoneAcceptRecovery(previouslyAccepted);

  SegmentStateProto segInfo = getSegmentInfo(segmentTxId);
  boolean hasFinalizedSegment = segInfo != null && !segInfo.getIsInProgress();

  if (previouslyAccepted != null && !hasFinalizedSegment) {
    SegmentStateProto acceptedState = previouslyAccepted.getSegmentState();
    assert acceptedState.getEndTxId() == segInfo.getEndTxId() :
        "prev accepted: " + TextFormat.shortDebugString(previouslyAccepted) +
        "\non disk: " + TextFormat.shortDebugString(segInfo);

    builder.setAcceptedInEpoch(previouslyAccepted.getAcceptedInEpoch())
        .setSegmentState(previouslyAccepted.getSegmentState());
  } else if (segInfo != null) {
    builder.setSegmentState(segInfo);
  }

  builder.setLastWriterEpoch(lastWriterEpoch.get());
  if (committedTxnId.get() != HdfsServerConstants.INVALID_TXID) {
    builder.setLastCommittedTxId(committedTxnId.get());
  }

  PrepareRecoveryResponseProto resp = builder.build();
  LOG.info("Prepared recovery for segment " + segmentTxId + ": " +
      TextFormat.shortDebugString(resp));
  return resp;
}
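prepareRecovery() calls completeHalfDoneAcceptRecovery() to roll forward an acceptRecovery() that crashed after persisting its Paxos data but before renaming the downloaded file. That helper is not shown on this page; a sketch of the roll-forward logic, assuming the same temporary-file naming as the download path (getSyncLogTemporaryFile() is an assumed helper):

// Sketch only: if persisted Paxos data exists and the matching temporary
// download is still present, the previous acceptRecovery() crashed past its
// commit point, so finish the rename; otherwise there is nothing to do.
private void completeHalfDoneAcceptRecoverySketch(
    PersistedRecoveryPaxosData paxosData) throws IOException {
  if (paxosData == null) {
    return; // no recovery has ever been accepted for this segment
  }
  long segmentId = paxosData.getSegmentState().getStartTxId();
  long epoch = paxosData.getAcceptedInEpoch();
  File tmp = storage.getSyncLogTemporaryFile(segmentId, epoch); // assumed
  if (tmp.exists()) {
    FileUtil.replaceFile(tmp, storage.getInProgressEditLog(segmentId));
  }
}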
Use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData in project hadoop by apache.
The class Journal, method getPersistedPaxosData.
/**
 * Retrieve the persisted data for recovering the given segment from disk.
 */
private PersistedRecoveryPaxosData getPersistedPaxosData(long segmentTxId)
    throws IOException {
  File f = storage.getPaxosFile(segmentTxId);
  if (!f.exists()) {
    // Default instance has no fields filled in (they're optional)
    return null;
  }

  InputStream in = new FileInputStream(f);
  try {
    PersistedRecoveryPaxosData ret =
        PersistedRecoveryPaxosData.parseDelimitedFrom(in);
    Preconditions.checkState(ret != null &&
        ret.getSegmentState().getStartTxId() == segmentTxId,
        "Bad persisted data for segment %s: %s", segmentTxId, ret);
    return ret;
  } finally {
    IOUtils.closeStream(in);
  }
}
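The write side, persistPaxosData(), is the counterpart of this reader and serves as the commit point of the recovery "transaction" described above. A minimal sketch of what it must do, assuming an AtomicFileOutputStream-style stream that only replaces the target file on a successful close; the real method may differ in detail:

// Sketch only: persist the accepted recovery proposal so that a crash at
// any point leaves either the old Paxos file or the complete new one.
private void persistPaxosDataSketch(long segmentTxId,
    PersistedRecoveryPaxosData newData) throws IOException {
  File f = storage.getPaxosFile(segmentTxId);
  AtomicFileOutputStream fos = new AtomicFileOutputStream(f); // assumed helper
  boolean success = false;
  try {
    // Length-delimited, matching parseDelimitedFrom() on the read side.
    newData.writeDelimitedTo(fos);
    fos.flush();
    success = true;
  } finally {
    if (success) {
      IOUtils.closeStream(fos); // commit: atomically replace the target file
    } else {
      fos.abort(); // discard the partial write; the old file stays intact
    }
  }
}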