use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto in project hadoop by apache.
the class TestJournalNode method testAcceptRecoveryBehavior.
/**
 * Test that the JournalNode performs correctly as a Paxos
 * <em>Acceptor</em> process.
 *
 * <p>The scenario walks through, in order: a prepare before any epoch has
 * been set (expected to fail), a prepare with no segment, a prepare with an
 * in-progress segment, an accept, a prepare from a newer epoch (which must
 * report the accepted value), and finally prepare/accept calls from a stale
 * epoch (which must both be rejected).
 *
 * @throws Exception if any journal call fails in a way the test does not expect
 */
@Test(timeout = 100000)
public void testAcceptRecoveryBehavior() throws Exception {
  // No epoch has been established yet, so running the recovery protocol
  // must be refused.
  try {
    ch.prepareRecovery(1L).get();
    fail("Did not throw IllegalState when trying to run paxos without an epoch");
  } catch (ExecutionException ise) {
    GenericTestUtils.assertExceptionContains("bad epoch", ise);
  }

  ch.newEpoch(1).get();
  ch.setEpoch(1);

  // prepare() with no previously accepted value and no logs present
  PrepareRecoveryResponseProto prep = ch.prepareRecovery(1L).get();
  System.err.println("Prep: " + prep);
  assertFalse(prep.hasAcceptedInEpoch());
  assertFalse(prep.hasSegmentState());

  // Make a log segment, and prepare again -- this time should see the
  // segment existing.
  ch.startLogSegment(1L, NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION).get();
  ch.sendEdits(1L, 1L, 1, QJMTestUtil.createTxnData(1, 1)).get();
  prep = ch.prepareRecovery(1L).get();
  System.err.println("Prep: " + prep);
  assertFalse(prep.hasAcceptedInEpoch());
  assertTrue(prep.hasSegmentState());

  // accept() should save the accepted value in persistent storage
  ch.acceptRecovery(prep.getSegmentState(), new URL("file:///dev/null")).get();

  // So another prepare() call from a new epoch would return this value.
  // Wait for the newEpoch() call to complete (as is done for epoch 1 above)
  // so that a failure to promise epoch 2 surfaces here instead of being
  // silently dropped with the unobserved future.
  ch.newEpoch(2).get();
  ch.setEpoch(2);
  prep = ch.prepareRecovery(1L).get();
  assertEquals(1L, prep.getAcceptedInEpoch());
  assertEquals(1L, prep.getSegmentState().getEndTxId());

  // A prepare() or accept() call from an earlier epoch should now be rejected
  ch.setEpoch(1);
  try {
    ch.prepareRecovery(1L).get();
    fail("prepare from earlier epoch not rejected");
  } catch (ExecutionException ioe) {
    GenericTestUtils.assertExceptionContains("epoch 1 is less than the last promised epoch 2", ioe);
  }
  try {
    ch.acceptRecovery(prep.getSegmentState(), new URL("file:///dev/null")).get();
    fail("accept from earlier epoch not rejected");
  } catch (ExecutionException ioe) {
    GenericTestUtils.assertExceptionContains("epoch 1 is less than the last promised epoch 2", ioe);
  }
}
use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto in project hadoop by apache.
the class SegmentRecoveryComparator method compare.
/**
 * Orders two prepareRecovery responses so that the "greater" one is the
 * better candidate to recover from.
 *
 * <p>Ranking rules, applied in order:
 * <ol>
 * <li>A response that carries a segment state outranks one that does not;
 * two responses without a segment state are equal.</li>
 * <li>A finalized segment outranks an in-progress one; two finalized
 * segments must have the same end txid and are equal.</li>
 * <li>Two in-progress segments are ordered by the larger of their
 * accepted-in epoch and last-writer epoch, then by end txid.</li>
 * </ol>
 *
 * @param a first logger/response pair
 * @param b second logger/response pair
 * @return negative, zero or positive as {@code a} ranks below, equal to or
 *         above {@code b}
 * @throws IllegalArgumentException if both responses have a segment but the
 *         segments do not start at the same txid
 * @throws AssertionError if both segments are finalized but end at
 *         different txids
 */
@Override
public int compare(Entry<AsyncLogger, PrepareRecoveryResponseProto> a, Entry<AsyncLogger, PrepareRecoveryResponseProto> b) {
  final PrepareRecoveryResponseProto left = a.getValue();
  final PrepareRecoveryResponseProto right = b.getValue();

  // Rule 1: having a segment at all beats having none.
  final boolean leftHasSegment = left.hasSegmentState();
  final boolean rightHasSegment = right.hasSegmentState();
  if (leftHasSegment != rightHasSegment) {
    return Booleans.compare(leftHasSegment, rightHasSegment);
  }
  if (!leftHasSegment) {
    // Neither side has a segment: nothing to tell them apart.
    return 0;
  }

  // From here on both responses carry a segment.
  final SegmentStateProto leftSeg = left.getSegmentState();
  final SegmentStateProto rightSeg = right.getSegmentState();
  Preconditions.checkArgument(leftSeg.getStartTxId() == rightSeg.getStartTxId(),
      "Should only be called with responses for corresponding segments: "
          + "%s and %s do not have the same start txid.", left, right);

  // Rule 2: a finalized segment is greater than an in-progress one.
  final boolean leftFinalized = !leftSeg.getIsInProgress();
  final boolean rightFinalized = !rightSeg.getIsInProgress();
  if (leftFinalized != rightFinalized) {
    return Booleans.compare(leftFinalized, rightFinalized);
  }
  if (leftFinalized) {
    // Both finalized: their lengths are required to agree.
    if (leftSeg.getEndTxId() == rightSeg.getEndTxId()) {
      return 0;
    }
    throw new AssertionError("finalized segs with different lengths: " + left + ", " + right);
  }

  // Rule 3: both in-progress -- compare the highest epoch each has seen,
  // breaking ties on the segment's end txid.
  final long leftEpoch = Math.max(left.getAcceptedInEpoch(), left.getLastWriterEpoch());
  final long rightEpoch = Math.max(right.getAcceptedInEpoch(), right.getLastWriterEpoch());
  return ComparisonChain.start()
      .compare(leftEpoch, rightEpoch)
      .compare(leftSeg.getEndTxId(), rightSeg.getEndTxId())
      .result();
}
use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto in project hadoop by apache.
the class Journal method prepareRecovery.
/**
 * Builds the response to a prepareRecovery request for the segment starting
 * at {@code segmentTxId}.
 *
 * <p>After validating the request and calling {@code abortCurSegment()}, the
 * response is filled as follows: if recovery data was previously persisted
 * for this segment and there is no finalized segment on disk, the persisted
 * accepted epoch and segment state are reported; otherwise the on-disk
 * segment state is reported when one exists. The last writer epoch is always
 * included, and the last committed txid is included when it is not
 * {@code HdfsServerConstants.INVALID_TXID}.
 *
 * @param reqInfo request metadata, validated via {@code checkRequest}
 * @param segmentTxId the starting txid of the segment being recovered
 * @return the populated prepare-recovery response
 * @throws IOException declared by this method; presumably raised by the
 *         validation or storage helpers it calls
 * @see QJournalProtocol#prepareRecovery(RequestInfo, long)
 */
public synchronized PrepareRecoveryResponseProto prepareRecovery(RequestInfo reqInfo, long segmentTxId) throws IOException {
  checkFormatted();
  checkRequest(reqInfo);
  abortCurSegment();

  PrepareRecoveryResponseProto.Builder response = PrepareRecoveryResponseProto.newBuilder();

  // Load any previously persisted recovery decision and let the helper
  // finish a half-done accept before the on-disk state is inspected.
  PersistedRecoveryPaxosData paxosData = getPersistedPaxosData(segmentTxId);
  completeHalfDoneAcceptRecovery(paxosData);

  SegmentStateProto onDisk = getSegmentInfo(segmentTxId);
  boolean finalizedOnDisk = onDisk != null && !onDisk.getIsInProgress();

  if (paxosData != null && !finalizedOnDisk) {
    // Report the previously accepted value; it is expected to match the
    // length of what is on disk.
    SegmentStateProto accepted = paxosData.getSegmentState();
    assert accepted.getEndTxId() == onDisk.getEndTxId() : "prev accepted: " + TextFormat.shortDebugString(paxosData) + "\n" + "on disk: " + TextFormat.shortDebugString(onDisk);
    response.setAcceptedInEpoch(paxosData.getAcceptedInEpoch());
    response.setSegmentState(accepted);
  } else if (onDisk != null) {
    // No applicable accepted value: report whatever segment exists on disk.
    response.setSegmentState(onDisk);
  }

  response.setLastWriterEpoch(lastWriterEpoch.get());
  if (committedTxnId.get() != HdfsServerConstants.INVALID_TXID) {
    response.setLastCommittedTxId(committedTxnId.get());
  }

  PrepareRecoveryResponseProto built = response.build();
  LOG.info("Prepared recovery for segment " + segmentTxId + ": " + TextFormat.shortDebugString(built));
  return built;
}
use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto in project hadoop by apache.
the class QuorumJournalManager method recoverUnclosedSegment.
/**
 * Run recovery/synchronization for a specific segment.
 * Postconditions:
 * <ul>
 * <li>This segment will be finalized on a majority
 * of nodes.</li>
 * <li>All nodes which contain the finalized segment will
 * agree on the length.</li>
 * </ul>
 *
 * <p>The method proceeds in three quorum rounds: prepareRecovery,
 * acceptRecovery and finalizeLogSegment. If no responder reports either an
 * accepted recovery or a segment, it returns after the first round.
 *
 * @param segmentTxId the starting txid of the segment; must be positive
 * @throws IOException declared by this method; presumably raised when a
 *         quorum round fails
 */
private void recoverUnclosedSegment(long segmentTxId) throws IOException {
  Preconditions.checkArgument(segmentTxId > 0);
  LOG.info("Beginning recovery of unclosed segment starting at txid " + segmentTxId);

  // Round 1: ask the loggers what they have for this segment.
  QuorumCall<AsyncLogger, PrepareRecoveryResponseProto> prepareCall = loggers.prepareRecovery(segmentTxId);
  Map<AsyncLogger, PrepareRecoveryResponseProto> responses =
      loggers.waitForWriteQuorum(prepareCall, prepareRecoveryTimeoutMs, "prepareRecovery(" + segmentTxId + ")");
  LOG.info("Recovery prepare phase complete. Responses:\n" + QuorumCall.mapToString(responses));

  // Pick the logger that either
  //   a) has already accepted a previous proposal that's higher than any
  //      other, or, failing that,
  //   b) has the longest log starting at this transaction ID.
  // TODO: we should collect any "ties" and pass the URL for all of them
  // when syncing, so we can tolerate failure during recovery better.
  Entry<AsyncLogger, PrepareRecoveryResponseProto> winner =
      Collections.max(responses.entrySet(), SegmentRecoveryComparator.INSTANCE);
  AsyncLogger sourceLogger = winner.getKey();
  PrepareRecoveryResponseProto winningResponse = winner.getValue();

  // Guard: the best response has neither an accepted recovery nor a
  // segment, so there is nothing to recover. Sanity-check that no other
  // responder had a segment either -- that would point at a comparator bug.
  if (!winningResponse.hasAcceptedInEpoch() && !winningResponse.hasSegmentState()) {
    for (PrepareRecoveryResponseProto candidate : responses.values()) {
      assert !candidate.hasSegmentState() : "One of the loggers had a response, but no best logger " + "was found.";
    }
    LOG.info("None of the responders had a log to recover: " + QuorumCall.mapToString(responses));
    return;
  }

  // Log which kind of decision was made.
  if (winningResponse.hasAcceptedInEpoch()) {
    LOG.info("Using already-accepted recovery for segment " + "starting at txid " + segmentTxId + ": " + winner);
  } else {
    LOG.info("Using longest log: " + winner);
  }

  SegmentStateProto logToSync = winningResponse.getSegmentState();
  assert segmentTxId == logToSync.getStartTxId();
  final long syncStartTxId = logToSync.getStartTxId();
  final long syncEndTxId = logToSync.getEndTxId();

  // Invariant: no responder may report a committed txid beyond the end of
  // the segment we are about to synchronize to.
  for (Map.Entry<AsyncLogger, PrepareRecoveryResponseProto> pair : responses.entrySet()) {
    PrepareRecoveryResponseProto reported = pair.getValue();
    if (!reported.hasLastCommittedTxId()) {
      continue;
    }
    if (reported.getLastCommittedTxId() > syncEndTxId) {
      throw new AssertionError("Decided to synchronize log to " + logToSync + " but logger " + pair.getKey() + " had seen txid " + reported.getLastCommittedTxId() + " committed");
    }
  }

  // Round 2: have the loggers accept the chosen segment, fetching it from
  // the winning logger's URL.
  URL syncFromUrl = sourceLogger.buildURLToFetchLogs(segmentTxId);
  QuorumCall<AsyncLogger, Void> acceptCall = loggers.acceptRecovery(logToSync, syncFromUrl);
  loggers.waitForWriteQuorum(acceptCall, acceptRecoveryTimeoutMs, "acceptRecovery(" + TextFormat.shortDebugString(logToSync) + ")");

  // Round 3: finalize.
  // If one of the loggers above missed the synchronization step above, but
  // we send a finalize() here, that's OK. It validates the log before
  // finalizing. Hence, even if it is not "in sync", it won't incorrectly
  // finalize.
  QuorumCall<AsyncLogger, Void> finalizeCall = loggers.finalizeLogSegment(syncStartTxId, syncEndTxId);
  loggers.waitForWriteQuorum(finalizeCall, finalizeSegmentTimeoutMs, String.format("finalizeLogSegment(%s-%s)", syncStartTxId, syncEndTxId));
}
Aggregations