Search in sources :

Example 1 with PrepareRecoveryResponseProto

use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto in project hadoop by apache.

the class TestJournalNode method testAcceptRecoveryBehavior.

/**
 * Test that the JournalNode performs correctly as a Paxos
 * <em>Acceptor</em> process.
 *
 * <p>The scenario exercised, in order:
 * <ol>
 * <li>prepare before any epoch has been established is rejected;</li>
 * <li>prepare with no accepted value and no log reports neither;</li>
 * <li>prepare after a segment was written reports the segment state;</li>
 * <li>accept followed by a prepare from a newer epoch reports the
 * accepted value;</li>
 * <li>prepare and accept issued with a stale epoch are both rejected.</li>
 * </ol>
 */
@Test(timeout = 100000)
public void testAcceptRecoveryBehavior() throws Exception {
    // An epoch has to be established with newEpoch() before recovery can be
    // run; without one the prepare call is expected to fail.
    try {
        ch.prepareRecovery(1L).get();
        fail("Did not throw IllegalState when trying to run paxos without an epoch");
    } catch (ExecutionException ise) {
        GenericTestUtils.assertExceptionContains("bad epoch", ise);
    }
    ch.newEpoch(1).get();
    ch.setEpoch(1);
    // prepare() with no previously accepted value and no logs present
    PrepareRecoveryResponseProto prep = ch.prepareRecovery(1L).get();
    System.err.println("Prep: " + prep);
    assertFalse(prep.hasAcceptedInEpoch());
    assertFalse(prep.hasSegmentState());
    // Make a log segment, and prepare again -- this time should see the
    // segment existing.
    ch.startLogSegment(1L, NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION).get();
    ch.sendEdits(1L, 1L, 1, QJMTestUtil.createTxnData(1, 1)).get();
    prep = ch.prepareRecovery(1L).get();
    System.err.println("Prep: " + prep);
    assertFalse(prep.hasAcceptedInEpoch());
    assertTrue(prep.hasSegmentState());
    // accept() should save the accepted value in persistent storage
    ch.acceptRecovery(prep.getSegmentState(), new URL("file:///dev/null")).get();
    // So another prepare() call from a new epoch would return this value.
    // Wait on the returned future, exactly as for epoch 1 above, so that a
    // failed newEpoch() call is reported here instead of being dropped.
    ch.newEpoch(2).get();
    ch.setEpoch(2);
    prep = ch.prepareRecovery(1L).get();
    assertEquals(1L, prep.getAcceptedInEpoch());
    assertEquals(1L, prep.getSegmentState().getEndTxId());
    // A prepare() or accept() call from an earlier epoch should now be rejected
    ch.setEpoch(1);
    try {
        ch.prepareRecovery(1L).get();
        fail("prepare from earlier epoch not rejected");
    } catch (ExecutionException ioe) {
        GenericTestUtils.assertExceptionContains("epoch 1 is less than the last promised epoch 2", ioe);
    }
    try {
        ch.acceptRecovery(prep.getSegmentState(), new URL("file:///dev/null")).get();
        fail("accept from earlier epoch not rejected");
    } catch (ExecutionException ioe) {
        GenericTestUtils.assertExceptionContains("epoch 1 is less than the last promised epoch 2", ioe);
    }
}
Also used : ExecutionException(java.util.concurrent.ExecutionException) PrepareRecoveryResponseProto(org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto) URL(java.net.URL) Test(org.junit.Test)

Example 2 with PrepareRecoveryResponseProto

use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto in project hadoop by apache.

the class SegmentRecoveryComparator method compare.

/**
 * Orders two prepare-recovery responses so that the "better" recovery
 * candidate compares as greater.
 *
 * <p>Ranking, from most to least significant:
 * <ol>
 * <li>a response with a segment beats one without;</li>
 * <li>a finalized segment beats an in-progress one;</li>
 * <li>among in-progress segments, the higher of (accepted-in epoch,
 * last-writer epoch) wins, with the end txid as tie-breaker.</li>
 * </ol>
 *
 * @throws IllegalArgumentException if both responses carry a segment but the
 *         segments have different start txids
 * @throws AssertionError if both segments are finalized but end at
 *         different txids
 */
@Override
public int compare(Entry<AsyncLogger, PrepareRecoveryResponseProto> a, Entry<AsyncLogger, PrepareRecoveryResponseProto> b) {
    final PrepareRecoveryResponseProto left = a.getValue();
    final PrepareRecoveryResponseProto right = b.getValue();

    // A response that has a segment is greater than one that doesn't.
    final boolean leftHasSegment = left.hasSegmentState();
    final boolean rightHasSegment = right.hasSegmentState();
    if (leftHasSegment != rightHasSegment) {
        return Booleans.compare(leftHasSegment, rightHasSegment);
    }
    if (!leftHasSegment) {
        // Neither side has a segment: call them equal.
        return 0;
    }

    // From here on both responses carry a segment.
    final SegmentStateProto leftSeg = left.getSegmentState();
    final SegmentStateProto rightSeg = right.getSegmentState();
    Preconditions.checkArgument(leftSeg.getStartTxId() == rightSeg.getStartTxId(), "Should only be called with responses for corresponding segments: " + "%s and %s do not have the same start txid.", left, right);

    // If exactly one of the two is finalized, the finalized one is greater.
    final boolean leftFinalized = !leftSeg.getIsInProgress();
    final boolean rightFinalized = !rightSeg.getIsInProgress();
    if (leftFinalized != rightFinalized) {
        return Booleans.compare(leftFinalized, rightFinalized);
    }
    if (leftFinalized) {
        // Both finalized: their lengths are required to agree.
        if (leftSeg.getEndTxId() != rightSeg.getEndTxId()) {
            throw new AssertionError("finalized segs with different lengths: " + left + ", " + right);
        }
        return 0;
    }

    // Both in-progress: rank by the most recent epoch seen, then by length.
    final long leftEpoch = Math.max(left.getAcceptedInEpoch(), left.getLastWriterEpoch());
    final long rightEpoch = Math.max(right.getAcceptedInEpoch(), right.getLastWriterEpoch());
    return ComparisonChain.start()
        .compare(leftEpoch, rightEpoch)
        .compare(leftSeg.getEndTxId(), rightSeg.getEndTxId())
        .result();
}
Also used : SegmentStateProto(org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto) PrepareRecoveryResponseProto(org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto)

Example 3 with PrepareRecoveryResponseProto

use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto in project hadoop by apache.

the class Journal method prepareRecovery.

/**
 * Builds the response to a prepare-recovery request for the segment that
 * starts at {@code segmentTxId}.
 *
 * <p>If recovery data was previously accepted for the segment and no
 * finalized copy of the segment is present, the response carries the
 * accepted epoch and the accepted segment state. Otherwise it carries the
 * current segment state, when there is one. The last writer epoch is always
 * included; the last committed txid is included only when it is valid.
 *
 * @see QJournalProtocol#prepareRecovery(RequestInfo, long)
 */
public synchronized PrepareRecoveryResponseProto prepareRecovery(RequestInfo reqInfo, long segmentTxId) throws IOException {
    checkFormatted();
    checkRequest(reqInfo);
    abortCurSegment();

    PersistedRecoveryPaxosData paxosData = getPersistedPaxosData(segmentTxId);
    completeHalfDoneAcceptRecovery(paxosData);
    SegmentStateProto currentState = getSegmentInfo(segmentTxId);

    PrepareRecoveryResponseProto.Builder response = PrepareRecoveryResponseProto.newBuilder();
    boolean finalizedSegmentPresent = currentState != null && !currentState.getIsInProgress();
    boolean reportAcceptedValue = paxosData != null && !finalizedSegmentPresent;
    if (reportAcceptedValue) {
        SegmentStateProto acceptedState = paxosData.getSegmentState();
        assert acceptedState.getEndTxId() == currentState.getEndTxId() : "prev accepted: " + TextFormat.shortDebugString(paxosData) + "\n" + "on disk:       " + TextFormat.shortDebugString(currentState);
        response.setAcceptedInEpoch(paxosData.getAcceptedInEpoch());
        response.setSegmentState(acceptedState);
    } else if (currentState != null) {
        response.setSegmentState(currentState);
    }

    response.setLastWriterEpoch(lastWriterEpoch.get());
    // Only report a committed txid once a valid one has been recorded.
    if (committedTxnId.get() != HdfsServerConstants.INVALID_TXID) {
        response.setLastCommittedTxId(committedTxnId.get());
    }

    PrepareRecoveryResponseProto built = response.build();
    LOG.info("Prepared recovery for segment " + segmentTxId + ": " + TextFormat.shortDebugString(built));
    return built;
}
Also used : SegmentStateProto(org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto) PersistedRecoveryPaxosData(org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData) PrepareRecoveryResponseProto(org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto)

Example 4 with PrepareRecoveryResponseProto

use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto in project hadoop by apache.

the class QuorumJournalManager method recoverUnclosedSegment.

/**
 * Run recovery/synchronization for a specific segment.
 *
 * <p>Three quorum rounds are issued in sequence: prepare, accept, and
 * finalize. If none of the prepare responses reports a segment or a
 * previously accepted value, the method returns after the prepare round.
 *
 * <p>Postconditions:
 * <ul>
 * <li>This segment will be finalized on a majority
 * of nodes.</li>
 * <li>All nodes which contain the finalized segment will
 * agree on the length.</li>
 * </ul>
 *
 * @param segmentTxId the starting txid of the segment; must be positive
 * @throws IOException declared by this method; presumably raised by the
 *         quorum waits when a round cannot complete
 */
private void recoverUnclosedSegment(long segmentTxId) throws IOException {
    Preconditions.checkArgument(segmentTxId > 0);
    LOG.info("Beginning recovery of unclosed segment starting at txid " + segmentTxId);

    // Step 1. Prepare recovery
    QuorumCall<AsyncLogger, PrepareRecoveryResponseProto> prepareCall = loggers.prepareRecovery(segmentTxId);
    String prepareDescription = "prepareRecovery(" + segmentTxId + ")";
    Map<AsyncLogger, PrepareRecoveryResponseProto> responses = loggers.waitForWriteQuorum(prepareCall, prepareRecoveryTimeoutMs, prepareDescription);
    LOG.info("Recovery prepare phase complete. Responses:\n" + QuorumCall.mapToString(responses));

    // Determine the logger who either:
    // a) Has already accepted a previous proposal that's higher than any
    //    other
    //
    //  OR, if no such logger exists:
    //
    // b) Has the longest log starting at this transaction ID
    // TODO: we should collect any "ties" and pass the URL for all of them
    // when syncing, so we can tolerate failure during recovery better.
    Entry<AsyncLogger, PrepareRecoveryResponseProto> winner = Collections.max(responses.entrySet(), SegmentRecoveryComparator.INSTANCE);
    AsyncLogger sourceLogger = winner.getKey();
    PrepareRecoveryResponseProto winningResponse = winner.getValue();

    // Log the above decision, check invariants.
    if (!winningResponse.hasAcceptedInEpoch() && !winningResponse.hasSegmentState()) {
        // The best response has nothing to recover, so no other response
        // should have a segment either -- but a bug in the comparator might
        // cause us to get here with one that does.
        for (PrepareRecoveryResponseProto candidate : responses.values()) {
            assert !candidate.hasSegmentState() : "One of the loggers had a response, but no best logger " + "was found.";
        }
        LOG.info("None of the responders had a log to recover: " + QuorumCall.mapToString(responses));
        return;
    }
    if (winningResponse.hasAcceptedInEpoch()) {
        LOG.info("Using already-accepted recovery for segment " + "starting at txid " + segmentTxId + ": " + winner);
    } else {
        LOG.info("Using longest log: " + winner);
    }

    SegmentStateProto logToSync = winningResponse.getSegmentState();
    assert segmentTxId == logToSync.getStartTxId();

    // Sanity check: none of the loggers should have seen a higher committed
    // txid than the txid we intend to truncate to
    for (Map.Entry<AsyncLogger, PrepareRecoveryResponseProto> responseEntry : responses.entrySet()) {
        AsyncLogger logger = responseEntry.getKey();
        PrepareRecoveryResponseProto resp = responseEntry.getValue();
        boolean committedBeyondSyncPoint = resp.hasLastCommittedTxId() && resp.getLastCommittedTxId() > logToSync.getEndTxId();
        if (committedBeyondSyncPoint) {
            throw new AssertionError("Decided to synchronize log to " + logToSync + " but logger " + logger + " had seen txid " + resp.getLastCommittedTxId() + " committed");
        }
    }

    // Step 2. Ask a quorum to accept the chosen segment, fetching it from
    // the winning logger.
    URL syncFromUrl = sourceLogger.buildURLToFetchLogs(segmentTxId);
    QuorumCall<AsyncLogger, Void> acceptCall = loggers.acceptRecovery(logToSync, syncFromUrl);
    loggers.waitForWriteQuorum(acceptCall, acceptRecoveryTimeoutMs, "acceptRecovery(" + TextFormat.shortDebugString(logToSync) + ")");

    // Step 3. Finalize.
    // If one of the loggers above missed the synchronization step above, but
    // we send a finalize() here, that's OK. It validates the log before
    // finalizing. Hence, even if it is not "in sync", it won't incorrectly
    // finalize.
    QuorumCall<AsyncLogger, Void> finalizeCall = loggers.finalizeLogSegment(logToSync.getStartTxId(), logToSync.getEndTxId());
    loggers.waitForWriteQuorum(finalizeCall, finalizeSegmentTimeoutMs, String.format("finalizeLogSegment(%s-%s)", logToSync.getStartTxId(), logToSync.getEndTxId()));
}
Also used : SegmentStateProto(org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto) PrepareRecoveryResponseProto(org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto) Map(java.util.Map) URL(java.net.URL)

Aggregations

PrepareRecoveryResponseProto (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto): 4, SegmentStateProto (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto): 3, URL (java.net.URL): 2, Map (java.util.Map): 1, ExecutionException (java.util.concurrent.ExecutionException): 1, PersistedRecoveryPaxosData (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData): 1, Test (org.junit.Test): 1