Search in sources :

Example 6 with SegmentStateProto

Use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto in the Apache Hadoop project.

From the class QuorumJournalManager, method recoverUnclosedSegment.

/**
 * Recovers and synchronizes the one log segment that begins at the given
 * transaction ID.
 *
 * <p>Intended postconditions when the full protocol runs:
 * <ul>
 * <li>The segment is finalized on a majority of nodes.</li>
 * <li>Every node holding the finalized segment agrees on its length.</li>
 * </ul>
 *
 * <p>If none of the prepare responses carries a segment state, the method
 * logs that fact and returns without issuing accept or finalize calls.
 *
 * @param segmentTxId the starting txid of the segment; must be positive
 * @throws IOException propagated from the underlying quorum calls
 */
private void recoverUnclosedSegment(long segmentTxId) throws IOException {
    Preconditions.checkArgument(segmentTxId > 0);
    LOG.info("Beginning recovery of unclosed segment starting at txid " + segmentTxId);

    // Phase 1: prepare. Collect what a write quorum of loggers reports for
    // this segment.
    QuorumCall<AsyncLogger, PrepareRecoveryResponseProto> prepareCall = loggers.prepareRecovery(segmentTxId);
    Map<AsyncLogger, PrepareRecoveryResponseProto> responses =
            loggers.waitForWriteQuorum(prepareCall, prepareRecoveryTimeoutMs, "prepareRecovery(" + segmentTxId + ")");
    LOG.info("Recovery prepare phase complete. Responses:\n" + QuorumCall.mapToString(responses));

    // Choose the logger to synchronize from. The comparator is expected to
    // prefer:
    //   a) a logger that already accepted a previous proposal higher than
    //      any other; or, when no such logger exists,
    //   b) the logger with the longest log starting at this transaction ID.
    // TODO: we should collect any "ties" and pass the URL for all of them
    // when syncing, so we can tolerate failure during recovery better.
    Map.Entry<AsyncLogger, PrepareRecoveryResponseProto> winner =
            Collections.max(responses.entrySet(), SegmentRecoveryComparator.INSTANCE);
    AsyncLogger syncSource = winner.getKey();
    PrepareRecoveryResponseProto winnerResponse = winner.getValue();

    boolean alreadyAccepted = winnerResponse.hasAcceptedInEpoch();
    if (!alreadyAccepted && !winnerResponse.hasSegmentState()) {
        // The best response has no segment at all, so there is nothing to
        // recover. Sanity check: in that case no other response should have
        // a segment either. That ought to be guaranteed by the comparator,
        // but a bug in the comparator might cause us to get here.
        for (PrepareRecoveryResponseProto response : responses.values()) {
            assert !response.hasSegmentState() : "One of the loggers had a response, but no best logger was found.";
        }
        LOG.info("None of the responders had a log to recover: " + QuorumCall.mapToString(responses));
        return;
    }

    // Record which of the two selection rules produced the winner.
    if (alreadyAccepted) {
        LOG.info("Using already-accepted recovery for segment starting at txid " + segmentTxId + ": " + winner);
    } else {
        LOG.info("Using longest log: " + winner);
    }

    SegmentStateProto segment = winnerResponse.getSegmentState();
    long startTxId = segment.getStartTxId();
    long endTxId = segment.getEndTxId();
    assert segmentTxId == startTxId;

    // Sanity check: no logger may have seen a committed txid higher than
    // the end txid of the segment we are about to synchronize everyone to.
    for (Map.Entry<AsyncLogger, PrepareRecoveryResponseProto> entry : responses.entrySet()) {
        PrepareRecoveryResponseProto response = entry.getValue();
        if (!response.hasLastCommittedTxId()) {
            continue;
        }
        long committedTxId = response.getLastCommittedTxId();
        if (committedTxId > endTxId) {
            throw new AssertionError("Decided to synchronize log to " + segment + " but logger " + entry.getKey()
                    + " had seen txid " + committedTxId + " committed");
        }
    }

    // Phase 2: accept. Tell the loggers which segment state to adopt and
    // the URL of the chosen logger to fetch it from.
    URL syncFromUrl = syncSource.buildURLToFetchLogs(segmentTxId);
    QuorumCall<AsyncLogger, Void> acceptCall = loggers.acceptRecovery(segment, syncFromUrl);
    loggers.waitForWriteQuorum(acceptCall, acceptRecoveryTimeoutMs,
            "acceptRecovery(" + TextFormat.shortDebugString(segment) + ")");

    // Phase 3: finalize. A logger that missed the synchronization step may
    // still receive this finalize(); that's OK, because it validates the
    // log before finalizing. Hence, even if it is not "in sync", it won't
    // incorrectly finalize.
    QuorumCall<AsyncLogger, Void> finalizeCall = loggers.finalizeLogSegment(startTxId, endTxId);
    loggers.waitForWriteQuorum(finalizeCall, finalizeSegmentTimeoutMs,
            String.format("finalizeLogSegment(%s-%s)", startTxId, endTxId));
}
Also used : SegmentStateProto(org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto) PrepareRecoveryResponseProto(org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto) Map(java.util.Map) URL(java.net.URL)

Aggregations

SegmentStateProto (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto)6 PrepareRecoveryResponseProto (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto)3 PersistedRecoveryPaxosData (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData)2 EditLogFile (org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 File (java.io.File)1 URL (java.net.URL)1 Map (java.util.Map)1 BestEffortLongFile (org.apache.hadoop.hdfs.util.BestEffortLongFile)1 PersistentLongFile (org.apache.hadoop.hdfs.util.PersistentLongFile)1 Test (org.junit.Test)1