
Example 1 with PersistedRecoveryPaxosData

Use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData in project hadoop by apache.

From class Journal, method acceptRecovery:

/**
   * @see QJournalProtocol#acceptRecovery(RequestInfo, QJournalProtocolProtos.SegmentStateProto, URL)
   */
public synchronized void acceptRecovery(RequestInfo reqInfo, SegmentStateProto segment, URL fromUrl) throws IOException {
    checkFormatted();
    checkRequest(reqInfo);
    abortCurSegment();
    long segmentTxId = segment.getStartTxId();
    // Basic sanity checks that the segment is well-formed and contains
    // at least one transaction.
    Preconditions.checkArgument(segment.getEndTxId() > 0 &&
        segment.getEndTxId() >= segmentTxId,
        "bad recovery state for segment %s: %s",
        segmentTxId, TextFormat.shortDebugString(segment));
    PersistedRecoveryPaxosData oldData = getPersistedPaxosData(segmentTxId);
    PersistedRecoveryPaxosData newData = PersistedRecoveryPaxosData.newBuilder()
        .setAcceptedInEpoch(reqInfo.getEpoch())
        .setSegmentState(segment)
        .build();
    // If we previously accepted a recovery in a higher-numbered epoch, this
    // request is out of order; we should never actually hit this, since the
    // checkRequest() call above should filter non-increasing epoch numbers.
    if (oldData != null) {
        alwaysAssert(oldData.getAcceptedInEpoch() <= reqInfo.getEpoch(),
            "Bad paxos transition, out-of-order epochs.\nOld: %s\nNew: %s\n",
            oldData, newData);
    }
    File syncedFile = null;
    SegmentStateProto currentSegment = getSegmentInfo(segmentTxId);
    if (currentSegment == null || currentSegment.getEndTxId() != segment.getEndTxId()) {
        if (currentSegment == null) {
            LOG.info("Synchronizing log " + TextFormat.shortDebugString(segment) + ": no current segment in place");
            // Update the highest txid for lag metrics
            updateHighestWrittenTxId(Math.max(segment.getEndTxId(), highestWrittenTxId));
        } else {
            LOG.info("Synchronizing log " + TextFormat.shortDebugString(segment) + ": old segment " + TextFormat.shortDebugString(currentSegment) + " is not the right length");
            // which are already Committed.
            if (txnRange(currentSegment).containsLong(committedTxnId.get()) && !txnRange(segment).containsLong(committedTxnId.get())) {
                throw new AssertionError("Cannot replace segment " + TextFormat.shortDebugString(currentSegment) + " with new segment " + TextFormat.shortDebugString(segment) + ": would discard already-committed txn " + committedTxnId.get());
            }
            // Another paranoid check: we should not be asked to synchronize a log
            // on top of a finalized segment.
            alwaysAssert(currentSegment.getIsInProgress(),
                "Should never be asked to synchronize a different log on top of an " +
                "already-finalized segment");
            // If we're truncating the log, also adjust the highest written txid
            // used for lag metrics.
            if (txnRange(currentSegment).containsLong(highestWrittenTxId)) {
                updateHighestWrittenTxId(segment.getEndTxId());
            }
        }
        syncedFile = syncLog(reqInfo, segment, fromUrl);
    } else {
        LOG.info("Skipping download of log " + TextFormat.shortDebugString(segment) + ": already have up-to-date logs");
    }
    // This is one of the few places in the protocol where we have a single
    // RPC that results in two distinct actions:
    //
    // - 1) Downloads the new log segment data (above)
    // - 2) Records the new Paxos data about the synchronized segment (below)
    //
    // These need to be treated as a transaction from the perspective
    // of any external process. We do this by treating the persistPaxosData()
    // success as the "commit" of an atomic transaction. If we fail before
    // this point, the downloaded edit log will only exist at a temporary
    // path, and thus not change any externally visible state. If we fail
    // after this point, then any future prepareRecovery() call will see
    // the Paxos data, and by calling completeHalfDoneAcceptRecovery() will
    // roll forward the rename of the referenced log file.
    //
    // See also: HDFS-3955
    //
    // The fault points here are exercised by the randomized fault injection
    // test case to ensure that this atomic "transaction" operates correctly.
    JournalFaultInjector.get().beforePersistPaxosData();
    persistPaxosData(segmentTxId, newData);
    JournalFaultInjector.get().afterPersistPaxosData();
    if (syncedFile != null) {
        FileUtil.replaceFile(syncedFile, storage.getInProgressEditLog(segmentTxId));
    }
    LOG.info("Accepted recovery for segment " + segmentTxId + ": " + TextFormat.shortDebugString(newData));
}
Also used: SegmentStateProto (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto), PersistedRecoveryPaxosData (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData), EditLogFile (org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile), PersistentLongFile (org.apache.hadoop.hdfs.util.PersistentLongFile), BestEffortLongFile (org.apache.hadoop.hdfs.util.BestEffortLongFile), File (java.io.File)
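
The long comment in acceptRecovery() describes an atomicity invariant rather than a mechanism: persisting the Paxos data is the commit point, and the rename of the downloaded segment can be replayed afterwards. The sketch below illustrates that roll-forward step in isolation; the file layout, ".tmp" naming, and method names are assumptions for illustration, not Hadoop's actual completeHalfDoneAcceptRecovery() implementation.

// Hedged sketch of the crash-recovery invariant described above. All names
// here (completeHalfDoneSync, the ".tmp" convention) are hypothetical.
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

class HalfDoneRecoverySketch {

    // If the commit record (the persisted Paxos data) exists but the segment
    // still sits at its temporary download path, an earlier acceptRecovery()
    // crashed between the "commit" and the rename; finish the rename now.
    static void completeHalfDoneSync(File paxosData, File tmpSegment, File finalSegment)
            throws IOException {
        if (paxosData.exists() && tmpSegment.exists()) {
            Files.move(tmpSegment.toPath(), finalSegment.toPath(),
                StandardCopyOption.REPLACE_EXISTING);
        }
        // If the commit record does not exist, any leftover temp file is just
        // an aborted download and changes no externally visible state.
    }
}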

Example 2 with PersistedRecoveryPaxosData

Use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData in project hadoop by apache.

From class Journal, method prepareRecovery:

/**
   * @see QJournalProtocol#prepareRecovery(RequestInfo, long)
   */
public synchronized PrepareRecoveryResponseProto prepareRecovery(RequestInfo reqInfo, long segmentTxId) throws IOException {
    checkFormatted();
    checkRequest(reqInfo);
    abortCurSegment();
    PrepareRecoveryResponseProto.Builder builder = PrepareRecoveryResponseProto.newBuilder();
    PersistedRecoveryPaxosData previouslyAccepted = getPersistedPaxosData(segmentTxId);
    completeHalfDoneAcceptRecovery(previouslyAccepted);
    SegmentStateProto segInfo = getSegmentInfo(segmentTxId);
    boolean hasFinalizedSegment = segInfo != null && !segInfo.getIsInProgress();
    if (previouslyAccepted != null && !hasFinalizedSegment) {
        SegmentStateProto acceptedState = previouslyAccepted.getSegmentState();
        assert acceptedState.getEndTxId() == segInfo.getEndTxId() :
            "prev accepted: " + TextFormat.shortDebugString(previouslyAccepted) + "\n" +
            "on disk:       " + TextFormat.shortDebugString(segInfo);
        builder.setAcceptedInEpoch(previouslyAccepted.getAcceptedInEpoch())
            .setSegmentState(previouslyAccepted.getSegmentState());
    } else {
        if (segInfo != null) {
            builder.setSegmentState(segInfo);
        }
    }
    builder.setLastWriterEpoch(lastWriterEpoch.get());
    if (committedTxnId.get() != HdfsServerConstants.INVALID_TXID) {
        builder.setLastCommittedTxId(committedTxnId.get());
    }
    PrepareRecoveryResponseProto resp = builder.build();
    LOG.info("Prepared recovery for segment " + segmentTxId + ": " + TextFormat.shortDebugString(resp));
    return resp;
}
Also used: SegmentStateProto (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto), PersistedRecoveryPaxosData (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData), PrepareRecoveryResponseProto (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto)
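
prepareRecovery() only reports state (any previously accepted recovery, the on-disk segment, the last writer epoch, and the committed txid); deciding which journal to recover from happens on the caller side. As a rough illustration of how such responses could be ranked, here is a hedged comparator sketch: a previously accepted value from a higher epoch wins, otherwise the longer on-disk segment is preferred. This is an assumption, not Hadoop's actual recovery-selection logic.

// Illustrative ranking of prepareRecovery() responses; greater means "prefer".
// The ranking rules are an assumption for this sketch, not Hadoop's comparator.
import java.util.Comparator;

import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;

class PrepareResponseRanker {
    static final Comparator<PrepareRecoveryResponseProto> BY_RECOVERY_PREFERENCE =
        // Responses carrying a previously accepted recovery beat those without one.
        Comparator.<PrepareRecoveryResponseProto>comparingInt(
                r -> r.hasAcceptedInEpoch() ? 1 : 0)
            // Among accepted responses, a higher acceptedInEpoch is more recent.
            .thenComparingLong(
                r -> r.hasAcceptedInEpoch() ? r.getAcceptedInEpoch() : -1L)
            // Otherwise, prefer whichever journal has the longer segment on disk.
            .thenComparingLong(
                r -> r.hasSegmentState() ? r.getSegmentState().getEndTxId() : -1L);
}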

Example 3 with PersistedRecoveryPaxosData

Use of org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData in project hadoop by apache.

From class Journal, method getPersistedPaxosData:

/**
   * Retrieve the persisted data for recovering the given segment from disk.
   */
private PersistedRecoveryPaxosData getPersistedPaxosData(long segmentTxId) throws IOException {
    File f = storage.getPaxosFile(segmentTxId);
    if (!f.exists()) {
        // Default instance has no fields filled in (they're optional)
        return null;
    }
    InputStream in = new FileInputStream(f);
    try {
        PersistedRecoveryPaxosData ret = PersistedRecoveryPaxosData.parseDelimitedFrom(in);
        Preconditions.checkState(ret != null &&
            ret.getSegmentState().getStartTxId() == segmentTxId,
            "Bad persisted data for segment %s: %s",
            segmentTxId, ret);
        return ret;
    } finally {
        IOUtils.closeStream(in);
    }
}
Also used: PersistedRecoveryPaxosData (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData), FileInputStream (java.io.FileInputStream), InputStream (java.io.InputStream), EditLogFile (org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile), PersistentLongFile (org.apache.hadoop.hdfs.util.PersistentLongFile), BestEffortLongFile (org.apache.hadoop.hdfs.util.BestEffortLongFile), File (java.io.File)
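
getPersistedPaxosData() expects a single length-delimited protobuf message in the paxos file. A minimal write-side sketch, assuming the same delimited encoding and a write-to-temp-then-rename pattern; the class name and ".tmp" convention are hypothetical and this is not the exact persistPaxosData() implementation:

// Hedged counterpart to getPersistedPaxosData(): write the Paxos data with the
// same delimited framing, then publish it atomically via rename.
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData;

class PaxosDataWriterSketch {
    static void write(File paxosFile, PersistedRecoveryPaxosData data) throws IOException {
        // Write to a temporary sibling first so readers never observe a torn file.
        File tmp = new File(paxosFile.getParentFile(), paxosFile.getName() + ".tmp");
        try (OutputStream out = new FileOutputStream(tmp)) {
            // Same length-delimited framing that parseDelimitedFrom() reads back.
            data.writeDelimitedTo(out);
        }
        // Publish the new data; a crash before this point leaves the old file intact.
        Files.move(tmp.toPath(), paxosFile.toPath(),
            StandardCopyOption.REPLACE_EXISTING);
    }
}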

Aggregations

PersistedRecoveryPaxosData (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData): 3 usages
File (java.io.File): 2 usages
SegmentStateProto (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto): 2 usages
EditLogFile (org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile): 2 usages
BestEffortLongFile (org.apache.hadoop.hdfs.util.BestEffortLongFile): 2 usages
PersistentLongFile (org.apache.hadoop.hdfs.util.PersistentLongFile): 2 usages
FileInputStream (java.io.FileInputStream): 1 usage
InputStream (java.io.InputStream): 1 usage
PrepareRecoveryResponseProto (org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto): 1 usage