Search in sources:

Example 11 with TermIndex

Use of org.apache.ratis.server.protocol.TermIndex in the project alluxio by Alluxio.

Method installDownloadedSnapshot of the class SnapshotReplicationManager.

/**
 * Moves the previously downloaded snapshot file into the journal snapshot directory and
 * reloads the latest snapshot from storage.
 *
 * <p>The method only proceeds when the download state can be moved from {@code DOWNLOADED} to
 * {@code INSTALLING}; before returning from an installation attempt it always tries to move
 * the state from {@code INSTALLING} back to {@code IDLE}.
 *
 * @return the index of the installed snapshot, or {@link RaftLog#INVALID_LOG_INDEX} if the state
 *         transition is rejected or the installation fails
 */
private long installDownloadedSnapshot() {
    if (!transitionState(DownloadState.DOWNLOADED, DownloadState.INSTALLING)) {
        return RaftLog.INVALID_LOG_INDEX;
    }
    // Kept outside the try so that the catch clause can clean up the downloaded file.
    File downloadedFile = null;
    try (Timer.Context timerCtx = MetricsSystem.timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_INSTALL_TIMER.getName()).time()) {
        final SnapshotInfo pending = mDownloadedSnapshot;
        if (pending == null) {
            throw new IllegalStateException("Snapshot is not completed");
        }
        // Only the first file entry of the snapshot is installed.
        final FileInfo pendingFileInfo = pending.getFiles().get(0);
        downloadedFile = pendingFileInfo.getPath().toFile();
        if (!downloadedFile.exists()) {
            throw new FileNotFoundException(String.format("Snapshot file %s is not found", downloadedFile));
        }

        // Refuse to replace the current snapshot with an older one.
        final SnapshotInfo current = mStorage.getLatestSnapshot();
        TermIndex installed = null;
        if (current != null) {
            installed = current.getTermIndex();
        }
        final TermIndex incoming = pending.getTermIndex();
        if (installed != null && incoming.compareTo(installed) < 0) {
            throw new AbortedException(String.format("Snapshot to be installed %s is older than current snapshot %s", incoming, installed));
        }

        final File targetFile = mStorage.getSnapshotFile(incoming.getTerm(), incoming.getIndex());
        LOG.debug("Moving temp snapshot {} to file {}", downloadedFile, targetFile);
        // The digest file is written first, then the snapshot is moved into place.
        MD5FileUtil.saveMD5File(targetFile, pendingFileInfo.getFileDigest());
        final boolean moved = downloadedFile.renameTo(targetFile);
        if (!moved) {
            throw new IOException(String.format("Failed to rename %s to %s", downloadedFile, targetFile));
        }
        mStorage.loadLatestSnapshot();
        LOG.info("Completed storing snapshot at {} to file {}", incoming, targetFile);
        return incoming.getIndex();
    } catch (Exception e) {
        LOG.error("Failed to install snapshot", e);
        // Best-effort removal of the downloaded file; the result is intentionally not checked.
        if (downloadedFile != null) {
            downloadedFile.delete();
        }
        return RaftLog.INVALID_LOG_INDEX;
    } finally {
        transitionState(DownloadState.INSTALLING, DownloadState.IDLE);
    }
}
Also used : SnapshotInfo(org.apache.ratis.statemachine.SnapshotInfo) SingleFileSnapshotInfo(org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo) Timer(com.codahale.metrics.Timer) FileInfo(org.apache.ratis.server.storage.FileInfo) AbortedException(alluxio.exception.status.AbortedException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) File(java.io.File) CompletionException(java.util.concurrent.CompletionException) FileNotFoundException(java.io.FileNotFoundException) AbortedException(alluxio.exception.status.AbortedException) AlluxioStatusException(alluxio.exception.status.AlluxioStatusException) IOException(java.io.IOException) NotFoundException(alluxio.exception.status.NotFoundException) TermIndex(org.apache.ratis.server.protocol.TermIndex)

Example 12 with TermIndex

Use of org.apache.ratis.server.protocol.TermIndex in the project alluxio by Alluxio.

Method installSnapshotFromLeader of the class SnapshotReplicationManager.

/**
 * Downloads and installs a snapshot from the leader.
 *
 * <p>The download is started here but completes asynchronously. The journal service client is
 * therefore kept open until the returned future completes (successfully or exceptionally),
 * instead of being closed when this method returns. If the download cannot even be started, the
 * client is closed immediately and the state is moved back to {@code IDLE}.
 *
 * @return a future with the term index of the installed snapshot; the future is completed
 *         exceptionally if this master is the leader, if the state is not {@code IDLE}, or if
 *         the download or installation fails
 */
public CompletableFuture<TermIndex> installSnapshotFromLeader() {
    if (mJournalSystem.isLeader()) {
        return RaftJournalUtils.completeExceptionally(new IllegalStateException("Abort snapshot installation after becoming a leader"));
    }
    if (!transitionState(DownloadState.IDLE, DownloadState.STREAM_DATA)) {
        return RaftJournalUtils.completeExceptionally(new IllegalStateException("State is not IDLE when starting a snapshot installation"));
    }
    // Tracks the client for the synchronous failure path; null until the client is created.
    RaftJournalServiceClient openedClient = null;
    try {
        final RaftJournalServiceClient client = getJournalServiceClient();
        openedClient = client;
        String address = String.valueOf(client.getAddress());
        SnapshotDownloader<DownloadSnapshotPRequest, DownloadSnapshotPResponse> observer = SnapshotDownloader.forFollower(mStorage, address);
        Timer.Context ctx = MetricsSystem.timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_TIMER.getName()).time();
        client.downloadSnapshot(observer);
        return observer.getFuture().thenApplyAsync((termIndex) -> {
            ctx.close();
            mDownloadedSnapshot = observer.getSnapshotToInstall();
            transitionState(DownloadState.STREAM_DATA, DownloadState.DOWNLOADED);
            long index = installDownloadedSnapshot();
            if (index == RaftLog.INVALID_LOG_INDEX) {
                throw new CompletionException(new RuntimeException(String.format("Failed to install the downloaded snapshot %s", termIndex)));
            }
            if (index != termIndex.getIndex()) {
                throw new CompletionException(new IllegalStateException(String.format("Mismatched snapshot installed - downloaded %d, installed %d", termIndex.getIndex(), index)));
            }
            return termIndex;
        }).whenComplete((termIndex, throwable) -> {
            try {
                if (throwable != null) {
                    LOG.error("Unexpected exception downloading snapshot from leader {}.", address, throwable);
                    transitionState(DownloadState.STREAM_DATA, DownloadState.IDLE);
                }
            } finally {
                // The download is finished (or has failed), so the client is no longer needed.
                closeSnapshotClientQuietly(client);
            }
        });
    } catch (Exception e) {
        // The asynchronous pipeline was never set up, so nothing else will close the client.
        closeSnapshotClientQuietly(openedClient);
        transitionState(DownloadState.STREAM_DATA, DownloadState.IDLE);
        return RaftJournalUtils.completeExceptionally(e);
    }
}

/**
 * Closes the given journal service client, logging instead of propagating any failure.
 *
 * @param client the client to close; may be {@code null}, in which case nothing happens
 */
private void closeSnapshotClientQuietly(RaftJournalServiceClient client) {
    if (client == null) {
        return;
    }
    try {
        client.close();
    } catch (Exception e) {
        LOG.warn("Failed to close journal service client after snapshot download", e);
    }
}
Also used : DownloadSnapshotPRequest(alluxio.grpc.DownloadSnapshotPRequest) TermIndex(org.apache.ratis.server.protocol.TermIndex) PriorityQueue(java.util.PriorityQueue) LoggerFactory(org.slf4j.LoggerFactory) LogUtils(alluxio.util.LogUtils) GetSnapshotInfoResponse(alluxio.grpc.GetSnapshotInfoResponse) StreamObserver(io.grpc.stub.StreamObserver) JournalQueryRequest(alluxio.grpc.JournalQueryRequest) MetricKey(alluxio.metrics.MetricKey) Map(java.util.Map) Status(io.grpc.Status) ClientContext(alluxio.ClientContext) SnapshotInfo(org.apache.ratis.statemachine.SnapshotInfo) SingleFileSnapshotInfo(org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo) UnsafeByteOperations(org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations) ServerConfiguration(alluxio.conf.ServerConfiguration) JournalQueryResponse(alluxio.grpc.JournalQueryResponse) CompletionException(java.util.concurrent.CompletionException) SimpleStateMachineStorage(org.apache.ratis.statemachine.impl.SimpleStateMachineStorage) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) RaftClientReply(org.apache.ratis.protocol.RaftClientReply) Timer(com.codahale.metrics.Timer) FileInfo(org.apache.ratis.server.storage.FileInfo) SnapshotData(alluxio.grpc.SnapshotData) MD5FileUtil(org.apache.ratis.util.MD5FileUtil) GetSnapshotInfoRequest(alluxio.grpc.GetSnapshotInfoRequest) UploadSnapshotPResponse(alluxio.grpc.UploadSnapshotPResponse) RaftLog(org.apache.ratis.server.raftlog.RaftLog) DownloadSnapshotPRequest(alluxio.grpc.DownloadSnapshotPRequest) CompletableFuture(java.util.concurrent.CompletableFuture) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) Message(org.apache.ratis.protocol.Message) GetSnapshotRequest(alluxio.grpc.GetSnapshotRequest) QuorumServerState(alluxio.grpc.QuorumServerState) SnapshotMetadata(alluxio.grpc.SnapshotMetadata) ClientIpAddressInjector(alluxio.security.authentication.ClientIpAddressInjector) 
AbortedException(alluxio.exception.status.AbortedException) MetricsSystem(alluxio.metrics.MetricsSystem) AlluxioStatusException(alluxio.exception.status.AlluxioStatusException) DownloadSnapshotPResponse(alluxio.grpc.DownloadSnapshotPResponse) Logger(org.slf4j.Logger) RaftPeerId(org.apache.ratis.protocol.RaftPeerId) IOException(java.io.IOException) Pair(alluxio.collections.Pair) NotFoundException(alluxio.exception.status.NotFoundException) File(java.io.File) MessageLite(com.google.protobuf.MessageLite) UploadSnapshotPRequest(alluxio.grpc.UploadSnapshotPRequest) MasterClientContext(alluxio.master.MasterClientContext) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Timer(com.codahale.metrics.Timer) CompletionException(java.util.concurrent.CompletionException) DownloadSnapshotPResponse(alluxio.grpc.DownloadSnapshotPResponse) CompletionException(java.util.concurrent.CompletionException) FileNotFoundException(java.io.FileNotFoundException) AbortedException(alluxio.exception.status.AbortedException) AlluxioStatusException(alluxio.exception.status.AlluxioStatusException) IOException(java.io.IOException) NotFoundException(alluxio.exception.status.NotFoundException)

Example 13 with TermIndex

Use of org.apache.ratis.server.protocol.TermIndex in the project alluxio by Alluxio.

Method takeLocalSnapshot of the class JournalStateMachine.

/**
 * Writes the current state of the local state machine to a new snapshot file.
 *
 * <p>The snapshot is first written to a temporary file, then its MD5 digest is saved and the
 * temporary file is renamed to the final snapshot file, after which the storage reloads its
 * latest snapshot.
 *
 * @return the index of last included entry, or {@link RaftLog#INVALID_LOG_INDEX} if the snapshot
 *         is skipped or fails
 */
public synchronized long takeLocalSnapshot() {
    // Snapshot format is [snapshotId, name1, bytes1, name2, bytes2, ...].

    // Preconditions under which no snapshot is attempted at all.
    if (mClosed) {
        SAMPLING_LOG.info("Skip taking snapshot because state machine is closed.");
        return RaftLog.INVALID_LOG_INDEX;
    }
    if (mServer.getLifeCycleState() != LifeCycle.State.RUNNING) {
        SAMPLING_LOG.info("Skip taking snapshot because raft server is not in running state: current state is {}.", mServer.getLifeCycleState());
        return RaftLog.INVALID_LOG_INDEX;
    }
    if (mJournalApplier.isSuspended()) {
        SAMPLING_LOG.info("Skip taking snapshot while journal application is suspended.");
        return RaftLog.INVALID_LOG_INDEX;
    }
    if (!mJournalSystem.isSnapshotAllowed()) {
        SAMPLING_LOG.info("Skip taking snapshot when it is not allowed by the journal system.");
        return RaftLog.INVALID_LOG_INDEX;
    }

    LOG.debug("Calling snapshot");
    Preconditions.checkState(!mSnapshotting, "Cannot call snapshot multiple times concurrently");
    mSnapshotting = true;
    try (Timer.Context timerCtx = MetricsSystem.timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_GENERATE_TIMER.getName()).time()) {
        mLastSnapshotStartTime = System.currentTimeMillis();
        final long snapshotId = mNextSequenceNumberToRead - 1;
        final TermIndex lastApplied = getLastAppliedTermIndex();

        // Step 1: allocate a temporary file to write the snapshot into.
        File scratchFile;
        try {
            scratchFile = RaftJournalUtils.createTempSnapshotFile(mStorage);
        } catch (IOException e) {
            LogUtils.warnWithException(LOG, "Failed to create temp snapshot file", e);
            return RaftLog.INVALID_LOG_INDEX;
        }
        LOG.info("Taking a snapshot to file {}", scratchFile);
        final File targetFile = mStorage.getSnapshotFile(lastApplied.getTerm(), lastApplied.getIndex());

        // Step 2: write the snapshot id followed by the checkpoint of all state machines.
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(scratchFile))) {
            out.writeLong(snapshotId);
            JournalUtils.writeToCheckpoint(out, getStateMachines());
        } catch (Exception e) {
            scratchFile.delete();
            LogUtils.warnWithException(LOG, "Failed to write snapshot {} to file {}", snapshotId, scratchFile, e);
            return RaftLog.INVALID_LOG_INDEX;
        }

        // Step 3: save the digest and move the temporary file to its final location.
        try {
            final MD5Hash md5 = MD5FileUtil.computeMd5ForFile(scratchFile);
            LOG.info("Saving digest for snapshot file {}", targetFile);
            MD5FileUtil.saveMD5File(targetFile, md5);
            LOG.info("Renaming a snapshot file {} to {}", scratchFile, targetFile);
            final boolean renamed = scratchFile.renameTo(targetFile);
            if (!renamed) {
                scratchFile.delete();
                LOG.warn("Failed to rename snapshot from {} to {}", scratchFile, targetFile);
                return RaftLog.INVALID_LOG_INDEX;
            }
            LOG.info("Completed snapshot up to SN {} in {}ms", snapshotId, System.currentTimeMillis() - mLastSnapshotStartTime);
        } catch (Exception e) {
            scratchFile.delete();
            LogUtils.warnWithException(LOG, "Failed to complete snapshot: {} - {}", snapshotId, targetFile, e);
            return RaftLog.INVALID_LOG_INDEX;
        }

        // Step 4: make the storage pick up the new snapshot; discard the file if that fails.
        try {
            mStorage.loadLatestSnapshot();
        } catch (Exception e) {
            targetFile.delete();
            LogUtils.warnWithException(LOG, "Failed to refresh latest snapshot: {}", snapshotId, e);
            return RaftLog.INVALID_LOG_INDEX;
        }

        mSnapshotLastIndex = lastApplied.getIndex();
        mLastCheckPointTime = System.currentTimeMillis();
        return lastApplied.getIndex();
    } finally {
        // Always clear the in-progress flag, whichever way the attempt ended.
        mSnapshotting = false;
    }
}
Also used : Timer(com.codahale.metrics.Timer) DataOutputStream(java.io.DataOutputStream) FileOutputStream(java.io.FileOutputStream) MD5Hash(org.apache.ratis.io.MD5Hash) IOException(java.io.IOException) File(java.io.File) CompletionException(java.util.concurrent.CompletionException) FileNotFoundException(java.io.FileNotFoundException) UnavailableException(alluxio.exception.status.UnavailableException) IOException(java.io.IOException) TermIndex(org.apache.ratis.server.protocol.TermIndex)

Example 14 with TermIndex

Use of org.apache.ratis.server.protocol.TermIndex in the project incubator-ratis by apache.

Method getPrevious of the class LogAppender.

/**
 * Looks up the term-index of the entry immediately before the follower's next index.
 *
 * <p>When the local log has no entry at that position, the follower's next index is asserted to
 * equal the log start index, and the term-index of the latest snapshot is used instead.
 *
 * @return the previous term-index, or {@code null} if the log has no such entry and there is no
 *         snapshot
 */
private TermIndex getPrevious() {
    final TermIndex fromLog = raftLog.getTermIndex(follower.getNextIndex() - 1);
    if (fromLog != null) {
        return fromLog;
    }
    // If the log has no previous entry, nextIndex must be equal to the log start
    // index (otherwise we will install snapshot).
    Preconditions.assertTrue(follower.getNextIndex() == raftLog.getStartIndex(), "follower's next index %s, local log start index %s", follower.getNextIndex(), raftLog.getStartIndex());
    final SnapshotInfo latestSnapshot = server.getState().getLatestSnapshot();
    if (latestSnapshot == null) {
        return null;
    }
    return latestSnapshot.getTermIndex();
}
Also used : SnapshotInfo(org.apache.ratis.statemachine.SnapshotInfo) TermIndex(org.apache.ratis.server.protocol.TermIndex)

Example 15 with TermIndex

Use of org.apache.ratis.server.protocol.TermIndex in the project incubator-ratis by apache.

Method createRequest of the class LogAppender.

/**
 * Builds the next append-entries request for the follower, if one should be sent now.
 *
 * <p>Log entries the follower does not have yet are added to the buffer until the buffer
 * rejects an entry or the leader's next index is reached. A request is produced when the buffer
 * should be flushed or when {@code shouldHeartbeat()} returns true.
 *
 * @return the request built from the buffer, or {@code null} if nothing needs to be sent
 * @throws RaftLogIOException declared by this method; presumably raised when reading the raft
 *         log fails (confirm against the raft log API)
 */
protected AppendEntriesRequestProto createRequest() throws RaftLogIOException {
    final TermIndex previous = getPrevious();
    final long leaderNext = raftLog.getNextIndex();
    long cursor = follower.getNextIndex() + buffer.getPendingEntryNum();

    boolean toSend = false;
    if (leaderNext > cursor) {
        // There are new entries: add them until the buffer rejects one or we catch up.
        boolean hasSpace = true;
        while (hasSpace && leaderNext > cursor) {
            hasSpace = buffer.addEntry(raftLog.get(cursor));
            cursor++;
        }
        // buffer is full or batch sending is disabled, send out a request
        toSend = !hasSpace || !batchSending;
    } else if (leaderNext == cursor) {
        // no new entries, then send out the entries in the buffer (if any)
        toSend = !buffer.isEmpty();
    }

    return (toSend || shouldHeartbeat()) ? buffer.getAppendRequest(previous) : null;
}
Also used : TermIndex(org.apache.ratis.server.protocol.TermIndex)

Aggregations

TermIndex (org.apache.ratis.server.protocol.TermIndex): 25; IOException (java.io.IOException): 8; File (java.io.File): 7; AutoCloseableLock (org.apache.ratis.util.AutoCloseableLock): 6; SnapshotInfo (org.apache.ratis.statemachine.SnapshotInfo): 5; BaseTest (org.apache.ratis.BaseTest): 4; LogEntryProto (org.apache.ratis.shaded.proto.RaftProtos.LogEntryProto): 4; Test (org.junit.Test): 4; Timer (com.codahale.metrics.Timer): 3; FileNotFoundException (java.io.FileNotFoundException): 3; CompletableFuture (java.util.concurrent.CompletableFuture): 3; CompletionException (java.util.concurrent.CompletionException): 3; MD5Hash (org.apache.ratis.io.MD5Hash): 3; RaftServerImpl (org.apache.ratis.server.impl.RaftServerImpl): 3; FileInfo (org.apache.ratis.server.storage.FileInfo): 3; SingleFileSnapshotInfo (org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo): 3; AbortedException (alluxio.exception.status.AbortedException): 2; AlluxioStatusException (alluxio.exception.status.AlluxioStatusException): 2; NotFoundException (alluxio.exception.status.NotFoundException): 2; FileOutputStream (java.io.FileOutputStream): 2