Search in sources :

Example 1 with AgentTerminationException

use of org.agrona.concurrent.AgentTerminationException in project Aeron by real-logic.

the class ClusterBackupAgent method slowTick.

/**
 * Runs the low-frequency duties of the agent.
 * <p>
 * Invokes the Aeron client invoker, refreshes the mark file activity timestamp once the update
 * deadline has been reached, polls the backup archive events and, while {@code correlationId}
 * equals {@code NULL_VALUE} and a cluster archive is present, polls that archive for an error
 * response. An error response is reported to the counted error handler and moves the agent to
 * {@code RESET_BACKUP}.
 *
 * @param nowMs the current time in milliseconds as supplied by the caller.
 * @return the sum of the work counts reported by the client invoker and the archive event poll.
 * @throws AgentTerminationException if the Aeron client is found to be closed.
 */
private int slowTick(final long nowMs) {
    final int clientWork = aeronClientInvoker.invoke();
    if (aeron.isClosed()) {
        throw new AgentTerminationException("unexpected Aeron close");
    }

    final boolean isMarkFileUpdateDue = nowMs >= markFileUpdateDeadlineMs;
    if (isMarkFileUpdateDue) {
        markFileUpdateDeadlineMs = nowMs + MARK_FILE_UPDATE_INTERVAL_MS;
        markFile.updateActivityTimestamp(nowMs);
    }

    final int totalWork = clientWork + pollBackupArchiveEvents();

    // Error responses are only polled while correlationId is NULL_VALUE and an archive exists.
    if (NULL_VALUE != correlationId || null == clusterArchive) {
        return totalWork;
    }

    final String archiveError = clusterArchive.pollForErrorResponse();
    if (null != archiveError) {
        final ClusterException warning = new ClusterException("cluster archive - " + archiveError, WARN);
        ctx.countedErrorHandler().onError(warning);
        state(RESET_BACKUP, nowMs);
    }

    return totalWork;
}
Also used : AgentTerminationException(org.agrona.concurrent.AgentTerminationException) ClusterException(io.aeron.cluster.client.ClusterException)

Example 2 with AgentTerminationException

use of org.agrona.concurrent.AgentTerminationException in project Aeron by real-logic.

the class ClusterBackupAgent method doWork.

/**
 * {@inheritDoc}
 * <p>
 * Each invocation reads the epoch clock once, runs {@code slowTick} when the slow tick deadline
 * has passed, polls the consensus subscription, and then performs the work for the current
 * {@code state}. If progress is found to have stalled, the events listener (when set) is notified
 * with a {@code TimeoutException} and the state is changed to {@code RESET_BACKUP}.
 * <p>
 * An {@code AgentTerminationException} is handed to {@code runTerminationHook}. Any other
 * {@code Exception} is reported to the events listener (when set), moves the state to
 * {@code RESET_BACKUP}, and is then rethrown.
 *
 * @return the accumulated work count for this invocation.
 */
public int doWork() {
    final long nowMs = epochClock.time();
    int workCount = 0;

    try {
        final boolean isSlowTickDue = nowMs > slowTickDeadlineMs;
        if (isSlowTickDue) {
            slowTickDeadlineMs = nowMs + SLOW_TICK_INTERVAL_MS;
            workCount += slowTick(nowMs);
        }

        workCount += consensusSubscription.poll(fragmentAssembler, ConsensusAdapter.FRAGMENT_LIMIT);

        // Work done for the current state; stays zero if no case matches.
        int stateWork = 0;
        switch (state) {
            case BACKUP_QUERY:
                stateWork = backupQuery(nowMs);
                break;

            case SNAPSHOT_RETRIEVE:
                stateWork = snapshotRetrieve(nowMs);
                break;

            case LIVE_LOG_RECORD:
                stateWork = liveLogRecord(nowMs);
                break;

            case LIVE_LOG_REPLAY:
                stateWork = liveLogReplay(nowMs);
                break;

            case UPDATE_RECORDING_LOG:
                stateWork = updateRecordingLog(nowMs);
                break;

            case BACKING_UP:
                stateWork = backingUp(nowMs);
                break;

            case RESET_BACKUP:
                stateWork = resetBackup(nowMs);
                break;
        }
        workCount += stateWork;

        final boolean stalled = hasProgressStalled(nowMs);
        if (stalled && null != eventsListener) {
            eventsListener.onPossibleFailure(new TimeoutException("progress has stalled", Category.WARN));
        }
        if (stalled) {
            state(RESET_BACKUP, nowMs);
        }
    } catch (final AgentTerminationException terminationEx) {
        runTerminationHook(terminationEx);
    } catch (final Exception ex) {
        if (null != eventsListener) {
            eventsListener.onPossibleFailure(ex);
        }
        state(RESET_BACKUP, nowMs);
        throw ex;
    }

    return workCount;
}
Also used : AgentTerminationException(org.agrona.concurrent.AgentTerminationException) ClusterException(io.aeron.cluster.client.ClusterException) TimeoutException(io.aeron.exceptions.TimeoutException)

Example 3 with AgentTerminationException

use of org.agrona.concurrent.AgentTerminationException in project Aeron by real-logic.

the class Election method ensureRecordingLogCoherent.

/**
 * Brings the recording log up to the given leadership term, appending any missing term entries,
 * and then forces the log to storage using the configured file sync level.
 * <p>
 * When the consensus module agent has no log recording id ({@code NULL_VALUE}) the method returns
 * without touching the recording log if {@code logPosition} is zero, and otherwise fails.
 * <ul>
 *   <li>If the recording log has no term yet, terms {@code 0} up to (but excluding)
 *   {@code leadershipTermId} are appended and committed at position 0, followed by an entry for
 *   {@code leadershipTermId} at base position 0.</li>
 *   <li>If the last term is older than {@code leadershipTermId}, an uncommitted last term is first
 *   committed at {@code logTermBasePosition}; the intermediate terms and then
 *   {@code leadershipTermId} are appended using the last term's log position as base.</li>
 *   <li>In every case, {@code leadershipTermId} is committed at {@code logPosition} unless
 *   {@code logPosition} is {@code NULL_VALUE}.</li>
 * </ul>
 *
 * @param leadershipTermId    the leadership term the recording log must contain.
 * @param logTermBasePosition position used to commit an uncommitted prior term.
 * @param logPosition         position to commit for {@code leadershipTermId}, or {@code NULL_VALUE}.
 * @param nowNs               current time in nanoseconds, converted to the cluster clock time unit.
 * @throws AgentTerminationException if no log recording id exists and {@code logPosition} is non-zero.
 * @throws ClusterException          if the prior term is uncommitted and {@code logTermBasePosition}
 *                                   is {@code NULL_VALUE}.
 */
private void ensureRecordingLogCoherent(final long leadershipTermId, final long logTermBasePosition, final long logPosition, final long nowNs) {
    final long recordingId = consensusModuleAgent.logRecordingId();
    if (NULL_VALUE == recordingId) {
        if (0 != logPosition) {
            throw new AgentTerminationException("log recording id not found");
        }
        return;
    }

    final long timestamp = ctx.clusterClock().timeUnit().convert(nowNs, TimeUnit.NANOSECONDS);
    final RecordingLog recordingLog = ctx.recordingLog();
    RecordingLog.Entry latestTerm = recordingLog.findLastTerm();

    if (null == latestTerm) {
        // Empty log: back-fill every earlier term as committed at position 0.
        long termId = 0;
        while (termId < leadershipTermId) {
            recordingLog.appendTerm(recordingId, termId, 0, timestamp);
            recordingLog.commitLogPosition(termId, 0);
            termId++;
        }
        recordingLog.appendTerm(recordingId, leadershipTermId, 0, timestamp);
    } else if (latestTerm.leadershipTermId < leadershipTermId) {
        if (NULL_VALUE == latestTerm.logPosition) {
            if (NULL_VALUE == logTermBasePosition) {
                throw new ClusterException(
                    "Prior term was not committed: " + latestTerm +
                    " and logTermBasePosition was not specified: leadershipTermId = " + leadershipTermId +
                    ", logTermBasePosition = " + logTermBasePosition +
                    ", logPosition = " + logPosition +
                    ", nowNs = " + nowNs);
            }

            recordingLog.commitLogPosition(latestTerm.leadershipTermId, logTermBasePosition);
            // Re-read the entry after committing the prior term.
            latestTerm = Objects.requireNonNull(recordingLog.findLastTerm());
        }

        // Fill the gap between the last known term and the requested term.
        long termId = latestTerm.leadershipTermId + 1;
        while (termId < leadershipTermId) {
            recordingLog.appendTerm(recordingId, termId, latestTerm.logPosition, timestamp);
            recordingLog.commitLogPosition(termId, latestTerm.logPosition);
            termId++;
        }
        recordingLog.appendTerm(recordingId, leadershipTermId, latestTerm.logPosition, timestamp);
    }

    // Common tail of every path: commit the requested term when a log position was supplied.
    if (NULL_VALUE != logPosition) {
        recordingLog.commitLogPosition(leadershipTermId, logPosition);
    }

    recordingLog.force(ctx.fileSyncLevel());
}
Also used : AgentTerminationException(org.agrona.concurrent.AgentTerminationException) ClusterException(io.aeron.cluster.client.ClusterException)

Example 4 with AgentTerminationException

use of org.agrona.concurrent.AgentTerminationException in project aeron by real-logic.

the class ClusterBackupAgent method slowTick.

/**
 * Periodic housekeeping executed from the agent's duty cycle.
 * <p>
 * The steps, in order: invoke the Aeron client invoker; fail if the Aeron client has closed;
 * update the mark file activity timestamp if its deadline is due; poll backup archive events;
 * and, when {@code correlationId} is {@code NULL_VALUE} and {@code clusterArchive} is non-null,
 * poll the archive for an error response. A non-null error response is passed to the counted
 * error handler as a {@code ClusterException} and the state is set to {@code RESET_BACKUP}.
 *
 * @param nowMs the current time in milliseconds as supplied by the caller.
 * @return the work count from the client invoker plus that from polling backup archive events.
 * @throws AgentTerminationException if {@code aeron.isClosed()} reports true.
 */
private int slowTick(final long nowMs) {
    int workDone = aeronClientInvoker.invoke();

    if (aeron.isClosed()) {
        throw new AgentTerminationException("unexpected Aeron close");
    }

    if (markFileUpdateDeadlineMs <= nowMs) {
        markFileUpdateDeadlineMs = nowMs + MARK_FILE_UPDATE_INTERVAL_MS;
        markFile.updateActivityTimestamp(nowMs);
    }

    workDone += pollBackupArchiveEvents();

    final boolean shouldPollForError = null != clusterArchive && NULL_VALUE == correlationId;
    final String errorText = shouldPollForError ? clusterArchive.pollForErrorResponse() : null;
    if (null != errorText) {
        ctx.countedErrorHandler().onError(new ClusterException("cluster archive - " + errorText, WARN));
        state(RESET_BACKUP, nowMs);
    }

    return workDone;
}
Also used : AgentTerminationException(org.agrona.concurrent.AgentTerminationException) ClusterException(io.aeron.cluster.client.ClusterException)

Example 5 with AgentTerminationException

use of org.agrona.concurrent.AgentTerminationException in project aeron by real-logic.

the class ClusterBackupAgent method updateRecordingLog.

/**
 * Writes the retrieved leader terms and snapshots into the local recording log and then moves
 * the agent to {@code BACKING_UP}.
 * <p>
 * The leader log entry is appended as a term only when its leadership term is unknown to the
 * recording log and not greater than the leadership term of the first retrieved snapshot. All
 * retrieved snapshots are appended, iterating from the last list element to the first. The
 * leader's last term entry is appended when its leadership term is unknown to the recording log.
 * If anything was appended and an events listener is set, the listener is notified.
 * <p>
 * Afterwards the retrieved/to-retrieve snapshot lists are cleared, the progress timestamp is set
 * to {@code nowMs}, and the next query deadline counter is set to
 * {@code nowMs + backupQueryIntervalMs}.
 *
 * @param nowMs the current time in milliseconds as supplied by the caller.
 * @return always {@code 1}.
 * @throws AgentTerminationException if any exception is raised while appending to the recording
 *                                   log; the original exception is reported to the counted error
 *                                   handler and attached as the cause.
 */
private int updateRecordingLog(final long nowMs) {
    boolean wasRecordingLogUpdated = false;
    try {
        final long snapshotLeadershipTermId = snapshotsRetrieved.isEmpty() ? NULL_VALUE : snapshotsRetrieved.get(0).leadershipTermId;
        if (null != leaderLogEntry && recordingLog.isUnknown(leaderLogEntry.leadershipTermId) && leaderLogEntry.leadershipTermId <= snapshotLeadershipTermId) {
            recordingLog.appendTerm(liveLogRecordingId, leaderLogEntry.leadershipTermId, leaderLogEntry.termBaseLogPosition, leaderLogEntry.timestamp);
            wasRecordingLogUpdated = true;
            leaderLogEntry = null;
        }
        if (!snapshotsRetrieved.isEmpty()) {
            // Snapshots are appended from the last list element down to the first.
            for (int i = snapshotsRetrieved.size() - 1; i >= 0; i--) {
                final RecordingLog.Snapshot snapshot = snapshotsRetrieved.get(i);
                recordingLog.appendSnapshot(snapshot.recordingId, snapshot.leadershipTermId, snapshot.termBaseLogPosition, snapshot.logPosition, snapshot.timestamp, snapshot.serviceId);
            }
            wasRecordingLogUpdated = true;
        }
        if (null != leaderLastTermEntry && recordingLog.isUnknown(leaderLastTermEntry.leadershipTermId)) {
            recordingLog.appendTerm(liveLogRecordingId, leaderLastTermEntry.leadershipTermId, leaderLastTermEntry.termBaseLogPosition, leaderLastTermEntry.timestamp);
            wasRecordingLogUpdated = true;
            leaderLastTermEntry = null;
        }
    } catch (final Exception ex) {
        ctx.countedErrorHandler().onError(ex);
        // Keep the root cause attached so the stack trace is not lost on the termination path.
        throw new AgentTerminationException("failed to update recording log", ex);
    }
    if (wasRecordingLogUpdated && null != eventsListener) {
        eventsListener.onUpdatedRecordingLog(recordingLog, snapshotsRetrieved);
    }
    snapshotsRetrieved.clear();
    snapshotsToRetrieve.clear();
    timeOfLastProgressMs = nowMs;
    nextQueryDeadlineMsCounter.setOrdered(nowMs + backupQueryIntervalMs);
    state(BACKING_UP, nowMs);
    return 1;
}
Also used : AgentTerminationException(org.agrona.concurrent.AgentTerminationException) ClusterException(io.aeron.cluster.client.ClusterException) TimeoutException(io.aeron.exceptions.TimeoutException)

Aggregations

ClusterException (io.aeron.cluster.client.ClusterException): 10, AgentTerminationException (org.agrona.concurrent.AgentTerminationException): 10, TimeoutException (io.aeron.exceptions.TimeoutException): 4