use of com.linkedin.databus.core.Checkpoint in project databus by linkedin.
the class BootstrapStartScnHttpResponseProcessor method finishResponse.
/**
 * Finishes handling of the /targetScn HTTP response.
 *
 * <p>Unless an error has already been handled, the response is inspected in this order:
 * a remote {@code BootstrapDatabaseTooOldException} is delegated to the remote exception
 * handler; any other named remote exception switches the reusable state to the target-SCN
 * error state; otherwise the body is parsed as a JSON string holding the target SCN, the
 * checkpoint is sanity-checked and the SCN is stored in it. Any exception raised locally
 * while doing so also ends in the target-SCN error state. The reusable state is then
 * enqueued on the callback.
 *
 * @throws Exception if the superclass {@code finishResponse()} fails
 */
@Override
public void finishResponse() throws Exception {
  super.finishResponse();
  if (_errorHandled) {
    return;
  }

  try {
    final String remoteErrorName = RemoteExceptionHandler.getExceptionName(_decorated);
    final Throwable remoteError = _remoteExceptionHandler.getException(_decorated);

    // instanceof is false for null, so no separate null test is required
    if (remoteError instanceof BootstrapDatabaseTooOldException) {
      _remoteExceptionHandler.handleException(remoteError);
    } else if (remoteErrorName != null) {
      LOG.error("/targetScn response error: " + RemoteExceptionHandler.getExceptionMessage(_decorated));
      _stateReuse.switchToTargetScnResponseError();
    } else {
      // the response body is a JSON-encoded string containing the SCN
      final String scnString =
          new ObjectMapper().readValue(Channels.newInputStream(_decorated), String.class);
      LOG.info("targetScn:" + scnString);
      final long targetScn = Long.parseLong(scnString);
      _stateReuse.switchToTargetScnSuccess();

      // sanity checks: we must be in snapshot mode with the snapshot source completed
      final Checkpoint currentCkpt = _checkpoint;
      if (DbusClientMode.BOOTSTRAP_SNAPSHOT != currentCkpt.getConsumptionMode()) {
        throw new InvalidCheckpointException("TargetScnResponseProcessor:" + " expecting in client mode: " + DbusClientMode.BOOTSTRAP_SNAPSHOT, currentCkpt);
      }
      if (!currentCkpt.isSnapShotSourceCompleted()) {
        throw new InvalidCheckpointException("TargetScnResponseProcessor: current snapshot source not completed", currentCkpt);
      }

      LOG.info("Target SCN " + targetScn + " received for bootstrap catchup source " + currentCkpt.getCatchupSource() + " after completion of snapshot source " + currentCkpt.getSnapshotSource());
      currentCkpt.setBootstrapTargetScn(targetScn);
    }
  } catch (Exception ex) {
    // parse failures and failed sanity checks all end up in the error state
    LOG.error("/targetScn response error:" + ex.getMessage(), ex);
    _stateReuse.switchToTargetScnResponseError();
  }

  _callback.enqueueMessage(_stateReuse);
}
use of com.linkedin.databus.core.Checkpoint in project databus by linkedin.
the class GenericDispatcher method doDispatchEvents.
/**
 * Reads events from the dispatcher state's events iterator and dispatches them.
 *
 * <p>For each event: end-of-period markers end the current source/window and store a
 * checkpoint, error events go to {@code processErrorEvent}, other control events go to
 * {@code processSysEvent} (storing a checkpoint for checkpoint messages), and data events
 * go through window/source start checks and {@code processDataEvent}.
 *
 * <p>The loop ends on the first failed step, on a stop or shutdown request, when the state
 * is STOP_DISPATCH_EVENTS, when the iterator has no more events, or when other messages
 * are queued. Afterwards, unless stopping or shutting down, any queued data events are
 * flushed, a failure triggers a rollback, and the state is re-enqueued. A
 * {@code SharedCheckpointException} while storing a checkpoint makes the method return
 * immediately without re-enqueueing the state.
 */
protected void doDispatchEvents() {
boolean debugEnabled = _log.isDebugEnabled();
boolean traceEnabled = _log.isTraceEnabled();
//need to remove eventually but for now I want to avoid a nasty diff
final DispatcherState curState = _internalState;
// no events available yet and no stop/shutdown requested: wait up to 50 ms on the iterator
if (!_stopDispatch.get() && !curState.getEventsIterator().hasNext() && !checkForShutdownRequest()) {
if (debugEnabled)
_log.debug("Waiting for events");
curState.getEventsIterator().await(50, TimeUnit.MILLISECONDS);
}
// success: result of the most recent processing step; a false value ends the loop
boolean success = true;
// hasQueuedEvents: data events were processed since the last successful processDataEventsBatch in the threshold path
boolean hasQueuedEvents = false;
while (success && !_stopDispatch.get() && curState.getStateId() != DispatcherState.StateId.STOP_DISPATCH_EVENTS && null != curState.getEventsIterator() && curState.getEventsIterator().hasNext() && !checkForShutdownRequest() && //exit the event processing loop if there are other queued notifications
!hasMessages()) {
DbusEventInternalReadable nextEvent = curState.getEventsIterator().next();
// track the bytes read in the current window
_currentWindowSizeInBytes += nextEvent.size();
if (traceEnabled)
_log.trace("Got event:" + nextEvent);
Long eventSrcId = (long) nextEvent.srcId();
// a pending SCN regress is served by rolling back to the SCN of the event just read
if (curState.isSCNRegress()) {
SingleSourceSCN scn = new SingleSourceSCN(nextEvent.physicalPartitionId(), nextEvent.sequence());
_log.info("We are regressing to SCN: " + scn);
curState.switchToRollback();
doRollback(curState, scn, false, false);
curState.setSCNRegress(false);
curState.switchToExpectEventWindow();
}
// report the event's timestamp and sequence to the callback stats, if stats are available
if (null != getAsyncCallback().getStats())
getAsyncCallback().getStats().registerWindowSeen(nextEvent.timestampInNanos(), nextEvent.sequence());
if (nextEvent.isControlMessage()) {
//control event
if (nextEvent.isEndOfPeriodMarker()) {
if (curState.isEventsSeen()) {
// non-empty window: end the current source (if any), then the window, then checkpoint;
// each step runs only if the previous one succeeded
if (null != curState.getCurrentSource()) {
curState.switchToEndStreamSource();
success = doEndStreamSource(curState);
}
SCN endWinScn = null;
if (success) {
_lastWindowScn = nextEvent.sequence();
_lastEowTsNsecs = nextEvent.timestampInNanos();
endWinScn = new SingleSourceSCN(nextEvent.physicalPartitionId(), _lastWindowScn);
curState.switchToEndStreamEventWindow(endWinScn);
success = doEndStreamEventWindow(curState);
}
if (success) {
try {
//end of period event
Checkpoint cp = createCheckpoint(curState, nextEvent);
success = doStoreCheckpoint(curState, nextEvent, cp, endWinScn);
} catch (SharedCheckpointException e) {
//shutdown
return;
}
}
} else {
//empty window
success = true;
if (_log.isDebugEnabled()) {
_log.debug("skipping empty window: " + nextEvent.sequence());
}
//write a checkpoint; takes care of slow sources ; but skip storing the first control eop with 0 scn
if (nextEvent.sequence() > 0) {
_lastWindowScn = nextEvent.sequence();
// only a positive EOP timestamp replaces the last end-of-window timestamp.
// Original note: the eop's timestamp is the max timestamp of all data events seen so far.
if (nextEvent.timestampInNanos() > 0) {
_lastEowTsNsecs = nextEvent.timestampInNanos();
}
Checkpoint ckpt = createCheckpoint(curState, nextEvent);
try {
success = doStoreCheckpoint(curState, nextEvent, ckpt, new SingleSourceSCN(nextEvent.physicalPartitionId(), nextEvent.sequence()));
} catch (SharedCheckpointException e) {
//shutdown
return;
}
} else {
_log.warn("EOP with scn=" + nextEvent.sequence());
}
}
if (success) {
curState.switchToExpectEventWindow();
//we have recovered from the error and it's not the dummy window
if (nextEvent.sequence() > 0) {
if (!getStatus().isRunningStatus())
getStatus().resume();
}
}
} else if (nextEvent.isErrorEvent()) {
_log.info("Error event: " + nextEvent.sequence());
success = processErrorEvent(curState, nextEvent);
} else {
//any other control event (neither end-of-period nor error)
success = processSysEvent(curState, nextEvent);
if (success) {
if (nextEvent.isCheckpointMessage()) {
Checkpoint sysCheckpt = createCheckpoint(curState, nextEvent);
try {
// online consumption uses the event's sequence as SCN; otherwise the checkpoint's bootstrap since-SCN
long scn = sysCheckpt.getConsumptionMode() == DbusClientMode.ONLINE_CONSUMPTION ? nextEvent.sequence() : sysCheckpt.getBootstrapSinceScn();
//ensure that control event with 0 scn doesn't get saved unless it is during snapshot of bootstrap
if (scn > 0 || sysCheckpt.getConsumptionMode() == DbusClientMode.BOOTSTRAP_SNAPSHOT) {
success = doStoreCheckpoint(curState, nextEvent, sysCheckpt, new SingleSourceSCN(nextEvent.physicalPartitionId(), scn));
}
} catch (SharedCheckpointException e) {
//shutdown
return;
}
}
}
}
} else {
curState.setEventsSeen(true);
//not a control event
// first data event of a window (or of a replay): start the window, then check/start the source
if (curState.getStateId().equals(StateId.EXPECT_EVENT_WINDOW) || curState.getStateId().equals(StateId.REPLAY_DATA_EVENTS)) {
SCN startScn = new SingleSourceSCN(nextEvent.physicalPartitionId(), nextEvent.sequence());
curState.switchToStartStreamEventWindow(startScn);
success = doStartStreamEventWindow(curState);
if (success && (eventSrcId.longValue() >= 0)) {
success = doCheckStartSource(curState, eventSrcId, new SchemaId(nextEvent.schemaId()));
}
} else {
// already inside a window: if the event's source id differs from the current source, end the current source first
if (null != curState.getCurrentSource() && !eventSrcId.equals(curState.getCurrentSource().getId())) {
curState.switchToEndStreamSource();
success = doEndStreamSource(curState);
}
if (success) {
//Check if schemas of the source exist.
//Also check if the exact schema id present in event exists in the client. This is worthwhile if there's a
//guarantee that the entire window is written with the same schemaId, which is the case if the relay does not use a new schema
//mid-window
success = doCheckStartSource(curState, eventSrcId, new SchemaId(nextEvent.schemaId()));
}
}
if (success) {
//finally: process data event
success = processDataEvent(curState, nextEvent);
if (success) {
hasQueuedEvents = true;
// mid-window checkpoint once the checkpoint threshold has been exceeded
if (hasCheckpointThresholdBeenExceeded()) {
_log.info("Attempting to checkpoint (only if the consumer callback for onCheckpoint returns SUCCESS), because " + getCurrentWindowSizeInBytes() + " bytes reached without checkpoint ");
success = processDataEventsBatch(curState);
if (success) {
hasQueuedEvents = false;
//checkpoint: for bootstrap it's the right checkpoint; that has been lazily created by a checkpoint event
// checkpoint: for relay: create a checkpoint that has the prevScn
Checkpoint cp = createCheckpoint(curState, nextEvent);
// DDSDBUS-1889 : scn for bootstrap is bootstrapSinceSCN
// scn for online consumption is : currentWindow
SCN lastScn = cp.getConsumptionMode() == DbusClientMode.ONLINE_CONSUMPTION ? curState.getStartWinScn() : new SingleSourceSCN(nextEvent.physicalPartitionId(), cp.getBootstrapSinceScn());
try {
// Even if storeCheckpoint fails, we
// should continue (hoping for the best)
success = doStoreCheckpoint(curState, nextEvent, cp, lastScn);
} catch (SharedCheckpointException e) {
// shutdown
return;
}
curState.switchToExpectStreamDataEvents();
if (!getStatus().isRunningStatus())
getStatus().resume();
}
}
}
}
}
if (success) {
// before next successful checkpoint
// threshold still exceeded after handling this event: drain the batch and drop older events
if (hasCheckpointThresholdBeenExceeded()) {
//drain events just in case it hasn't been drained before; mainly control events that are not checkpoint events
success = processDataEventsBatch(curState);
if (success) {
_log.warn("Checkpoint not stored, but removing older events from buffer to guarantee progress (checkpoint threshold has" + " exceeded), consider checkpointing more frequently. Triggered on control-event=" + nextEvent.isControlMessage());
// guarantee progress: risk being unable to rollback by
// removing events, but hope for the best
removeEvents(curState);
}
}
}
}
// post-loop handling is skipped entirely when a stop or shutdown has been requested
if (!_stopDispatch.get() && !checkForShutdownRequest()) {
if (success) {
// flush data events of a partially read window
if (hasQueuedEvents) {
success = processDataEventsBatch(curState);
if (!success) {
_log.error("Unable to flush partial window");
}
}
if (debugEnabled)
_log.debug("doDispatchEvents to " + curState.toString());
}
// any failure (in the loop or in the flush above) triggers a rollback
if (!success) {
curState.switchToRollback();
doRollback(curState);
}
//loop around -- let any other messages be processed
enqueueMessage(curState);
}
}
use of com.linkedin.databus.core.Checkpoint in project databus by linkedin.
the class RelayDispatcher method processSysEvent.
/**
 * Handles a system (control) event arriving at the relay dispatcher.
 *
 * <ul>
 *   <li>Checkpoint message whose checkpoint is not in online-consumption mode: switches
 *       the client to bootstrap by clearing the event buffer, resetting the iterators and
 *       sending a start message to the bootstrap puller. Fails if no bootstrap puller is
 *       present.</li>
 *   <li>SCN-regress message: flags the state for an SCN regress.</li>
 *   <li>Everything else: delegated to the superclass.</li>
 * </ul>
 *
 * @param curState the dispatcher state to update
 * @param event the system event to process
 * @return {@code true} if the event was processed successfully, {@code false} otherwise
 */
@Override
protected boolean processSysEvent(DispatcherState curState, DbusEvent event) {
  if (!event.isCheckpointMessage()) {
    if (!event.isSCNRegressMessage()) {
      return super.processSysEvent(curState, event);
    }

    final SCNRegressMessage regressMsg = DbusEventUtils.getSCNRegressFromEvent(event);
    _log.info("Switching relays, some of the events maybe replayed. The Checkpoint to which the client with regress: " + regressMsg);
    curState.setSCNRegress(true);
    curState.switchToExpectEventWindow();
    return true;
  }

  try {
    final Checkpoint eventCkpt = DbusEventUtils.getCheckpointFromEvent(event);
    if (DbusClientMode.ONLINE_CONSUMPTION == eventCkpt.getConsumptionMode()) {
      return super.processSysEvent(curState, event);
    }

    // the checkpoint asks for bootstrap, which needs a bootstrap puller
    if (null == _bootstrapPuller) {
      _log.error("Checkpoint specifies that the consumer is bootstrapping, but bootstrapPuller is not present (Is bootstrap disabled ?)");
      return false;
    }

    eventCkpt.setConsumptionMode(DbusClientMode.BOOTSTRAP_SNAPSHOT);
    final DispatcherState.StateId stateBefore = curState.getStateId();
    if (DispatcherState.StateId.EXPECT_EVENT_WINDOW != stateBefore) {
      // only warn: a rollback here made TestRelayBootstrapSwitch fail and is not needed
      _log.warn("The current state of the dispatcher is NOT as expected (" + DispatcherState.StateId.EXPECT_EVENT_WINDOW.name() + "). State prior to this: " + curState.getStateId().name());
    }

    // drop buffered relay events and start over with the bootstrap puller
    curState.getEventsIterator().getEventBuffer().clear();
    curState.resetIterators();
    curState.switchToExpectEventWindow();
    _bootstrapPuller.enqueueMessage(LifecycleMessage.createStartMessage());
    _log.info("Switching to bootstrap mode");
    return true;
  } catch (Exception e) {
    DbusPrettyLogUtils.logExceptionAtError("Internal error processing a system event", e, _log);
    return false;
  }
}
use of com.linkedin.databus.core.Checkpoint in project databus by linkedin.
the class RelayPullThread method doBootstrap.
/**
 * Hands consumption over to the bootstrap puller.
 *
 * <p>Closes the last open relay connection (if any), clones the current checkpoint,
 * turns it into an initial bootstrap checkpoint when no bootstrap start SCN is set yet,
 * and sends it to the bootstrap puller. A copy of that checkpoint in BOOTSTRAP_SNAPSHOT
 * mode is then written to the relay dispatcher as a control event and the connection state
 * switches to "bootstrap requested". Does nothing beyond closing the connection when no
 * bootstrap puller exists.
 *
 * @param curState the current connection state, providing the checkpoint to bootstrap from
 */
protected void doBootstrap(ConnectionState curState) {
  if (_lastOpenConnection != null) {
    _lastOpenConnection.close();
    _lastOpenConnection = null;
  }

  if (null == _sourcesConn.getBootstrapPuller()) {
    _log.warn("doBootstrap got called, but BootstrapPullThread is null. Is bootstrap disabled?");
    return;
  }

  Checkpoint startCkpt;
  try {
    startCkpt = curState.getCheckpoint().clone();
  } catch (Exception e) {
    _log.error("Error copying checkpoint:" + curState.getCheckpoint(), e);
    doBootstrapFailed(BootstrapResultMessage.createBootstrapFailedMessage(e));
    return;
  }

  // no bootstrap start SCN yet: derive an initial bootstrap checkpoint from the window SCN
  if (!startCkpt.isBootstrapStartScnSet()) {
    startCkpt = curState.getBstCheckpointHandler().createInitialBootstrapCheckpoint(startCkpt, startCkpt.getWindowScn());
  }
  _log.info("Bootstrap begin: sinceScn=" + startCkpt.getWindowScn());

  final CheckpointMessage setCkptMessage = CheckpointMessage.createSetCheckpointMessage(startCkpt);
  _sourcesConn.getBootstrapPuller().enqueueMessage(setCkptMessage);

  try {
    // the dispatcher gets its own copy, built from the checkpoint's string form
    final Checkpoint dispatcherCkpt = new Checkpoint(startCkpt.toString());
    dispatcherCkpt.setConsumptionMode(DbusClientMode.BOOTSTRAP_SNAPSHOT);
    final DbusEvent startBootstrapEvent = getEventFactory().createCheckpointEvent(dispatcherCkpt);
    writeEventToRelayDispatcher(curState, startBootstrapEvent, "Control Event to start bootstrap");
    curState.switchToBootstrapRequested();
  } catch (InterruptedException ie) {
    _log.error("Got interrupted while writing control message to bootstrap !!", ie);
    enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(ie));
  } catch (Exception e) {
    enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(e));
    _log.error("Exception occured while switching to bootstrap: ", e);
  }
}
use of com.linkedin.databus.core.Checkpoint in project databus by linkedin.
the class RelayPullThread method doRegisterResponseSuccess.
/**
 * Handles a successful /register response.
 *
 * <p>If fewer schemas than sources were returned, switches to picking another server.
 * Otherwise forwards the schemas to the relay dispatcher and picks the checkpoint to read
 * from, in this order: the checkpoint of the current state, the persisted checkpoint, a
 * fresh online-consumption checkpoint. It then tears down the connection if required,
 * switches to bootstrap for bootstrap checkpoints, or requests the event stream,
 * regressing first when the checkpoint is in the middle of a window.
 *
 * <p>The connection state is re-enqueued at the end unless the connection was torn down
 * or the puller was suspended because of an error.
 *
 * @param curState the connection state holding the register response
 */
protected void doRegisterResponseSuccess(ConnectionState curState) {
  mergeRelayCallsStats();

  if (curState.getSourcesSchemas().size() < _sourcesConn.getSourcesNames().size()) {
    _log.error("Expected " + _sourcesConn.getSourcesNames().size() + " schemas, got: " + curState.getSourcesSchemas().size());
    curState.switchToPickServer();
    enqueueMessage(curState);
    return;
  }

  _sourcesConn.getRelayDispatcher().enqueueMessage(SourcesMessage.createSetSourcesSchemasMessage(curState.getSourcesSchemas(), curState.getMetadataSchemas()));

  // Checkpoint lookup order:
  //   1. existing checkpoint in the current state
  //   2. checkpoint persisted on disk
  //   3. brand new checkpoint
  Checkpoint readCkpt = _currentState.getCheckpoint();
  if (readCkpt == null) {
    _log.info("no existing checkpoint found; attempting to load persistent checkpoint");
    readCkpt = _sourcesConn.loadPersistentCheckpoint();
  }
  if (readCkpt != null) {
    _log.info("persisted checkpoint loaded: " + readCkpt.toString());
  } else {
    _log.info(getName() + ": no checkpoint found");
    readCkpt = new Checkpoint();
    // set the mode to streaming first so relay will inspect the scn
    readCkpt.setConsumptionMode(DbusClientMode.ONLINE_CONSUMPTION);
    // setting windowScn makes server to return scn not found error
    readCkpt.setWindowScn(0L);
    readCkpt.clearBootstrapStartScn();
    if (_isConsumeCurrent) {
      readCkpt.setFlexible();
      _log.info("Setting flexible checkpoint: consumeCurrent is true");
    }
  }

  if (readCkpt.getFlexible()) {
    curState.setFlexibleCheckpointRequest(true);
  }

  if (toTearConnAfterHandlingResponse()) {
    // the state is not re-enqueued on this path
    tearConnectionAndEnqueuePickServer();
    return;
  }

  if (_sourcesConn.isBootstrapEnabled()) {
    _sourcesConn.getBootstrapPuller().enqueueMessage(SourcesMessage.createSetSourcesSchemasMessage(curState.getSourcesSchemas(), curState.getMetadataSchemas()));
  }

  final DbusClientMode ckptMode = readCkpt.getConsumptionMode();
  final boolean bootstrapCheckpoint =
      DbusClientMode.BOOTSTRAP_SNAPSHOT == ckptMode || DbusClientMode.BOOTSTRAP_CATCHUP == ckptMode;

  if (bootstrapCheckpoint) {
    curState.setRelayFellOff(true);
    if (!_sourcesConn.isBootstrapEnabled()) {
      String message = "bootstrap checkpoint found but bootstrapping is disabled:" + readCkpt;
      _log.error(message);
      _status.suspendOnError(new DatabusException(message));
      return;
    }
    curState.switchToBootstrap(readCkpt);
    enqueueMessage(curState);
    return;
  }

  if (readCkpt.getWindowOffset() > 0) {
    // switched when in middle of Window
    _log.info("RelayPuller reconnecting when in middle of event window. Will regress. Current Checkpoint :" + readCkpt);
    if (readCkpt.getPrevScn() > 0) {
      readCkpt.setWindowScn(readCkpt.getPrevScn());
      readCkpt.setWindowOffset(-1);
      curState.setSCNRegress(true);
    } else if (curState.isFlexibleCheckpointRequest()) {
      _log.info("Switched relays after reading partial window with flexible checkpoint !!");
      readCkpt.setFlexible();
      curState.setSCNRegress(true);
    } else {
      // nothing to regress to: suspend instead of requesting the stream
      final String noPrevScnMessage = "Checkpoint does not have prevSCN !!. Suspending !! Checkpoint :" + readCkpt;
      _log.fatal(noPrevScnMessage);
      enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(new Exception(noPrevScnMessage)));
      return;
    }
  }

  curState.switchToRequestStream(readCkpt);
  enqueueMessage(curState);
}
Aggregations