Search in sources :

Example 66 with Checkpoint

use of com.linkedin.databus.core.Checkpoint in project databus by linkedin.

the class BootstrapStartScnHttpResponseProcessor method finishResponse.

/**
 * Completes processing of the /targetScn response.
 *
 * After delegating to the superclass, and unless an error has already been handled,
 * this inspects the decorated response for a remote exception. A
 * {@link BootstrapDatabaseTooOldException} is passed to the remote exception handler;
 * any other named remote exception switches the reused state to the target-SCN error
 * state. Otherwise the body is parsed as a JSON string holding the target SCN, the
 * checkpoint is sanity-checked and the SCN is stored on it. Any exception raised while
 * doing so is logged and also results in the error state. In every case that reaches
 * the end of the method the reused state is enqueued on the callback.
 *
 * @throws Exception if the superclass {@code finishResponse()} fails
 */
@Override
public void finishResponse() throws Exception {
    super.finishResponse();
    if (_errorHandled) {
        return;
    }
    try {
        final String remoteErrorName = RemoteExceptionHandler.getExceptionName(_decorated);
        final Throwable remoteError = _remoteExceptionHandler.getException(_decorated);
        // instanceof is false for null, so no separate null check is required
        if (remoteError instanceof BootstrapDatabaseTooOldException) {
            _remoteExceptionHandler.handleException(remoteError);
        } else if (remoteErrorName != null) {
            LOG.error("/targetScn response error: " + RemoteExceptionHandler.getExceptionMessage(_decorated));
            _stateReuse.switchToTargetScnResponseError();
        } else {
            // The body is a JSON-encoded string that carries the numeric target SCN.
            final String rawScn = new ObjectMapper().readValue(Channels.newInputStream(_decorated), String.class);
            LOG.info("targetScn:" + rawScn);
            final long parsedTargetScn = Long.parseLong(rawScn);
            _stateReuse.switchToTargetScnSuccess();

            // Sanity checks: a failure here is caught below and flips the state to error.
            final Checkpoint currentCkpt = _checkpoint;
            if (DbusClientMode.BOOTSTRAP_SNAPSHOT != currentCkpt.getConsumptionMode()) {
                throw new InvalidCheckpointException("TargetScnResponseProcessor:" + " expecting in client mode: " + DbusClientMode.BOOTSTRAP_SNAPSHOT, currentCkpt);
            }
            if (!currentCkpt.isSnapShotSourceCompleted()) {
                throw new InvalidCheckpointException("TargetScnResponseProcessor: current snapshot source not completed", currentCkpt);
            }
            LOG.info("Target SCN " + parsedTargetScn + " received for bootstrap catchup source " + currentCkpt.getCatchupSource() + " after completion of snapshot source " + currentCkpt.getSnapshotSource());
            currentCkpt.setBootstrapTargetScn(parsedTargetScn);
        }
    } catch (Exception processingError) {
        LOG.error("/targetScn response error:" + processingError.getMessage(), processingError);
        _stateReuse.switchToTargetScnResponseError();
    }
    _callback.enqueueMessage(_stateReuse);
}
Also used : BootstrapDatabaseTooOldException(com.linkedin.databus2.core.container.request.BootstrapDatabaseTooOldException) Checkpoint(com.linkedin.databus.core.Checkpoint) InputStream(java.io.InputStream) InvalidCheckpointException(com.linkedin.databus.core.InvalidCheckpointException) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) BootstrapDatabaseTooOldException(com.linkedin.databus2.core.container.request.BootstrapDatabaseTooOldException) InvalidCheckpointException(com.linkedin.databus.core.InvalidCheckpointException)

Example 67 with Checkpoint

use of com.linkedin.databus.core.Checkpoint in project databus by linkedin.

the class GenericDispatcher method doDispatchEvents.

/**
 * Runs one pass of the event dispatch loop.
 *
 * Events are pulled from the current dispatcher state's iterator and processed one at a
 * time until a step fails, dispatch is stopped, the state is STOP_DISPATCH_EVENTS, the
 * iterator has no more events, a shutdown is requested, or other messages are queued.
 * Control events (end-of-period, error, other system events) and data events take
 * different paths; several of them store a checkpoint. A {@code SharedCheckpointException}
 * while storing a checkpoint makes the method return immediately without the post-loop
 * handling.
 *
 * After the loop, unless dispatch is stopped or a shutdown is requested, any data events
 * still queued are flushed, a rollback is performed if the last step failed, and the
 * state is re-enqueued so that other messages get a chance to be processed.
 */
protected void doDispatchEvents() {
    boolean debugEnabled = _log.isDebugEnabled();
    boolean traceEnabled = _log.isTraceEnabled();
    //need to remove eventually but for now I want to avoid a nasty diff
    final DispatcherState curState = _internalState;
    // Nothing to read yet: wait on the iterator for up to 50 ms before entering the loop.
    // NOTE(review): getEventsIterator() is dereferenced here without a null check, while the
    // loop condition below does null-check it -- confirm whether it can be null at this point.
    if (!_stopDispatch.get() && !curState.getEventsIterator().hasNext() && !checkForShutdownRequest()) {
        if (debugEnabled)
            _log.debug("Waiting for events");
        curState.getEventsIterator().await(50, TimeUnit.MILLISECONDS);
    }
    // success: outcome of the most recent processing step; a false value ends the loop
    boolean success = true;
    // hasQueuedEvents: data events were processed since the last successful batch flush
    boolean hasQueuedEvents = false;
    while (success && !_stopDispatch.get() && curState.getStateId() != DispatcherState.StateId.STOP_DISPATCH_EVENTS && null != curState.getEventsIterator() && curState.getEventsIterator().hasNext() && !checkForShutdownRequest() && //exit the event processing loop if there are other queued notifications
    !hasMessages()) {
        DbusEventInternalReadable nextEvent = curState.getEventsIterator().next();
        // track the bytes consumed in the current window
        _currentWindowSizeInBytes += nextEvent.size();
        if (traceEnabled)
            _log.trace("Got event:" + nextEvent);
        Long eventSrcId = (long) nextEvent.srcId();
        // A pending SCN-regress flag is handled on the next event read: roll back to that
        // event's SCN, clear the flag and expect a new event window.
        if (curState.isSCNRegress()) {
            SingleSourceSCN scn = new SingleSourceSCN(nextEvent.physicalPartitionId(), nextEvent.sequence());
            _log.info("We are regressing to SCN: " + scn);
            curState.switchToRollback();
            doRollback(curState, scn, false, false);
            curState.setSCNRegress(false);
            curState.switchToExpectEventWindow();
        }
        if (null != getAsyncCallback().getStats())
            getAsyncCallback().getStats().registerWindowSeen(nextEvent.timestampInNanos(), nextEvent.sequence());
        if (nextEvent.isControlMessage()) {
            //control event
            if (nextEvent.isEndOfPeriodMarker()) {
                if (curState.isEventsSeen()) {
                    // non-empty window: end the current source (if any), end the window, then checkpoint;
                    // each step runs only if the previous one succeeded
                    if (null != curState.getCurrentSource()) {
                        curState.switchToEndStreamSource();
                        success = doEndStreamSource(curState);
                    }
                    SCN endWinScn = null;
                    if (success) {
                        _lastWindowScn = nextEvent.sequence();
                        _lastEowTsNsecs = nextEvent.timestampInNanos();
                        endWinScn = new SingleSourceSCN(nextEvent.physicalPartitionId(), _lastWindowScn);
                        curState.switchToEndStreamEventWindow(endWinScn);
                        success = doEndStreamEventWindow(curState);
                    }
                    if (success) {
                        try {
                            //end of period event
                            Checkpoint cp = createCheckpoint(curState, nextEvent);
                            success = doStoreCheckpoint(curState, nextEvent, cp, endWinScn);
                        } catch (SharedCheckpointException e) {
                            //shutdown
                            return;
                        }
                    }
                } else {
                    //empty window
                    success = true;
                    if (_log.isDebugEnabled()) {
                        _log.debug("skipping empty window: " + nextEvent.sequence());
                    }
                    //write a checkpoint; takes care of slow sources ; but skip storing the first control eop with 0 scn
                    if (nextEvent.sequence() > 0) {
                        _lastWindowScn = nextEvent.sequence();
                        // only a positive end-of-period timestamp overwrites the last end-of-window timestamp
                        //The reason is that the eop's timestamp is the max timestamp of all data events seen so far.
                        if (nextEvent.timestampInNanos() > 0) {
                            _lastEowTsNsecs = nextEvent.timestampInNanos();
                        }
                        Checkpoint ckpt = createCheckpoint(curState, nextEvent);
                        try {
                            success = doStoreCheckpoint(curState, nextEvent, ckpt, new SingleSourceSCN(nextEvent.physicalPartitionId(), nextEvent.sequence()));
                        } catch (SharedCheckpointException e) {
                            //shutdown
                            return;
                        }
                    } else {
                        _log.warn("EOP with scn=" + nextEvent.sequence());
                    }
                }
                if (success) {
                    curState.switchToExpectEventWindow();
                    //we have recovered from the error  and it's not the dummy window
                    if (nextEvent.sequence() > 0) {
                        if (!getStatus().isRunningStatus())
                            getStatus().resume();
                    }
                }
            } else if (nextEvent.isErrorEvent()) {
                _log.info("Error event: " + nextEvent.sequence());
                success = processErrorEvent(curState, nextEvent);
            } else {
                //any other control (system) event
                success = processSysEvent(curState, nextEvent);
                if (success) {
                    if (nextEvent.isCheckpointMessage()) {
                        Checkpoint sysCheckpt = createCheckpoint(curState, nextEvent);
                        try {
                            // SCN to checkpoint at: the event's sequence in online consumption, otherwise the bootstrap since-SCN
                            long scn = sysCheckpt.getConsumptionMode() == DbusClientMode.ONLINE_CONSUMPTION ? nextEvent.sequence() : sysCheckpt.getBootstrapSinceScn();
                            //ensure that control event with 0 scn doesn't get saved unless it is during snapshot of bootstrap
                            if (scn > 0 || sysCheckpt.getConsumptionMode() == DbusClientMode.BOOTSTRAP_SNAPSHOT) {
                                success = doStoreCheckpoint(curState, nextEvent, sysCheckpt, new SingleSourceSCN(nextEvent.physicalPartitionId(), scn));
                            }
                        } catch (SharedCheckpointException e) {
                            //shutdown
                            return;
                        }
                    }
                }
            }
        } else {
            curState.setEventsSeen(true);
            //not a control event
            if (curState.getStateId().equals(StateId.EXPECT_EVENT_WINDOW) || curState.getStateId().equals(StateId.REPLAY_DATA_EVENTS)) {
                // first data event of a window: start the window, then start the source if the id is non-negative
                SCN startScn = new SingleSourceSCN(nextEvent.physicalPartitionId(), nextEvent.sequence());
                curState.switchToStartStreamEventWindow(startScn);
                success = doStartStreamEventWindow(curState);
                if (success && (eventSrcId.longValue() >= 0)) {
                    success = doCheckStartSource(curState, eventSrcId, new SchemaId(nextEvent.schemaId()));
                }
            } else {
                // mid-window: if the event belongs to a different source than the current one, end the current source first
                if (null != curState.getCurrentSource() && !eventSrcId.equals(curState.getCurrentSource().getId())) {
                    curState.switchToEndStreamSource();
                    success = doEndStreamSource(curState);
                }
                if (success) {
                    //Check if schemas of the source exist.
                    //Also check if the exact schema id present in event exists in the client. This is worthwhile if there's a
                    //guarantee that the entire window is written with the same schemaId, which is the case if the relay does not use a new schema
                    //mid-window
                    success = doCheckStartSource(curState, eventSrcId, new SchemaId(nextEvent.schemaId()));
                }
            }
            if (success) {
                //finally: process data event
                success = processDataEvent(curState, nextEvent);
                if (success) {
                    hasQueuedEvents = true;
                    if (hasCheckpointThresholdBeenExceeded()) {
                        _log.info("Attempting to checkpoint (only if the consumer callback for onCheckpoint returns SUCCESS), because " + getCurrentWindowSizeInBytes() + " bytes reached without checkpoint ");
                        success = processDataEventsBatch(curState);
                        if (success) {
                            hasQueuedEvents = false;
                            //checkpoint: for bootstrap it's the right checkpoint; that has been lazily created by a checkpoint event
                            // checkpoint: for relay: create a checkpoint that has the prevScn
                            Checkpoint cp = createCheckpoint(curState, nextEvent);
                            // DDSDBUS-1889 : scn for bootstrap is bootstrapSinceSCN
                            // scn for online consumption is : currentWindow
                            SCN lastScn = cp.getConsumptionMode() == DbusClientMode.ONLINE_CONSUMPTION ? curState.getStartWinScn() : new SingleSourceSCN(nextEvent.physicalPartitionId(), cp.getBootstrapSinceScn());
                            try {
                                // Even if storeCheckpoint fails, we
                                // should continue (hoping for the best)
                                // NOTE(review): the result is nevertheless assigned to success, so a false
                                // return ends the loop and leads to the rollback below -- confirm which is intended.
                                success = doStoreCheckpoint(curState, nextEvent, cp, lastScn);
                            } catch (SharedCheckpointException e) {
                                // shutdown
                                return;
                            }
                            curState.switchToExpectStreamDataEvents();
                            if (!getStatus().isRunningStatus())
                                getStatus().resume();
                        }
                    }
                }
            }
        }
        if (success) {
            // before next successful checkpoint
            if (hasCheckpointThresholdBeenExceeded()) {
                //drain events just in case it hasn't been drained before; mainly control events that are not checkpoint events
                success = processDataEventsBatch(curState);
                if (success) {
                    _log.warn("Checkpoint not stored, but removing older events from buffer to guarantee progress (checkpoint threshold has" + " exceeded), consider checkpointing more frequently. Triggered on control-event=" + nextEvent.isControlMessage());
                    // guarantee progress: risk being unable to rollback by
                    // removing events, but hope for the best
                    removeEvents(curState);
                }
            }
        }
    }
    // Post-loop handling is skipped entirely when dispatch was stopped or a shutdown was requested.
    if (!_stopDispatch.get() && !checkForShutdownRequest()) {
        if (success) {
            // flush data events that were processed but not yet handed over as a batch
            if (hasQueuedEvents) {
                success = processDataEventsBatch(curState);
                if (!success) {
                    _log.error("Unable to flush partial window");
                }
            }
            if (debugEnabled)
                _log.debug("doDispatchEvents to " + curState.toString());
        }
        // covers both a failure inside the loop and a failed flush just above
        if (!success) {
            curState.switchToRollback();
            doRollback(curState);
        }
        //loop around -- let any other messages be processed
        enqueueMessage(curState);
    }
}
Also used : Checkpoint(com.linkedin.databus.core.Checkpoint) DbusEventInternalReadable(com.linkedin.databus.core.DbusEventInternalReadable) SchemaId(com.linkedin.databus2.schemas.SchemaId) SCN(com.linkedin.databus.client.pub.SCN)

Example 68 with Checkpoint

use of com.linkedin.databus.core.Checkpoint in project databus by linkedin.

the class RelayDispatcher method processSysEvent.

/**
 * Handles a system (control) event received by the relay dispatcher.
 *
 * <ul>
 *   <li>Checkpoint event whose consumption mode is not ONLINE_CONSUMPTION: clears the
 *       event buffer, resets the iterators, switches to expecting an event window and
 *       sends a start message to the bootstrap puller. Fails if no bootstrap puller is
 *       present.</li>
 *   <li>Checkpoint event in ONLINE_CONSUMPTION mode: delegated to the superclass.</li>
 *   <li>SCN-regress event: marks the state as regressing and switches to expecting an
 *       event window.</li>
 *   <li>Anything else: delegated to the superclass.</li>
 * </ul>
 *
 * @param curState the dispatcher state to update
 * @param event    the system event to process
 * @return {@code true} on success; {@code false} if the bootstrap puller is missing or
 *         an exception occurred while handling a checkpoint event
 */
@Override
protected boolean processSysEvent(DispatcherState curState, DbusEvent event) {
    if (!event.isCheckpointMessage()) {
        if (!event.isSCNRegressMessage()) {
            return super.processSysEvent(curState, event);
        }
        final SCNRegressMessage regressMsg = DbusEventUtils.getSCNRegressFromEvent(event);
        _log.info("Switching relays, some of the events maybe replayed. The Checkpoint to which the client with regress: " + regressMsg);
        curState.setSCNRegress(true);
        curState.switchToExpectEventWindow();
        return true;
    }

    // Checkpoint event: everything below is guarded so that any failure is reported as false.
    try {
        final Checkpoint eventCkpt = DbusEventUtils.getCheckpointFromEvent(event);
        if (DbusClientMode.ONLINE_CONSUMPTION == eventCkpt.getConsumptionMode()) {
            return super.processSysEvent(curState, event);
        }
        if (null == _bootstrapPuller) {
            _log.error("Checkpoint specifies that the consumer is bootstrapping, but bootstrapPuller is not present (Is bootstrap disabled ?)");
            return false;
        }
        eventCkpt.setConsumptionMode(DbusClientMode.BOOTSTRAP_SNAPSHOT);
        if (DispatcherState.StateId.EXPECT_EVENT_WINDOW != curState.getStateId()) {
            // Only warn; an earlier rollback here was removed because it broke TestRelayBootstrapSwitch.
            _log.warn("The current state of the dispatcher is NOT as expected (" + DispatcherState.StateId.EXPECT_EVENT_WINDOW.name() + "). State prior to this: " + curState.getStateId().name());
        }
        curState.getEventsIterator().getEventBuffer().clear();
        curState.resetIterators();
        curState.switchToExpectEventWindow();
        _bootstrapPuller.enqueueMessage(LifecycleMessage.createStartMessage());
        _log.info("Switching to bootstrap mode");
        return true;
    } catch (Exception failure) {
        DbusPrettyLogUtils.logExceptionAtError("Internal error processing a system event", failure, _log);
        return false;
    }
}
Also used : Checkpoint(com.linkedin.databus.core.Checkpoint) SCNRegressMessage(com.linkedin.databus.core.SCNRegressMessage) DbusClientMode(com.linkedin.databus.core.DbusClientMode)

Example 69 with Checkpoint

use of com.linkedin.databus.core.Checkpoint in project databus by linkedin.

the class RelayPullThread method doBootstrap.

/**
 * Hands consumption over to the bootstrap puller.
 *
 * Closes the last open connection (if any), clones the current checkpoint, turns it into
 * an initial bootstrap checkpoint when no bootstrap start SCN is set, and sends it to the
 * bootstrap puller. A checkpoint control event in BOOTSTRAP_SNAPSHOT mode is then written
 * to the relay dispatcher and the connection state is switched to "bootstrap requested".
 * This method does not wait for the bootstrap to finish.
 *
 * @param curState the current connection state, providing the checkpoint to bootstrap from
 */
protected void doBootstrap(ConnectionState curState) {
    if (_lastOpenConnection != null) {
        _lastOpenConnection.close();
        _lastOpenConnection = null;
    }
    if (null == _sourcesConn.getBootstrapPuller()) {
        _log.warn("doBootstrap got called, but BootstrapPullThread is null. Is bootstrap disabled?");
        return;
    }

    // Work on a copy so that the checkpoint held by the connection state is left untouched.
    Checkpoint startCkpt;
    try {
        startCkpt = curState.getCheckpoint().clone();
    } catch (Exception cloneError) {
        _log.error("Error copying checkpoint:" + curState.getCheckpoint(), cloneError);
        doBootstrapFailed(BootstrapResultMessage.createBootstrapFailedMessage(cloneError));
        return;
    }
    if (!startCkpt.isBootstrapStartScnSet()) {
        startCkpt = curState.getBstCheckpointHandler().createInitialBootstrapCheckpoint(startCkpt, startCkpt.getWindowScn());
    }
    _log.info("Bootstrap begin: sinceScn=" + startCkpt.getWindowScn());
    _sourcesConn.getBootstrapPuller().enqueueMessage(CheckpointMessage.createSetCheckpointMessage(startCkpt));

    try {
        // The dispatcher gets its own checkpoint instance, rebuilt from the string form.
        final Checkpoint dispatcherCkpt = new Checkpoint(startCkpt.toString());
        dispatcherCkpt.setConsumptionMode(DbusClientMode.BOOTSTRAP_SNAPSHOT);
        final DbusEvent controlEvent = getEventFactory().createCheckpointEvent(dispatcherCkpt);
        writeEventToRelayDispatcher(curState, controlEvent, "Control Event to start bootstrap");
        curState.switchToBootstrapRequested();
    } catch (InterruptedException interrupted) {
        _log.error("Got interrupted while writing control message to bootstrap !!", interrupted);
        enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(interrupted));
    } catch (Exception failure) {
        enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(failure));
        _log.error("Exception occured while switching to bootstrap: ", failure);
    }
}
Also used : Checkpoint(com.linkedin.databus.core.Checkpoint) DbusEvent(com.linkedin.databus.core.DbusEvent) InvalidEventException(com.linkedin.databus.core.InvalidEventException) ScnNotFoundException(com.linkedin.databus.core.ScnNotFoundException) PendingEventTooLargeException(com.linkedin.databus.core.PendingEventTooLargeException) BootstrapDatabaseTooOldException(com.linkedin.databus2.core.container.request.BootstrapDatabaseTooOldException) DatabusException(com.linkedin.databus2.core.DatabusException) PullerRetriesExhaustedException(com.linkedin.databus.core.PullerRetriesExhaustedException)

Example 70 with Checkpoint

use of com.linkedin.databus.core.Checkpoint in project databus by linkedin.

the class RelayPullThread method doRegisterResponseSuccess.

/**
 * Handles a successful /register response.
 *
 * If fewer schemas were received than there are source names, the state is switched to
 * "pick server" and re-enqueued. Otherwise the schemas are forwarded to the relay
 * dispatcher and a checkpoint is chosen in this order: the one in the current state, the
 * persisted one, or a freshly created online-consumption checkpoint. Depending on that
 * checkpoint the state is switched to bootstrap or to requesting the event stream, and
 * then re-enqueued. The state is not re-enqueued when the connection is torn down, when
 * a bootstrap checkpoint is found while bootstrapping is disabled, or when a mid-window
 * checkpoint has neither a previous SCN nor a flexible request.
 *
 * @param curState the connection state carrying the register response
 */
protected void doRegisterResponseSuccess(ConnectionState curState) {
    mergeRelayCallsStats();

    final int expectedSchemaCount = _sourcesConn.getSourcesNames().size();
    final int receivedSchemaCount = curState.getSourcesSchemas().size();
    if (receivedSchemaCount < expectedSchemaCount) {
        _log.error("Expected " + expectedSchemaCount + " schemas, got: " + receivedSchemaCount);
        curState.switchToPickServer();
        enqueueMessage(curState);
        return;
    }

    _sourcesConn.getRelayDispatcher().enqueueMessage(SourcesMessage.createSetSourcesSchemasMessage(curState.getSourcesSchemas(), curState.getMetadataSchemas()));

    // Checkpoint lookup order: current state, then persisted, then brand new.
    Checkpoint ckpt = _currentState.getCheckpoint();
    if (ckpt == null) {
        _log.info("no existing checkpoint found; attempting to load persistent checkpoint");
        ckpt = _sourcesConn.loadPersistentCheckpoint();
    }
    if (ckpt != null) {
        _log.info("persisted checkpoint loaded: " + ckpt.toString());
    } else {
        _log.info(getName() + ": no checkpoint found");
        ckpt = new Checkpoint();
        // set the mode to streaming first so relay will inspect the scn
        ckpt.setConsumptionMode(DbusClientMode.ONLINE_CONSUMPTION);
        // setting windowScn makes server to return scn not found error
        ckpt.setWindowScn(0L);
        ckpt.clearBootstrapStartScn();
        if (_isConsumeCurrent) {
            ckpt.setFlexible();
            _log.info("Setting flexible checkpoint: consumeCurrent is true");
        }
    }
    if (ckpt.getFlexible()) {
        curState.setFlexibleCheckpointRequest(true);
    }

    if (toTearConnAfterHandlingResponse()) {
        // the tear-down enqueues its own follow-up, so the state is not re-enqueued here
        tearConnectionAndEnqueuePickServer();
        return;
    }

    if (_sourcesConn.isBootstrapEnabled()) {
        _sourcesConn.getBootstrapPuller().enqueueMessage(SourcesMessage.createSetSourcesSchemasMessage(curState.getSourcesSchemas(), curState.getMetadataSchemas()));
    }

    final DbusClientMode mode = ckpt.getConsumptionMode();
    if (DbusClientMode.BOOTSTRAP_SNAPSHOT == mode || DbusClientMode.BOOTSTRAP_CATCHUP == mode) {
        curState.setRelayFellOff(true);
        if (!_sourcesConn.isBootstrapEnabled()) {
            final String errorText = "bootstrap checkpoint found but bootstrapping is disabled:" + ckpt;
            _log.error(errorText);
            _status.suspendOnError(new DatabusException(errorText));
            return;
        }
        curState.switchToBootstrap(ckpt);
        enqueueMessage(curState);
        return;
    }

    if (ckpt.getWindowOffset() > 0) {
        // switched when in middle of Window
        _log.info("RelayPuller reconnecting when in middle of event window. Will regress. Current Checkpoint :" + ckpt);
        if (ckpt.getPrevScn() > 0) {
            ckpt.setWindowScn(ckpt.getPrevScn());
            ckpt.setWindowOffset(-1);
            curState.setSCNRegress(true);
        } else if (curState.isFlexibleCheckpointRequest()) {
            _log.info("Switched relays after reading partial window with flexible checkpoint !!");
            ckpt.setFlexible();
            curState.setSCNRegress(true);
        } else {
            _log.fatal("Checkpoint does not have prevSCN !!. Suspending !! Checkpoint :" + ckpt);
            enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(new Exception("Checkpoint does not have prevSCN !!. Suspending !! Checkpoint :" + ckpt)));
            return;
        }
    }
    curState.switchToRequestStream(ckpt);
    enqueueMessage(curState);
}
Also used : Checkpoint(com.linkedin.databus.core.Checkpoint) DatabusException(com.linkedin.databus2.core.DatabusException) InvalidEventException(com.linkedin.databus.core.InvalidEventException) ScnNotFoundException(com.linkedin.databus.core.ScnNotFoundException) PendingEventTooLargeException(com.linkedin.databus.core.PendingEventTooLargeException) BootstrapDatabaseTooOldException(com.linkedin.databus2.core.container.request.BootstrapDatabaseTooOldException) DatabusException(com.linkedin.databus2.core.DatabusException) PullerRetriesExhaustedException(com.linkedin.databus.core.PullerRetriesExhaustedException)

Aggregations

Checkpoint (com.linkedin.databus.core.Checkpoint)139 Test (org.testng.annotations.Test)88 ArrayList (java.util.ArrayList)46 RegisterResponseEntry (com.linkedin.databus2.core.container.request.RegisterResponseEntry)42 HashMap (java.util.HashMap)42 List (java.util.List)42 IdNamePair (com.linkedin.databus.core.util.IdNamePair)34 DefaultHttpResponse (org.jboss.netty.handler.codec.http.DefaultHttpResponse)29 ChannelBuffer (org.jboss.netty.buffer.ChannelBuffer)27 DefaultHttpChunk (org.jboss.netty.handler.codec.http.DefaultHttpChunk)25 HttpResponse (org.jboss.netty.handler.codec.http.HttpResponse)23 HttpChunk (org.jboss.netty.handler.codec.http.HttpChunk)22 BootstrapDatabaseTooOldException (com.linkedin.databus2.core.container.request.BootstrapDatabaseTooOldException)20 DefaultHttpChunkTrailer (org.jboss.netty.handler.codec.http.DefaultHttpChunkTrailer)16 HttpChunkTrailer (org.jboss.netty.handler.codec.http.HttpChunkTrailer)16 DatabusSubscription (com.linkedin.databus.core.data_model.DatabusSubscription)15 IOException (java.io.IOException)15 ServerInfo (com.linkedin.databus.client.pub.ServerInfo)14 Logger (org.apache.log4j.Logger)14 InetSocketAddress (java.net.InetSocketAddress)13