Search in sources :

Example 16 with UnifiedClientStats

use of com.linkedin.databus.client.pub.mbean.UnifiedClientStats in project databus by linkedin.

the class TestGenericDispatcher method runDispatcherRollback.

/**
 * Runs one dispatcher rollback scenario end to end and verifies the outcome.
 *
 * <p>Generated events are appended to an event buffer by a writer thread while a
 * {@code TestDispatcher} delivers them to a {@code TimeoutTestConsumer} that is configured to fail
 * at the requested data, checkpoint and end-of-window callbacks. After both threads have been
 * joined (with a timeout), the checkpoint counts, the consumer error metric and the number of
 * unique stored events are checked.
 *
 * <p>The dispatcher is shut down in a {@code finally} block so that a failing assertion does not
 * leave the dispatcher thread running for the rest of the test suite.
 *
 * @param numEvents  number of events that will be written out in the test
 * @param maxWindowSize  size of window expressed as #events
 * @param numFailDataEvent  the nth data event at which failure occurs; 0 == no failures
 * @param numFailCheckpointEvent  the nth checkpoint event at which failure occurs; 0 == no failures
 * @param numFailEndWindow  the nth end-of-window at which failure occurs; 0 == no failures
 * @param thresholdPct  checkpointThresholdPct - forcible checkpoint before end-of-window
 * @param negativeTest  is this test supposed to fail
 * @param numFailures  number of failures expected (across all error types); in effect controls number of rollbacks
 * @param bootstrapCheckpointsPerWindow  k bootstrap checkpoint events are written for every one end-of-window event
 * @param timeTakenForDataEventInMs  time taken for processing data events
 * @param timeTakenForControlEventInMs  time taken for processing control events
 * @param wrapAround  use a smaller producer buffer so that events will wrap around
 * @throws Exception if setup fails or the calling thread is interrupted while joining the worker threads
 */
protected void runDispatcherRollback(int numEvents, int maxWindowSize, int numFailDataEvent, int numFailCheckpointEvent, int numFailEndWindow, double thresholdPct, boolean negativeTest, int numFailures, int bootstrapCheckpointsPerWindow, long timeTakenForDataEventInMs, long timeTakenForControlEventInMs, boolean wrapAround) throws Exception {
    LOG.info("Running dispatcher rollback with: " + "numEvents=" + numEvents + " maxWindowSize=" + maxWindowSize + " numFailDataEvent=" + numFailDataEvent + " numFailCheckpoint=" + numFailCheckpointEvent + " numFailEndWindow=" + numFailEndWindow + " thresholdPct=" + thresholdPct + " negativeTest=" + negativeTest + " numFailures=" + numFailures + " bootstrapCheckpointsPerWindow=" + bootstrapCheckpointsPerWindow + " timeTakenForDataEventsInMs=" + timeTakenForDataEventInMs + " timeTakenForControlEventsInMs=" + timeTakenForControlEventInMs + " wrapAround=" + wrapAround);
    /* Experiment setup */
    int payloadSize = 20;
    int numCheckpoints = numEvents / maxWindowSize;
    /* Consumer creation */
    // set up consumer to fail on data callback at the nth event
    TimeoutTestConsumer tConsumer = new TimeoutTestConsumer(timeTakenForDataEventInMs, timeTakenForControlEventInMs, numFailCheckpointEvent, numFailDataEvent, numFailEndWindow, numFailures);
    HashMap<Long, List<RegisterResponseEntry>> schemaMap = new HashMap<Long, List<RegisterResponseEntry>>();
    short srcId = 1;
    List<RegisterResponseEntry> l1 = new ArrayList<RegisterResponseEntry>();
    l1.add(new RegisterResponseEntry(1L, srcId, SOURCE1_SCHEMA_STR));
    schemaMap.put(1L, l1);
    // the test uses exactly one source: id 1, name "source1"
    Map<Long, IdNamePair> sourcesMap = new HashMap<Long, IdNamePair>();
    List<String> sources = new ArrayList<String>();
    IdNamePair sourcePair = new IdNamePair(1L, "source1");
    sources.add(sourcePair.getName());
    sourcesMap.put(sourcePair.getId(), sourcePair);
    long consumerTimeBudgetMs = 60 * 1000;
    DatabusV2ConsumerRegistration consumerReg = new DatabusV2ConsumerRegistration(tConsumer, sources, null);
    List<DatabusV2ConsumerRegistration> allRegistrations = Arrays.asList(consumerReg);
    final UnifiedClientStats unifiedStats = new UnifiedClientStats(0, "test", "test.unified");
    // single-threaded execution of consumer
    // NOTE(review): this executor is never shut down here; confirm whether MultiConsumerCallback or
    // the dispatcher owns its lifecycle before adding an explicit shutdown.
    MultiConsumerCallback mConsumer = new MultiConsumerCallback(allRegistrations, Executors.newFixedThreadPool(1), consumerTimeBudgetMs, new StreamConsumerCallbackFactory(null, unifiedStats), null, unifiedStats, null, null);
    /* Generate events */
    Vector<DbusEvent> srcTestEvents = new Vector<DbusEvent>();
    Vector<Short> srcIdList = new Vector<Short>();
    srcIdList.add(srcId);
    DbusEventGenerator evGen = new DbusEventGenerator(0, srcIdList);
    Assert.assertTrue("Event generator produced no events.", evGen.generateEvents(numEvents, maxWindowSize, 512, payloadSize, srcTestEvents) > 0);
    // total and largest event size drive the buffer sizing below
    int totalSize = 0;
    int maxSize = 0;
    for (DbusEvent e : srcTestEvents) {
        totalSize += e.size();
        maxSize = Math.max(maxSize, e.size());
    }
    /* Source configuration */
    DatabusSourcesConnection.Config conf = new DatabusSourcesConnection.Config();
    conf.setCheckpointThresholdPct(thresholdPct);
    conf.getDispatcherRetries().setMaxRetryNum(10);
    conf.setFreeBufferThreshold(maxSize);
    conf.setConsumerTimeBudgetMs(consumerTimeBudgetMs);
    int freeBufferThreshold = conf.getFreeBufferThreshold();
    DatabusSourcesConnection.StaticConfig connConfig = conf.build();
    // make buffer large enough to hold data; the control events are large that contain checkpoints
    int producerBufferSize = wrapAround ? totalSize : totalSize * 2 + numCheckpoints * 10 * maxSize * 5 + freeBufferThreshold;
    int individualBufferSize = producerBufferSize;
    int indexSize = producerBufferSize / 10;
    int stagingBufferSize = producerBufferSize;
    /* Event Buffer creation */
    TestGenericDispatcherEventBuffer dataEventsBuffer = new TestGenericDispatcherEventBuffer(getConfig(producerBufferSize, individualBufferSize, indexSize, stagingBufferSize, AllocationPolicy.HEAP_MEMORY, QueuePolicy.BLOCK_ON_WRITE));
    List<DatabusSubscription> subs = DatabusSubscription.createSubscriptionList(sources);
    /* Generic Dispatcher creation */
    TestDispatcher<DatabusCombinedConsumer> dispatcher = new TestDispatcher<DatabusCombinedConsumer>("rollBackcheck", connConfig, subs, new InMemoryPersistenceProvider(), dataEventsBuffer, mConsumer, bootstrapCheckpointsPerWindow == 0);
    /* Launch writer */
    DbusEventAppender eventProducer = new DbusEventAppender(srcTestEvents, dataEventsBuffer, bootstrapCheckpointsPerWindow, null);
    Thread tEmitter = new Thread(eventProducer);
    tEmitter.start();
    /* Launch dispatcher */
    Thread tDispatcher = new Thread(dispatcher);
    tDispatcher.start();
    try {
        /* Now initialize this state machine */
        dispatcher.enqueueMessage(SourcesMessage.createSetSourcesIdsMessage(sourcesMap.values()));
        dispatcher.enqueueMessage(SourcesMessage.createSetSourcesSchemasMessage(schemaMap));
        // be generous; use worst case for num control events
        long waitTimeMs = (numEvents * timeTakenForDataEventInMs + numEvents * timeTakenForControlEventInMs) * 4;
        tEmitter.join(waitTimeMs);
        // wait for dispatcher to finish reading the events
        tDispatcher.join(waitTimeMs);
        Assert.assertFalse("Event producer thread is still running after " + waitTimeMs + " ms.", tEmitter.isAlive());
        LOG.info("tConsumer: " + tConsumer);
        int windowBeforeDataFail = (numFailDataEvent / maxWindowSize);
        int expectedDataFaults = numFailDataEvent == 0 ? 0 : numFailures;
        // no checkpoint faults are expected when none are injected, or when the injected checkpoint
        // failure index equals the window index in which the data failure occurs
        int expectedCheckPointFaults = (numFailCheckpointEvent == 0 || (expectedDataFaults != 0 && numFailCheckpointEvent == windowBeforeDataFail)) ? 0 : numFailures;
        // check if all windows were logged by dispatcher; in online case;
        if (bootstrapCheckpointsPerWindow == 0) {
            Assert.assertTrue("Dispatcher logged too few checkpoints: " + dispatcher.getNumCheckPoints(), dispatcher.getNumCheckPoints() >= (numCheckpoints - expectedCheckPointFaults));
        }
        // Consumer perspective
        // 1 or 0 faults injected in data callbacks; success (store) differs from callback count by that amount
        Assert.assertEquals("Mismatch between callbacks and stored data on consumer.", expectedDataFaults, tConsumer.getDataCallbackCount() - tConsumer.getStoredDataCount());
        Assert.assertTrue("Stored data count is smaller than the number of unique stored events.", tConsumer.getStoredDataCount() >= tConsumer.getNumUniqStoredEvents());
        Assert.assertEquals("Consumer failed to store expected number of checkpoints.", dispatcher.getNumCheckPoints(), tConsumer.getStoredCheckpointCount());
        // Per the original note, a checkpoint failure is converted to ConsumerCallbackResult.SKIP_CHECKPOINT
        // and therefore not seen by client metrics; only runs with checkpoint faults alone (and not a
        // negative test) are expected to report zero consumer errors.
        if (expectedCheckPointFaults == 0 || expectedDataFaults > 0 || negativeTest) {
            Assert.assertTrue("Unexpected error count in consumer metrics (" + unifiedStats.getNumConsumerErrors() + "); should be greater than or equal to numFailures (" + numFailures + ").", unifiedStats.getNumConsumerErrors() >= numFailures);
        } else {
            // unless negativeTest ...
            Assert.assertEquals("Unexpected error count in consumer metrics; checkpoint errors shouldn't count. ", 0, unifiedStats.getNumConsumerErrors());
        }
        // rollback behaviour; were all events re-sent?
        if (!negativeTest) {
            Assert.assertTrue("Expected all " + numEvents + " events to be stored but saw " + tConsumer.getNumUniqStoredEvents(), tConsumer.getNumUniqStoredEvents() == numEvents);
        } else {
            Assert.assertTrue("Expected fewer than " + numEvents + " events to be stored but saw " + tConsumer.getNumUniqStoredEvents(), tConsumer.getNumUniqStoredEvents() < numEvents);
        }
    } finally {
        // always stop the dispatcher, even when an assertion above failed
        dispatcher.shutdown();
    }
    verifyNoLocks(null, dataEventsBuffer);
}
Also used : DatabusV2ConsumerRegistration(com.linkedin.databus.client.consumer.DatabusV2ConsumerRegistration) DbusEventAppender(com.linkedin.databus.core.test.DbusEventAppender) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) IdNamePair(com.linkedin.databus.core.util.IdNamePair) Vector(java.util.Vector) SelectingDatabusCombinedConsumer(com.linkedin.databus.client.consumer.SelectingDatabusCombinedConsumer) DatabusCombinedConsumer(com.linkedin.databus.client.pub.DatabusCombinedConsumer) DelegatingDatabusCombinedConsumer(com.linkedin.databus.client.consumer.DelegatingDatabusCombinedConsumer) AbstractDatabusCombinedConsumer(com.linkedin.databus.client.consumer.AbstractDatabusCombinedConsumer) StreamConsumerCallbackFactory(com.linkedin.databus.client.consumer.StreamConsumerCallbackFactory) UnifiedClientStats(com.linkedin.databus.client.pub.mbean.UnifiedClientStats) DbusEvent(com.linkedin.databus.core.DbusEvent) MultiConsumerCallback(com.linkedin.databus.client.consumer.MultiConsumerCallback) DbusEventGenerator(com.linkedin.databus.core.test.DbusEventGenerator) DatabusSubscription(com.linkedin.databus.core.data_model.DatabusSubscription) Checkpoint(com.linkedin.databus.core.Checkpoint) UncaughtExceptionTrackingThread(com.linkedin.databus.core.util.UncaughtExceptionTrackingThread) RegisterResponseEntry(com.linkedin.databus2.core.container.request.RegisterResponseEntry)

Example 17 with UnifiedClientStats

use of com.linkedin.databus.client.pub.mbean.UnifiedClientStats in project databus by linkedin.

the class BootstrapPullThread method doReadBootstrapEvents.

/**
 * Reads the current bootstrap response into the data events buffer and advances the connection
 * state machine.
 *
 * <p>A checkpoint event built from the state's checkpoint is first written to the buffer as the
 * bootstrap phase marker. If that succeeds and the server did not report an error, the events
 * from the read channel are appended to the buffer. When the response carries the
 * {@code PhaseCompleted} metadata, the current catch-up or snapshot phase is finalized in the
 * checkpoint; otherwise the checkpoint is advanced only if at least one event was read.
 *
 * <p>If the pending event does not fit in the read buffer, a suspend-on-error message is enqueued
 * and the method returns without re-enqueueing {@code curState}.
 *
 * <p>An {@link InterruptedException} is treated as a read failure; the thread's interrupt status
 * is restored once the state-machine bookkeeping at the end of the method has run, so the caller
 * can still observe the interruption.
 *
 * @param curState the connection state holding the checkpoint, event buffer and read channel
 */
protected void doReadBootstrapEvents(ConnectionState curState) {
    boolean success = true;
    boolean debugEnabled = _log.isDebugEnabled();
    boolean enqueueMessage = true;
    // remembered so the interrupt status can be restored after the state transition is enqueued
    boolean interrupted = false;
    try {
        Checkpoint cp = curState.getCheckpoint();
        DbusEventBuffer eventBuffer = curState.getDataEventsBuffer();
        if (debugEnabled) {
            _log.debug("Sending bootstrap events to buffer");
        }
        // serialize the checkpoint event so it can be fed to the buffer through a channel
        DbusEventInternalReadable cpEvent = getEventFactory().createCheckpointEvent(cp);
        byte[] cpEventBytes = new byte[cpEvent.size()];
        if (debugEnabled) {
            _log.debug("checkpoint event size: " + cpEventBytes.length);
            _log.debug("checkpoint event:" + cpEvent.toString());
        }
        cpEvent.getRawBytes().get(cpEventBytes);
        ByteArrayInputStream cpIs = new ByteArrayInputStream(cpEventBytes);
        ReadableByteChannel cpRbc = Channels.newChannel(cpIs);
        UnifiedClientStats unifiedClientStats = _sourcesConn.getUnifiedClientStats();
        sendHeartbeat(unifiedClientStats);
        int ecnt = eventBuffer.readEvents(cpRbc);
        success = (ecnt > 0);
        if (!success) {
            _log.error("Unable to write bootstrap phase marker");
        } else {
            ChunkedBodyReadableByteChannel readChannel = curState.getReadChannel();
            String remoteErrorName = RemoteExceptionHandler.getExceptionName(readChannel);
            Throwable remoteError = _remoteExceptionHandler.getException(readChannel);
            // instanceof is already false for null, so no separate null check is needed
            if (remoteError instanceof BootstrapDatabaseTooOldException) {
                _log.error("Bootstrap database is too old!");
                _remoteExceptionHandler.handleException(remoteError);
                curState.switchToStreamResponseError();
            } else if (null != remoteErrorName) {
                //remote processing error
                _log.error("read events error: " + RemoteExceptionHandler.getExceptionMessage(readChannel));
                curState.switchToStreamResponseError();
            } else {
                sendHeartbeat(unifiedClientStats);
                int eventsNum = eventBuffer.readEvents(readChannel, curState.getListeners(), _sourcesConn.getBootstrapEventsStatsCollector());
                if (eventsNum == 0 && _remoteExceptionHandler.getPendingEventSize(readChannel) > eventBuffer.getMaxReadBufferCapacity()) {
                    // nothing was read and the pending event cannot fit in the read buffer: suspend
                    String err = "ReadBuffer max capacity(" + eventBuffer.getMaxReadBufferCapacity() + ") is less than event size(" + _remoteExceptionHandler.getPendingEventSize(readChannel) + "). Increase databus.client.connectionDefaults.bstEventBuffer.maxEventSize and restart.";
                    _log.fatal(err);
                    enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(new PendingEventTooLargeException(err)));
                    return;
                } else {
                    resetServerRetries();
                    if (debugEnabled) {
                        _log.debug("Sending events to buffer");
                    }
                    numEventsInCurrentState += eventsNum;
                    _log.info("Bootstrap events read so far: " + numEventsInCurrentState);
                    String status = readChannel.getMetadata("PhaseCompleted");
                    final BootstrapCheckpointHandler ckptHandler = curState.getBstCheckpointHandler();
                    if (status != null) {
                        // set status in checkpoint to indicate that we are done with the current source
                        if (cp.getConsumptionMode() == DbusClientMode.BOOTSTRAP_CATCHUP) {
                            ckptHandler.finalizeCatchupPhase(cp);
                        } else if (cp.getConsumptionMode() == DbusClientMode.BOOTSTRAP_SNAPSHOT) {
                            ckptHandler.finalizeSnapshotPhase(cp);
                        } else {
                            throw new RuntimeException("Invalid bootstrap phase: " + cp.getConsumptionMode());
                        }
                        _log.info("Bootstrap events read :" + numEventsInCurrentState + " during phase:" + cp.getConsumptionMode() + " [" + cp.getBootstrapSnapshotSourceIndex() + "," + cp.getBootstrapCatchupSourceIndex() + "]");
                        numEventsInCurrentState = 0;
                    } else {
                        // question: how is snapshotOffset maintained in ckpt
                        if (eventsNum > 0) {
                            cp.bootstrapCheckPoint();
                        }
                    }
                    curState.switchToStreamResponseDone();
                }
            }
        }
    } catch (InterruptedException ie) {
        _log.error("interupted", ie);
        success = false;
        interrupted = true;
    } catch (InvalidEventException e) {
        _log.error("error reading events from server: " + e.getMessage(), e);
        success = false;
    } catch (RuntimeException e) {
        _log.error("runtime error reading events from server: " + e.getMessage(), e);
        success = false;
    }
    try {
        if (toTearConnAfterHandlingResponse()) {
            tearConnectionAndEnqueuePickServer();
            enqueueMessage = false;
        } else if (!success) {
            curState.switchToPickServer();
        }
        if (enqueueMessage) {
            enqueueMessage(curState);
        }
    } finally {
        // restore the interrupt status last so the bookkeeping above is not cut short by it
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
Also used : ReadableByteChannel(java.nio.channels.ReadableByteChannel) UnifiedClientStats(com.linkedin.databus.client.pub.mbean.UnifiedClientStats) PendingEventTooLargeException(com.linkedin.databus.core.PendingEventTooLargeException) DbusEventInternalReadable(com.linkedin.databus.core.DbusEventInternalReadable) Checkpoint(com.linkedin.databus.core.Checkpoint) BootstrapCheckpointHandler(com.linkedin.databus.core.BootstrapCheckpointHandler) DbusEventBuffer(com.linkedin.databus.core.DbusEventBuffer) BootstrapDatabaseTooOldException(com.linkedin.databus2.core.container.request.BootstrapDatabaseTooOldException) Checkpoint(com.linkedin.databus.core.Checkpoint) ByteArrayInputStream(java.io.ByteArrayInputStream) InvalidEventException(com.linkedin.databus.core.InvalidEventException)

Example 18 with UnifiedClientStats

use of com.linkedin.databus.client.pub.mbean.UnifiedClientStats in project databus by linkedin.

the class TestUnifiedClientStats method testHistogramMetricsAggregationNonOverlappingRanges.

/**
   * Happy-path check of merging the two histogram/percentile metrics (timeLagSourceToReceiptMs and
   * timeLagConsumerCallbacksMs) from two per-connection stats objects into an aggregate.
   *
   * One connection is fed a "low" range of values and the other a disjoint "high" range, one sample
   * of each per loop iteration (MergeableExponentiallyDecayingReservoir.DEFAULT_SIZE iterations; 1028
   * per the original inline note).  After merging, the aggregate's median must lie between the two
   * ranges (with slack) and its maximum must lie inside the high range.  The per-connection values
   * are checked as well.
   */
@Test
public void testHistogramMetricsAggregationNonOverlappingRanges() {
    // two per-connection stats objects plus the aggregator they get merged into
    UnifiedClientStats lowStats = new UnifiedClientStats(1 /* ownerId */, "test1", "dim1");
    UnifiedClientStats highStats = new UnifiedClientStats(2 /* ownerId */, "test2", "dim2");
    UnifiedClientStats mergedStats = new UnifiedClientStats(99 /* ownerId */, "testAgg", "dimAgg");

    for (int sample = 0; sample < MergeableExponentiallyDecayingReservoir.DEFAULT_SIZE; ++sample) {
        // timeLagSourceToReceiptMs depends on System.currentTimeMillis(), both here and inside the
        // stats object, so those values may jitter slightly; callback times are exact.
        final long offsetMs = sample;
        final long nowMs = System.currentTimeMillis();
        final long lowSourceTsNs = (nowMs - 1000L - offsetMs) * DbusConstants.NUM_NSECS_IN_MSEC;
        final long highSourceTsNs = (nowMs - 5000L - offsetMs) * DbusConstants.NUM_NSECS_IN_MSEC;
        final long lowCallbackNs = offsetMs * DbusConstants.NUM_NSECS_IN_MSEC;
        final long highCallbackNs = (offsetMs + 2000L) * DbusConstants.NUM_NSECS_IN_MSEC;

        DbusEvent lowEvent = createEvent(lowSourceTsNs);
        DbusEvent highEvent = createEvent(highSourceTsNs);
        lowStats.registerDataEventReceived(lowEvent);
        highStats.registerDataEventReceived(highEvent);
        lowStats.registerCallbacksProcessed(lowCallbackNs);
        highStats.registerCallbacksProcessed(highCallbackNs);
    }

    mergedStats.merge(lowStats);
    mergedStats.merge(highStats);

    // ---- timeLagConsumerCallbacksMs (exact inputs) ----
    //   connection #1:     0 to 1027 ms
    //   connection #2:  2000 to 3027 ms
    assertEquals("unexpected timeLagConsumerCallbacksMs 50th percentile for connection #1", 513.5, lowStats.getTimeLagConsumerCallbacksMs_HistPct_50());
    assertEquals("unexpected timeLagConsumerCallbacksMs 50th percentile for connection #2", 2513.5, highStats.getTimeLagConsumerCallbacksMs_HistPct_50());

    // The aggregate median depends on how many samples of each connection survive the merge: with
    // equal shares it sits near 1513.5, otherwise near the top of #1's range or the bottom of
    // #2's.  Hence the bounds are the inner edges of the two ranges widened by 100.
    double mergedMedian = mergedStats.getTimeLagConsumerCallbacksMs_HistPct_50();
    boolean callbackMedianInRange = mergedMedian >= 927.0 && mergedMedian <= 2100.0;
    assertTrue("unexpected timeLagConsumerCallbacksMs 50th percentile for aggregated stats: " + mergedMedian, callbackMedianInRange);

    assertEquals("unexpected timeLagConsumerCallbacksMs max value for connection #1", 1027.0, lowStats.getTimeLagConsumerCallbacksMs_Max());
    assertEquals("unexpected timeLagConsumerCallbacksMs max value for connection #2", 3027.0, highStats.getTimeLagConsumerCallbacksMs_Max());

    // nominal value is 3027.0, but that sample is not guaranteed to survive the merge
    double mergedMax = mergedStats.getTimeLagConsumerCallbacksMs_Max();
    boolean callbackMaxInRange = mergedMax >= 2000.0 && mergedMax <= 3027.0;
    assertTrue("unexpected timeLagConsumerCallbacksMs max value for aggregated stats: " + mergedMax, callbackMaxInRange);

    // ---- timeLagSourceToReceiptMs (approximate inputs) ----
    //   connection #1:  1000 to 2027 ms
    //   connection #2:  5000 to 6027 ms
    // nominal value is 1513.5
    double lowMedian = lowStats.getTimeLagSourceToReceiptMs_HistPct_50();
    boolean lowMedianInRange = lowMedian >= 1512.5 && lowMedian <= 1514.5;
    assertTrue("unexpected timeLagSourceToReceiptMs 50th percentile for connection #1: " + lowMedian, lowMedianInRange);

    // nominal value is 5513.5
    double highMedian = highStats.getTimeLagSourceToReceiptMs_HistPct_50();
    boolean highMedianInRange = highMedian >= 5512.5 && highMedian <= 5514.5;
    assertTrue("unexpected timeLagSourceToReceiptMs 50th percentile for connection #2: " + highMedian, highMedianInRange);

    // as above, the aggregate median hinges on the surviving share of each connection's samples,
    // so only a loose bound is asserted
    double mergedLagMedian = mergedStats.getTimeLagSourceToReceiptMs_HistPct_50();
    boolean mergedLagMedianInRange = mergedLagMedian >= 1927.0 && mergedLagMedian <= 5100.0;
    assertTrue("unexpected timeLagSourceToReceiptMs 50th percentile for aggregated stats: " + mergedLagMedian, mergedLagMedianInRange);
}
Also used : UnifiedClientStats(com.linkedin.databus.client.pub.mbean.UnifiedClientStats) DbusEvent(com.linkedin.databus.core.DbusEvent) Test(org.testng.annotations.Test)

Example 19 with UnifiedClientStats

use of com.linkedin.databus.client.pub.mbean.UnifiedClientStats in project databus by linkedin.

the class TestUnifiedClientStats method testHistogramMetricsAggregationBootstrapMode.

/**
   * Checks merging of the timeLagSourceToReceiptMs histogram/percentile metric when one of the two
   * connections is in bootstrap mode.
   *
   * Both stats objects receive one data event per loop iteration
   * (MergeableExponentiallyDecayingReservoir.DEFAULT_SIZE iterations; 1028 per the original inline
   * note), but connection #2 is switched to bootstrapping first.  The test then asserts that every
   * aggregate percentile equals connection #1's and that every percentile reported by connection #2
   * is -1.0.
   */
@Test
public void testHistogramMetricsAggregationBootstrapMode() {
    // one streaming connection, one bootstrapping connection, and the aggregator
    UnifiedClientStats streamingStats = new UnifiedClientStats(1 /* ownerId */, "test1", "dim1");
    UnifiedClientStats bootstrapStats = new UnifiedClientStats(2 /* ownerId */, "test2", "dim2");
    UnifiedClientStats mergedStats = new UnifiedClientStats(99 /* ownerId */, "testAgg", "dimAgg");
    bootstrapStats.setBootstrappingState(true);

    for (int sample = 0; sample < MergeableExponentiallyDecayingReservoir.DEFAULT_SIZE; ++sample) {
        final long offsetMs = sample;
        final long nowMs = System.currentTimeMillis();
        final long streamingSourceTsNs = (nowMs - 1000L - offsetMs) * DbusConstants.NUM_NSECS_IN_MSEC;
        final long bootstrapSourceTsNs = (nowMs - 5000L - offsetMs) * DbusConstants.NUM_NSECS_IN_MSEC;
        streamingStats.registerDataEventReceived(createEvent(streamingSourceTsNs));
        bootstrapStats.registerDataEventReceived(createEvent(bootstrapSourceTsNs));
    }

    mergedStats.merge(streamingStats);
    mergedStats.merge(bootstrapStats);

    // aggregate must mirror the streaming connection exactly
    assertEquals("unexpected timeLagSourceToReceiptMs 50th percentile for aggregated stats", streamingStats.getTimeLagSourceToReceiptMs_HistPct_50(), mergedStats.getTimeLagSourceToReceiptMs_HistPct_50());
    assertEquals("unexpected timeLagSourceToReceiptMs 90th percentile for aggregated stats", streamingStats.getTimeLagSourceToReceiptMs_HistPct_90(), mergedStats.getTimeLagSourceToReceiptMs_HistPct_90());
    assertEquals("unexpected timeLagSourceToReceiptMs 95th percentile for aggregated stats", streamingStats.getTimeLagSourceToReceiptMs_HistPct_95(), mergedStats.getTimeLagSourceToReceiptMs_HistPct_95());
    assertEquals("unexpected timeLagSourceToReceiptMs 99th percentile for aggregated stats", streamingStats.getTimeLagSourceToReceiptMs_HistPct_99(), mergedStats.getTimeLagSourceToReceiptMs_HistPct_99());

    // a bootstrapping connection reports -1.0 for every percentile
    assertEquals("unexpected timeLagSourceToReceiptMs 50th percentile for connection #2", -1.0, bootstrapStats.getTimeLagSourceToReceiptMs_HistPct_50());
    assertEquals("unexpected timeLagSourceToReceiptMs 90th percentile for connection #2", -1.0, bootstrapStats.getTimeLagSourceToReceiptMs_HistPct_90());
    assertEquals("unexpected timeLagSourceToReceiptMs 95th percentile for connection #2", -1.0, bootstrapStats.getTimeLagSourceToReceiptMs_HistPct_95());
    assertEquals("unexpected timeLagSourceToReceiptMs 99th percentile for connection #2", -1.0, bootstrapStats.getTimeLagSourceToReceiptMs_HistPct_99());
}
Also used : UnifiedClientStats(com.linkedin.databus.client.pub.mbean.UnifiedClientStats) Test(org.testng.annotations.Test)

Example 20 with UnifiedClientStats

use of com.linkedin.databus.client.pub.mbean.UnifiedClientStats in project databus by linkedin.

the class TestUnifiedClientStats method testBasicHistogramMetrics.

/**
   * Exercises the histogram/percentile metrics (timeLagSourceToReceiptMs and
   * timeLagConsumerCallbacksMs) on a single, non-aggregated stats object.
   *
   * 200 events are registered with callback times of exactly 0..199 ms and source timestamps
   * roughly 0..1990 ms in the past (10 ms apart).  Callback percentiles are asserted exactly;
   * source-to-receipt percentiles are asserted to within +/-1 ms because the clock is read
   * separately here and inside the stats object.
   */
@Test
public void testBasicHistogramMetrics() {
    // (1) the stats object under test
    UnifiedClientStats stats = new UnifiedClientStats(3 /* ownerId */, "stats_name", "stats_dim");

    for (int sample = 0; sample < 200; ++sample) {
        // System.currentTimeMillis() cannot be overridden here or inside UnifiedClientStats, so the
        // two clock reads may differ by a varying amount (jitter); the source-to-receipt
        // assertions below use inequalities to absorb that.
        final long lagMs = 10 * sample;
        final long sourceTsNs = (System.currentTimeMillis() - lagMs) * DbusConstants.NUM_NSECS_IN_MSEC;
        // callback times are fully under our control: exactly 0 through 199 ms
        final long callbackNs = (long) sample * DbusConstants.NUM_NSECS_IN_MSEC;

        // (2) fake event, (3) one data-event registration and one callback registration per event
        // (a fixed 1:1 ratio keeps the expected values simple)
        DbusEvent event = createEvent(sourceTsNs);
        stats.registerDataEventReceived(event);
        stats.registerCallbacksProcessed(callbackNs);
    }

    // (4) expected values follow the "R-6" quantile estimate (see http://en.wikipedia.org/wiki/Quantile),
    // which the original comment states both metrics-core and Apache Commons Math use:
    //
    //   N = 200;  h = (N+1)*p  ->  100.5 (p=0.5), 180.9 (p=0.9), 190.95 (p=0.95), 198.99 (p=0.99)
    //   Q = x[floor(h)-1] + (h - floor(h)) * (x[floor(h)] - x[floor(h)-1])
    //
    //   50th:   99.0 + 0.5  * 1.0 =  99.5
    //   90th:  179.0 + 0.9  * 1.0 = 179.9
    //   95th:  189.0 + 0.95 * 1.0 = 189.95
    //   99th:  197.0 + 0.99 * 1.0 = 197.99
    assertEquals("unexpected timeLagConsumerCallbacksMs 50th percentile", 99.5, stats.getTimeLagConsumerCallbacksMs_HistPct_50());
    assertEquals("unexpected timeLagConsumerCallbacksMs 90th percentile", 179.9, stats.getTimeLagConsumerCallbacksMs_HistPct_90());
    assertEquals("unexpected timeLagConsumerCallbacksMs 95th percentile", 189.95, stats.getTimeLagConsumerCallbacksMs_HistPct_95());
    assertEquals("unexpected timeLagConsumerCallbacksMs 99th percentile", 197.99, stats.getTimeLagConsumerCallbacksMs_HistPct_99());
    assertEquals("unexpected timeLagConsumerCallbacksMs max value", 199.0, stats.getTimeLagConsumerCallbacksMs_Max());

    // Same formula with samples 10 ms apart gives approximately 995.0, 1799.0, 1899.5 and 1979.9;
    // each is accepted within +/-1 to allow for jitter.
    // nominal value is 995.0
    double p50 = stats.getTimeLagSourceToReceiptMs_HistPct_50();
    boolean p50InRange = p50 >= 994.0 && p50 <= 996.0;
    assertTrue("unexpected timeLagSourceToReceiptMs 50th percentile: " + p50, p50InRange);

    // nominal value is 1799.0
    double p90 = stats.getTimeLagSourceToReceiptMs_HistPct_90();
    boolean p90InRange = p90 >= 1798.0 && p90 <= 1800.0;
    assertTrue("unexpected timeLagSourceToReceiptMs 90th percentile: " + p90, p90InRange);

    // nominal value is 1899.5, but saw 1900.45 once
    double p95 = stats.getTimeLagSourceToReceiptMs_HistPct_95();
    boolean p95InRange = p95 >= 1898.5 && p95 <= 1900.5;
    assertTrue("unexpected timeLagSourceToReceiptMs 95th percentile: " + p95, p95InRange);

    // nominal value is 1979.9
    double p99 = stats.getTimeLagSourceToReceiptMs_HistPct_99();
    boolean p99InRange = p99 >= 1978.9 && p99 <= 1980.9;
    assertTrue("unexpected timeLagSourceToReceiptMs 99th percentile: " + p99, p99InRange);
}
Also used : UnifiedClientStats(com.linkedin.databus.client.pub.mbean.UnifiedClientStats) DbusEvent(com.linkedin.databus.core.DbusEvent) Test(org.testng.annotations.Test)

Aggregations

UnifiedClientStats (com.linkedin.databus.client.pub.mbean.UnifiedClientStats): 21; Test (org.testng.annotations.Test): 10; ConsumerCallbackStats (com.linkedin.databus.client.pub.mbean.ConsumerCallbackStats): 9; Checkpoint (com.linkedin.databus.core.Checkpoint): 9; DbusEvent (com.linkedin.databus.core.DbusEvent): 9; ArrayList (java.util.ArrayList): 7; DbusEventBuffer (com.linkedin.databus.core.DbusEventBuffer): 6; DbusEventsStatisticsCollector (com.linkedin.databus.core.monitoring.mbean.DbusEventsStatisticsCollector): 6; IdNamePair (com.linkedin.databus.core.util.IdNamePair): 6; HashMap (java.util.HashMap): 6; DatabusV2ConsumerRegistration (com.linkedin.databus.client.consumer.DatabusV2ConsumerRegistration): 5; DatabusSubscription (com.linkedin.databus.core.data_model.DatabusSubscription): 5; MultiConsumerCallback (com.linkedin.databus.client.consumer.MultiConsumerCallback): 3; SelectingDatabusCombinedConsumer (com.linkedin.databus.client.consumer.SelectingDatabusCombinedConsumer): 3; StreamConsumerCallbackFactory (com.linkedin.databus.client.consumer.StreamConsumerCallbackFactory): 3; DatabusStreamConsumer (com.linkedin.databus.client.pub.DatabusStreamConsumer): 3; ConsumerCallbackResult (com.linkedin.databus.client.pub.ConsumerCallbackResult): 2; ServerInfo (com.linkedin.databus.client.pub.ServerInfo): 2; ConsumerCallbackStatsEvent (com.linkedin.databus.client.pub.monitoring.events.ConsumerCallbackStatsEvent): 2; UnifiedClientStatsEvent (com.linkedin.databus.client.pub.monitoring.events.UnifiedClientStatsEvent): 2