use of com.linkedin.databus.client.pub.mbean.UnifiedClientStats in project databus by linkedin.
the class TestGenericDispatcher method runDispatcherRollback.
/**
 * Runs one end-to-end dispatcher scenario against a consumer that injects failures, then checks
 * the checkpoint counts, consumer callback/store counts, client error metrics and (via the number
 * of unique stored events) whether every event was eventually delivered.
 *
 * <p>The dispatcher is shut down in a {@code finally} block so that a failing assertion does not
 * leave its thread running.
 *
 * @param numEvents number of events that will be written out in the test
 * @param maxWindowSize size of window expressed as #events
 * @param numFailDataEvent the nth data event at which failure occurs; 0 == no failures
 * @param numFailCheckpointEvent the nth checkpoint event at which failure occurs; 0 == no failures
 * @param numFailEndWindow the nth end-of-window at which failure occurs; 0 == no failures
 * @param thresholdPct checkpointThresholdPct - forcible checkpoint before end-of-window
 * @param negativeTest is this test supposed to fail
 * @param numFailures number of failures expected (across all error types); in effect controls number of rollbacks
 * @param bootstrapCheckpointsPerWindow k bootstrap checkpoint events are written for every one end-of-window event
 * @param timeTakenForDataEventInMs time taken for processing data events
 * @param timeTakenForControlEventInMs time taken for processing control events
 * @param wrapAround use a smaller producer buffer so that events will wrap around
 * @throws Exception if set-up fails or the calling thread is interrupted while waiting for the
 *         emitter/dispatcher threads
 */
protected void runDispatcherRollback(int numEvents, int maxWindowSize, int numFailDataEvent,
        int numFailCheckpointEvent, int numFailEndWindow, double thresholdPct, boolean negativeTest,
        int numFailures, int bootstrapCheckpointsPerWindow, long timeTakenForDataEventInMs,
        long timeTakenForControlEventInMs, boolean wrapAround) throws Exception {
    LOG.info("Running dispatcher rollback with: " + "numEvents=" + numEvents + " maxWindowSize=" + maxWindowSize + " numFailDataEvent=" + numFailDataEvent + " numFailCheckpoint=" + numFailCheckpointEvent + " numFailEndWindow=" + numFailEndWindow + " thresholdPct=" + thresholdPct + " negativeTest=" + negativeTest + " numFailures=" + numFailures + " bootstrapCheckpointsPerWindow=" + bootstrapCheckpointsPerWindow + " timeTakenForDataEventsInMs=" + timeTakenForDataEventInMs + " timeTakenForControlEventsInMs=" + timeTakenForControlEventInMs + " wrapAround=" + wrapAround);

    /* Experiment setup */
    int payloadSize = 20;
    int numCheckpoints = numEvents / maxWindowSize;

    /* Consumer creation */
    // set up consumer to fail on data callback at the nth event
    TimeoutTestConsumer tConsumer = new TimeoutTestConsumer(timeTakenForDataEventInMs, timeTakenForControlEventInMs, numFailCheckpointEvent, numFailDataEvent, numFailEndWindow, numFailures);

    HashMap<Long, List<RegisterResponseEntry>> schemaMap = new HashMap<Long, List<RegisterResponseEntry>>();
    short srcId = 1;
    List<RegisterResponseEntry> l1 = new ArrayList<RegisterResponseEntry>();
    l1.add(new RegisterResponseEntry(1L, srcId, SOURCE1_SCHEMA_STR));
    schemaMap.put(1L, l1);

    // exactly one source ("source1", id 1) is registered
    Map<Long, IdNamePair> sourcesMap = new HashMap<Long, IdNamePair>();
    List<String> sources = new ArrayList<String>();
    IdNamePair sourcePair = new IdNamePair(1L, "source1");
    sources.add(sourcePair.getName());
    sourcesMap.put(sourcePair.getId(), sourcePair);

    long consumerTimeBudgetMs = 60 * 1000;
    DatabusV2ConsumerRegistration consumerReg = new DatabusV2ConsumerRegistration(tConsumer, sources, null);
    List<DatabusV2ConsumerRegistration> allRegistrations = Arrays.asList(consumerReg);
    final UnifiedClientStats unifiedStats = new UnifiedClientStats(0, "test", "test.unified");

    // single-threaded execution of consumer
    // NOTE(review): this executor is never shut down here; confirm who owns it before adding a
    // shutdown, since the dispatcher may still submit callbacks while it is stopping.
    MultiConsumerCallback mConsumer = new MultiConsumerCallback(allRegistrations, Executors.newFixedThreadPool(1), consumerTimeBudgetMs, new StreamConsumerCallbackFactory(null, unifiedStats), null, unifiedStats, null, null);

    /* Generate events */
    Vector<DbusEvent> srcTestEvents = new Vector<DbusEvent>();
    Vector<Short> srcIdList = new Vector<Short>();
    srcIdList.add(srcId);
    DbusEventGenerator evGen = new DbusEventGenerator(0, srcIdList);
    Assert.assertTrue(evGen.generateEvents(numEvents, maxWindowSize, 512, payloadSize, srcTestEvents) > 0);

    // total and largest serialized event size drive the buffer sizing below
    int totalSize = 0;
    int maxSize = 0;
    for (DbusEvent e : srcTestEvents) {
        totalSize += e.size();
        maxSize = Math.max(maxSize, e.size());
    }

    /* Source configuration */
    DatabusSourcesConnection.Config conf = new DatabusSourcesConnection.Config();
    conf.setCheckpointThresholdPct(thresholdPct);
    conf.getDispatcherRetries().setMaxRetryNum(10);
    conf.setFreeBufferThreshold(maxSize);
    conf.setConsumerTimeBudgetMs(consumerTimeBudgetMs);
    int freeBufferThreshold = conf.getFreeBufferThreshold();
    DatabusSourcesConnection.StaticConfig connConfig = conf.build();

    // make buffer large enough to hold data; the control events are large that contain checkpoints
    int producerBufferSize = wrapAround ? totalSize : totalSize * 2 + numCheckpoints * 10 * maxSize * 5 + freeBufferThreshold;
    int individualBufferSize = producerBufferSize;
    int indexSize = producerBufferSize / 10;
    int stagingBufferSize = producerBufferSize;

    /* Event Buffer creation */
    TestGenericDispatcherEventBuffer dataEventsBuffer = new TestGenericDispatcherEventBuffer(getConfig(producerBufferSize, individualBufferSize, indexSize, stagingBufferSize, AllocationPolicy.HEAP_MEMORY, QueuePolicy.BLOCK_ON_WRITE));
    List<DatabusSubscription> subs = DatabusSubscription.createSubscriptionList(sources);

    /* Generic Dispatcher creation */
    TestDispatcher<DatabusCombinedConsumer> dispatcher = new TestDispatcher<DatabusCombinedConsumer>("rollBackcheck", connConfig, subs, new InMemoryPersistenceProvider(), dataEventsBuffer, mConsumer, bootstrapCheckpointsPerWindow == 0);

    /* Launch writer */
    DbusEventAppender eventProducer = new DbusEventAppender(srcTestEvents, dataEventsBuffer, bootstrapCheckpointsPerWindow, null);
    Thread tEmitter = new Thread(eventProducer);
    tEmitter.start();

    /* Launch dispatcher */
    Thread tDispatcher = new Thread(dispatcher);
    tDispatcher.start();

    try {
        /* Now initialize this state machine */
        dispatcher.enqueueMessage(SourcesMessage.createSetSourcesIdsMessage(sourcesMap.values()));
        dispatcher.enqueueMessage(SourcesMessage.createSetSourcesSchemasMessage(schemaMap));

        // be generous; use worst case for num control events
        // NOTE: if both per-event times are 0 this is 0, and Thread.join(0) waits indefinitely.
        long waitTimeMs = (numEvents * timeTakenForDataEventInMs + numEvents * timeTakenForControlEventInMs) * 4;
        tEmitter.join(waitTimeMs);
        // wait for dispatcher to finish reading the events
        tDispatcher.join(waitTimeMs);
        Assert.assertFalse(tEmitter.isAlive());
        System.out.println("tConsumer: " + tConsumer);

        int windowBeforeDataFail = (numFailDataEvent / maxWindowSize);
        int expectedDataFaults = numFailDataEvent == 0 ? 0 : numFailures;
        int expectedCheckPointFaults = (numFailCheckpointEvent == 0 || (expectedDataFaults != 0 && numFailCheckpointEvent == windowBeforeDataFail)) ? 0 : numFailures;

        // check if all windows were logged by dispatcher; in online case;
        if (bootstrapCheckpointsPerWindow == 0) {
            Assert.assertTrue(dispatcher.getNumCheckPoints() >= (numCheckpoints - expectedCheckPointFaults));
        }

        // Consumer perspective
        // 1 or 0 faults injected in data callbacks; success (store) differs callback by 1
        Assert.assertEquals("Mismatch between callbacks and stored data on consumer.", expectedDataFaults, tConsumer.getDataCallbackCount() - tConsumer.getStoredDataCount());
        Assert.assertTrue(tConsumer.getStoredDataCount() >= tConsumer.getNumUniqStoredEvents());
        Assert.assertEquals("Consumer failed to store expected number of checkpoints.", dispatcher.getNumCheckPoints(), tConsumer.getStoredCheckpointCount());

        // A checkpoint failure is converted to ConsumerCallbackResult.SKIP_CHECKPOINT and is
        // therefore not seen by client metrics.
        if (expectedCheckPointFaults == 0 || expectedDataFaults > 0 || negativeTest) {
            Assert.assertTrue("Unexpected error count in consumer metrics (" + unifiedStats.getNumConsumerErrors() + "); should be greater than or equal to numFailures (" + numFailures + ").", unifiedStats.getNumConsumerErrors() >= numFailures);
        } else {
            // unless negativeTest ...
            Assert.assertEquals("Unexpected error count in consumer metrics; checkpoint errors shouldn't count. ", 0, unifiedStats.getNumConsumerErrors());
        }

        // rollback behaviour; were all events re-sent?
        if (!negativeTest) {
            Assert.assertTrue(tConsumer.getNumUniqStoredEvents() == numEvents);
        } else {
            Assert.assertTrue(tConsumer.getNumUniqStoredEvents() < numEvents);
        }
    } finally {
        // always stop the dispatcher, even when an assertion above failed
        dispatcher.shutdown();
    }
    verifyNoLocks(null, dataEventsBuffer);
}
use of com.linkedin.databus.client.pub.mbean.UnifiedClientStats in project databus by linkedin.
the class BootstrapPullThread method doReadBootstrapEvents.
/**
 * Writes a checkpoint event for the current bootstrap checkpoint into the data events buffer,
 * then reads the server's response from the connection's read channel into the same buffer and
 * moves the connection state forward.
 *
 * <p>Outcomes visible in this method:
 * <ul>
 *   <li>a remote error on the read channel switches the state to "stream response error";</li>
 *   <li>a pending event larger than the buffer's maximum read capacity enqueues a
 *       suspend-on-error message and returns immediately;</li>
 *   <li>otherwise the checkpoint is updated (phase finalized or checkpoint advanced) and the
 *       state switches to "stream response done".</li>
 * </ul>
 * Any caught exception, or a failure to write the checkpoint event, marks the read as failed and
 * switches the state to "pick server", unless the connection is torn down instead.
 *
 * @param curState the connection state holding the checkpoint, buffer and read channel
 */
protected void doReadBootstrapEvents(ConnectionState curState) {
    final boolean debug = _log.isDebugEnabled();
    boolean readOk = true;
    try {
        Checkpoint ckpt = curState.getCheckpoint();
        DbusEventBuffer buffer = curState.getDataEventsBuffer();
        if (debug) {
            _log.debug("Sending bootstrap events to buffer");
        }

        // serialize the checkpoint as an event and feed it through the buffer first
        DbusEventInternalReadable markerEvent = getEventFactory().createCheckpointEvent(ckpt);
        byte[] markerBytes = new byte[markerEvent.size()];
        if (debug) {
            _log.debug("checkpoint event size: " + markerBytes.length);
            _log.debug("checkpoint event:" + markerEvent.toString());
        }
        markerEvent.getRawBytes().get(markerBytes);
        ReadableByteChannel markerChannel = Channels.newChannel(new ByteArrayInputStream(markerBytes));

        UnifiedClientStats stats = _sourcesConn.getUnifiedClientStats();
        sendHeartbeat(stats);
        int markerEventsWritten = buffer.readEvents(markerChannel);
        readOk = markerEventsWritten > 0;

        if (readOk) {
            ChunkedBodyReadableByteChannel responseChannel = curState.getReadChannel();
            String remoteErrorName = RemoteExceptionHandler.getExceptionName(responseChannel);
            Throwable remoteError = _remoteExceptionHandler.getException(responseChannel);

            if (remoteError instanceof BootstrapDatabaseTooOldException) {
                _log.error("Bootstrap database is too old!");
                _remoteExceptionHandler.handleException(remoteError);
                curState.switchToStreamResponseError();
            } else if (remoteErrorName != null) {
                // remote processing error
                _log.error("read events error: " + RemoteExceptionHandler.getExceptionMessage(responseChannel));
                curState.switchToStreamResponseError();
            } else {
                sendHeartbeat(stats);
                int eventsRead = buffer.readEvents(responseChannel, curState.getListeners(), _sourcesConn.getBootstrapEventsStatsCollector());

                boolean nothingRead = (eventsRead == 0);
                if (nothingRead && _remoteExceptionHandler.getPendingEventSize(responseChannel) > buffer.getMaxReadBufferCapacity()) {
                    // the pending event cannot fit: suspend and skip the post-processing below
                    String err = "ReadBuffer max capacity(" + buffer.getMaxReadBufferCapacity() + ") is less than event size(" + _remoteExceptionHandler.getPendingEventSize(responseChannel) + "). Increase databus.client.connectionDefaults.bstEventBuffer.maxEventSize and restart.";
                    _log.fatal(err);
                    enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(new PendingEventTooLargeException(err)));
                    return;
                }

                resetServerRetries();
                if (debug) {
                    _log.debug("Sending events to buffer");
                }
                numEventsInCurrentState += eventsRead;
                _log.info("Bootstrap events read so far: " + numEventsInCurrentState);

                String phaseCompleted = responseChannel.getMetadata("PhaseCompleted");
                final BootstrapCheckpointHandler ckptHandler = curState.getBstCheckpointHandler();
                if (phaseCompleted == null) {
                    // question: how is snapshotOffset maintained in ckpt
                    if (eventsRead > 0) {
                        ckpt.bootstrapCheckPoint();
                    }
                } else {
                    // set status in checkpoint to indicate that we are done with the current source
                    if (ckpt.getConsumptionMode() == DbusClientMode.BOOTSTRAP_CATCHUP) {
                        ckptHandler.finalizeCatchupPhase(ckpt);
                    } else if (ckpt.getConsumptionMode() == DbusClientMode.BOOTSTRAP_SNAPSHOT) {
                        ckptHandler.finalizeSnapshotPhase(ckpt);
                    } else {
                        throw new RuntimeException("Invalid bootstrap phase: " + ckpt.getConsumptionMode());
                    }
                    // the consumption mode is re-read here, after the finalize call above
                    _log.info("Bootstrap events read :" + numEventsInCurrentState + " during phase:" + ckpt.getConsumptionMode() + " [" + ckpt.getBootstrapSnapshotSourceIndex() + "," + ckpt.getBootstrapCatchupSourceIndex() + "]");
                    numEventsInCurrentState = 0;
                }
                curState.switchToStreamResponseDone();
            }
        } else {
            _log.error("Unable to write bootstrap phase marker");
        }
    } catch (InterruptedException ie) {
        _log.error("interupted", ie);
        readOk = false;
    } catch (InvalidEventException e) {
        _log.error("error reading events from server: " + e.getMessage(), e);
        readOk = false;
    } catch (RuntimeException e) {
        _log.error("runtime error reading events from server: " + e.getMessage(), e);
        readOk = false;
    }

    // tearing down the connection replaces the usual re-enqueue of the current state
    if (toTearConnAfterHandlingResponse()) {
        tearConnectionAndEnqueuePickServer();
        return;
    }
    if (!readOk) {
        curState.switchToPickServer();
    }
    enqueueMessage(curState);
}
use of com.linkedin.databus.client.pub.mbean.UnifiedClientStats in project databus by linkedin.
the class TestUnifiedClientStats method testHistogramMetricsAggregationNonOverlappingRanges.
/**
 * Verifies merging of the two histogram/percentile metrics (timeLagSourceToReceiptMs and
 * timeLagConsumerCallbacksMs) when the two merged connections report disjoint value ranges.
 *
 * Connection #1 is fed low values and connection #2 high values,
 * MergeableExponentiallyDecayingReservoir.DEFAULT_SIZE samples each, interleaved within one loop
 * so their timestamps are comparable. After merging both into an aggregate, the test checks that
 * the aggregate maximum lies inside #2's range and that the aggregate median lies between the two
 * ranges (with a generous allowance, since the mix of retained samples is not deterministic).
 */
@Test
public void testHistogramMetricsAggregationNonOverlappingRanges() {
    // two per-connection stats objects plus the aggregator they are merged into
    UnifiedClientStats lowStats = new UnifiedClientStats(1 /* ownerId */, "test1", "dim1");
    UnifiedClientStats highStats = new UnifiedClientStats(2 /* ownerId */, "test2", "dim2");
    UnifiedClientStats mergedStats = new UnifiedClientStats(99 /* ownerId */, "testAgg", "dimAgg");

    for (int sample = 0; sample < MergeableExponentiallyDecayingReservoir.DEFAULT_SIZE; ++sample) {
        // The source-to-receipt lag depends on System.currentTimeMillis(), both here and inside
        // registerDataEventReceived(), so those values may jitter slightly.
        long nowMs = System.currentTimeMillis();
        long lowSourceTimestampNs = (nowMs - 1000L - sample) * DbusConstants.NUM_NSECS_IN_MSEC;
        long highSourceTimestampNs = (nowMs - 5000L - sample) * DbusConstants.NUM_NSECS_IN_MSEC;
        long lowCallbackElapsedNs = (long) sample * DbusConstants.NUM_NSECS_IN_MSEC;
        long highCallbackElapsedNs = ((long) sample + 2000L) * DbusConstants.NUM_NSECS_IN_MSEC;

        DbusEvent lowEvent = createEvent(lowSourceTimestampNs);
        DbusEvent highEvent = createEvent(highSourceTimestampNs);

        lowStats.registerDataEventReceived(lowEvent);
        highStats.registerDataEventReceived(highEvent);
        lowStats.registerCallbacksProcessed(lowCallbackElapsedNs);
        highStats.registerCallbacksProcessed(highCallbackElapsedNs);
    }

    mergedStats.merge(lowStats);
    mergedStats.merge(highStats);

    // timeLagConsumerCallbacksMs values are exact:
    //   connection #1:    0 .. 1027 ms
    //   connection #2: 2000 .. 3027 ms
    assertEquals("unexpected timeLagConsumerCallbacksMs 50th percentile for connection #1", 513.5, lowStats.getTimeLagConsumerCallbacksMs_HistPct_50());
    assertEquals("unexpected timeLagConsumerCallbacksMs 50th percentile for connection #2", 2513.5, highStats.getTimeLagConsumerCallbacksMs_HistPct_50());

    // The aggregate median depends on what fraction of each connection's samples is retained.
    // With equal fractions it is near 1513.5; otherwise it sits near the top of #1's range or
    // the bottom of #2's range. A margin of 100 on either side should be safe.
    double mergedCallbackMedian = mergedStats.getTimeLagConsumerCallbacksMs_HistPct_50();
    assertTrue("unexpected timeLagConsumerCallbacksMs 50th percentile for aggregated stats: " + mergedCallbackMedian, mergedCallbackMedian >= 927.0 && mergedCallbackMedian <= 2100.0);

    assertEquals("unexpected timeLagConsumerCallbacksMs max value for connection #1", 1027.0, lowStats.getTimeLagConsumerCallbacksMs_Max());
    assertEquals("unexpected timeLagConsumerCallbacksMs max value for connection #2", 3027.0, highStats.getTimeLagConsumerCallbacksMs_Max());

    // nominal value is 3027.0
    double mergedCallbackMax = mergedStats.getTimeLagConsumerCallbacksMs_Max();
    assertTrue("unexpected timeLagConsumerCallbacksMs max value for aggregated stats: " + mergedCallbackMax, mergedCallbackMax >= 2000.0 && mergedCallbackMax <= 3027.0);

    // timeLagSourceToReceiptMs values are approximate:
    //   connection #1: 1000 .. 2027 ms
    //   connection #2: 5000 .. 6027 ms
    // nominal value is 1513.5
    double lowReceiptMedian = lowStats.getTimeLagSourceToReceiptMs_HistPct_50();
    assertTrue("unexpected timeLagSourceToReceiptMs 50th percentile for connection #1: " + lowReceiptMedian, lowReceiptMedian >= 1512.5 && lowReceiptMedian <= 1514.5);

    // nominal value is 5513.5
    double highReceiptMedian = highStats.getTimeLagSourceToReceiptMs_HistPct_50();
    assertTrue("unexpected timeLagSourceToReceiptMs 50th percentile for connection #2: " + highReceiptMedian, highReceiptMedian >= 5512.5 && highReceiptMedian <= 5514.5);

    // As with the callback metric, the aggregate median depends strongly on the retained mix of
    // the two connections' samples, so the bounds are deliberately loose.
    double mergedReceiptMedian = mergedStats.getTimeLagSourceToReceiptMs_HistPct_50();
    assertTrue("unexpected timeLagSourceToReceiptMs 50th percentile for aggregated stats: " + mergedReceiptMedian, mergedReceiptMedian >= 1927.0 && mergedReceiptMedian <= 5100.0);
}
use of com.linkedin.databus.client.pub.mbean.UnifiedClientStats in project databus by linkedin.
the class TestUnifiedClientStats method testHistogramMetricsAggregationBootstrapMode.
/**
 * Verifies merging of the timeLagSourceToReceiptMs histogram/percentile metric when one of the
 * two merged connections is in bootstrap mode.
 *
 * Both connections receive MergeableExponentiallyDecayingReservoir.DEFAULT_SIZE data events, but
 * connection #2 is flagged as bootstrapping beforehand. The test expects that merging #2 leaves
 * the aggregate untouched, i.e. every aggregate percentile equals the corresponding percentile of
 * connection #1, and that every percentile reported by connection #2 itself is -1.0.
 * (This is similar to testHistogramMetricsAggregationDeadSourcesConnection().)
 */
@Test
public void testHistogramMetricsAggregationBootstrapMode() {
    // two per-connection stats objects plus the aggregator they are merged into
    UnifiedClientStats onlineStats = new UnifiedClientStats(1 /* ownerId */, "test1", "dim1");
    UnifiedClientStats bootstrapStats = new UnifiedClientStats(2 /* ownerId */, "test2", "dim2");
    UnifiedClientStats mergedStats = new UnifiedClientStats(99 /* ownerId */, "testAgg", "dimAgg");

    bootstrapStats.setBootstrappingState(true);

    for (int sample = 0; sample < MergeableExponentiallyDecayingReservoir.DEFAULT_SIZE; ++sample) {
        long nowMs = System.currentTimeMillis();
        long onlineSourceTimestampNs = (nowMs - 1000L - sample) * DbusConstants.NUM_NSECS_IN_MSEC;
        long bootstrapSourceTimestampNs = (nowMs - 5000L - sample) * DbusConstants.NUM_NSECS_IN_MSEC;
        onlineStats.registerDataEventReceived(createEvent(onlineSourceTimestampNs));
        bootstrapStats.registerDataEventReceived(createEvent(bootstrapSourceTimestampNs));
    }

    mergedStats.merge(onlineStats);
    mergedStats.merge(bootstrapStats);

    // the aggregate must mirror connection #1 exactly
    assertEquals("unexpected timeLagSourceToReceiptMs 50th percentile for aggregated stats",
            onlineStats.getTimeLagSourceToReceiptMs_HistPct_50(),
            mergedStats.getTimeLagSourceToReceiptMs_HistPct_50());
    assertEquals("unexpected timeLagSourceToReceiptMs 90th percentile for aggregated stats",
            onlineStats.getTimeLagSourceToReceiptMs_HistPct_90(),
            mergedStats.getTimeLagSourceToReceiptMs_HistPct_90());
    assertEquals("unexpected timeLagSourceToReceiptMs 95th percentile for aggregated stats",
            onlineStats.getTimeLagSourceToReceiptMs_HistPct_95(),
            mergedStats.getTimeLagSourceToReceiptMs_HistPct_95());
    assertEquals("unexpected timeLagSourceToReceiptMs 99th percentile for aggregated stats",
            onlineStats.getTimeLagSourceToReceiptMs_HistPct_99(),
            mergedStats.getTimeLagSourceToReceiptMs_HistPct_99());

    // bootstrap mode => should return -1.0 for all percentiles
    assertEquals("unexpected timeLagSourceToReceiptMs 50th percentile for connection #2", -1.0,
            bootstrapStats.getTimeLagSourceToReceiptMs_HistPct_50());
    assertEquals("unexpected timeLagSourceToReceiptMs 90th percentile for connection #2", -1.0,
            bootstrapStats.getTimeLagSourceToReceiptMs_HistPct_90());
    assertEquals("unexpected timeLagSourceToReceiptMs 95th percentile for connection #2", -1.0,
            bootstrapStats.getTimeLagSourceToReceiptMs_HistPct_95());
    assertEquals("unexpected timeLagSourceToReceiptMs 99th percentile for connection #2", -1.0,
            bootstrapStats.getTimeLagSourceToReceiptMs_HistPct_99());
}
use of com.linkedin.databus.client.pub.mbean.UnifiedClientStats in project databus by linkedin.
the class TestUnifiedClientStats method testBasicHistogramMetrics.
/**
 * Exercises the histogram/percentile metrics (timeLagSourceToReceiptMs and
 * timeLagConsumerCallbacksMs) on a single, non-aggregated stats object.
 *
 * 200 events are registered. Callback times are exactly 0..199 ms, so their percentiles are
 * asserted exactly; source-to-receipt lags are roughly 0..1990 ms in 10 ms steps and are asserted
 * with a +/-1 tolerance because they depend on the wall clock.
 */
@Test
public void testBasicHistogramMetrics() {
    // (1) create stats object
    UnifiedClientStats stats = new UnifiedClientStats(3 /* ownerId */, "stats_name", "stats_dim");

    for (int sample = 0; sample < 200; ++sample) {
        // System.currentTimeMillis() cannot be overridden here, so this call and the one inside
        // registerDataEventReceived() may differ by a varying amount (jitter). The assertions
        // on timeLagSourceToReceiptMs therefore use inequalities.
        // Expected timeLagSourceToReceiptMs values range from 0 to 1990 ms (approximately).
        long sourceTimestampNs = (System.currentTimeMillis() - 10 * sample) * DbusConstants.NUM_NSECS_IN_MSEC;

        // timeLagConsumerCallbacksMs is fully under our control: values are exactly 0..199 ms.
        long callbackElapsedNs = (long) sample * DbusConstants.NUM_NSECS_IN_MSEC;

        // (2) create a fake DbusEvent (200 in total)
        DbusEvent event = createEvent(sourceTimestampNs);

        // (3) register one data event and one callback per iteration. Normally there are more
        // callbacks than data events, but a fixed 1:1 ratio keeps the arithmetic simple.
        stats.registerDataEventReceived(event);
        stats.registerCallbacksProcessed(callbackElapsedNs);
    }

    // (4) verify histogram values are as expected
    // Both metrics-core and Apache Commons Math use the "R-6" quantile-estimation method, as
    // described at http://en.wikipedia.org/wiki/Quantile .
    //
    //   N = 200
    //   p = 0.5, 0.9, 0.95, 0.99
    //   h = (N+1)*p = 100.5, 180.9, 190.95, 198.99
    //
    //   Q[50th] = x[100-1] + (100.5  - 100)*(x[100-1+1] - x[100-1]) =  99.0 + 0.5 *(100.0 -  99.0) =  99.5
    //   Q[90th] = x[180-1] + (180.9  - 180)*(x[180-1+1] - x[180-1]) = 179.0 + 0.9 *(180.0 - 179.0) = 179.9
    //   Q[95th] = x[190-1] + (190.95 - 190)*(x[190-1+1] - x[190-1]) = 189.0 + 0.95*(190.0 - 189.0) = 189.95
    //   Q[99th] = x[198-1] + (198.99 - 198)*(x[198-1+1] - x[198-1]) = 197.0 + 0.99*(198.0 - 197.0) = 197.99
    assertEquals("unexpected timeLagConsumerCallbacksMs 50th percentile", 99.5, stats.getTimeLagConsumerCallbacksMs_HistPct_50());
    assertEquals("unexpected timeLagConsumerCallbacksMs 90th percentile", 179.9, stats.getTimeLagConsumerCallbacksMs_HistPct_90());
    assertEquals("unexpected timeLagConsumerCallbacksMs 95th percentile", 189.95, stats.getTimeLagConsumerCallbacksMs_HistPct_95());
    assertEquals("unexpected timeLagConsumerCallbacksMs 99th percentile", 197.99, stats.getTimeLagConsumerCallbacksMs_HistPct_99());
    assertEquals("unexpected timeLagConsumerCallbacksMs max value", 199.0, stats.getTimeLagConsumerCallbacksMs_Max());

    // Same formula applied to the source-to-receipt lags (see sourceTimestampNs above).
    // Approximately:
    //   Q[50th] =  990.0 + 0.5 *(1000.0 -  990.0) =  995.0
    //   Q[90th] = 1790.0 + 0.9 *(1800.0 - 1790.0) = 1799.0
    //   Q[95th] = 1890.0 + 0.95*(1900.0 - 1890.0) = 1899.5
    //   Q[99th] = 1970.0 + 0.99*(1980.0 - 1970.0) = 1979.9
    // ...but allow +/-1 for jitter
    double receiptP50 = stats.getTimeLagSourceToReceiptMs_HistPct_50();
    assertTrue("unexpected timeLagSourceToReceiptMs 50th percentile: " + receiptP50, receiptP50 >= 994.0 && receiptP50 <= 996.0);

    double receiptP90 = stats.getTimeLagSourceToReceiptMs_HistPct_90();
    assertTrue("unexpected timeLagSourceToReceiptMs 90th percentile: " + receiptP90, receiptP90 >= 1798.0 && receiptP90 <= 1800.0);

    // nominal value is 1899.5, but 1900.45 has been observed once
    double receiptP95 = stats.getTimeLagSourceToReceiptMs_HistPct_95();
    assertTrue("unexpected timeLagSourceToReceiptMs 95th percentile: " + receiptP95, receiptP95 >= 1898.5 && receiptP95 <= 1900.5);

    double receiptP99 = stats.getTimeLagSourceToReceiptMs_HistPct_99();
    assertTrue("unexpected timeLagSourceToReceiptMs 99th percentile: " + receiptP99, receiptP99 >= 1978.9 && receiptP99 <= 1980.9);
}
Aggregations