the class CassandraSpanConsumer method accept.
* This fans out into many requests, last count was 2 * spans.size. If any of these fail, the
* returned future will fail. Most callers drop or log the result.
// Fans span storage out into asynchronous writes: one write per span, plus index writes per
// service name when a timestamp is available. All issued futures are collected in `futures`.
// NOTE(review): several tokens and all closing braces are missing from this view of the method;
// the notes below mark each gap so it can be confirmed against the original source.
public ListenableFuture<Void> accept(List<Span> rawSpans) {
// Accumulates the future of every write issued below.
ImmutableSet.Builder<ListenableFuture<?>> futures = ImmutableSet.builder();
for (Span span : rawSpans) {
// indexing occurs by timestamp, so derive one if not present.
Long timestamp = guessTimestamp(span);
TraceIdUDT traceId = new TraceIdUDT(span.traceIdHigh, span.traceId);
boolean isServerRecvSpan = isServerRecvSpan(span);
// The span is stored unconditionally; timestamp may still be null at this point (it is only
// null-checked further down, before the index writes).
futures.add(storeSpan(span, traceId, isServerRecvSpan, timestamp));
for (String serviceName : span.serviceNames()) {
// QueryRequest.min/maxDuration
// Index rows are only written when a timestamp exists.
if (timestamp != null) {
// Contract for Repository.storeTraceServiceSpanName is to store the span twice, once with
// the span name and another with empty string.
// NOTE(review): the second argument is missing below (",,"); presumably the span name, per the
// contract comment above -- confirm.
futures.add(storeTraceServiceSpanName(serviceName,, timestamp, span.duration, traceId));
// NOTE(review): the condition below is truncated; presumably it skips the empty-string write
// when the span name is already empty -- confirm.
if (! {
// If == "", this would be redundant
futures.add(storeTraceServiceSpanName(serviceName, "", timestamp, span.duration, traceId));
// NOTE(review): the argument to allAsList is missing; presumably the built set of futures.
// The result is mapped through TO_VOID to match the ListenableFuture<Void> return type.
return transform(Futures.allAsList(, TO_VOID);
the class CassandraSpanStore method getTraceIdsByServiceNames.
ListenableFuture<Map<TraceIdUDT, Long>> getTraceIdsByServiceNames(QueryRequest request) {
long oldestData = indexTtl == 0 ? 0 : (System.currentTimeMillis() - indexTtl * 1000);
long startTsMillis = Math.max((request.endTs - request.lookback), oldestData);
long endTsMillis = Math.max(request.endTs, oldestData);
try {
Set<String> serviceNames;
if (null != request.serviceName) {
serviceNames = Collections.singleton(request.serviceName);
} else {
serviceNames = new LinkedHashSet<>(getServiceNames().get());
if (serviceNames.isEmpty()) {
return immediateFuture(Collections.<TraceIdUDT, Long>emptyMap());
int startBucket = CassandraUtil.durationIndexBucket(startTsMillis * 1000);
int endBucket = CassandraUtil.durationIndexBucket(endTsMillis * 1000);
if (startBucket > endBucket) {
throw new IllegalArgumentException("Start bucket (" + startBucket + ") > end bucket (" + endBucket + ")");
Set<Integer> buckets = ContiguousSet.create(Range.closed(startBucket, endBucket), integers());
boolean withDuration = null != request.minDuration || null != request.maxDuration;
List<ListenableFuture<Map<TraceIdUDT, Long>>> futures = new ArrayList<>();
if (200 < serviceNames.size() * buckets.size()) {
LOG.warn("read against " + TABLE_TRACE_BY_SERVICE_SPAN + " fanning out to " + serviceNames.size() * buckets.size() + " requests");
//@xxx the fan-out of requests here can be improved
for (String serviceName : serviceNames) {
for (Integer bucket : buckets) {
BoundStatement bound = CassandraUtil.bindWithName(withDuration ? selectTraceIdsByServiceSpanNameAndDuration : selectTraceIdsByServiceSpanName, "select-trace-ids-by-service-name").setString("service_name", serviceName).setString("span_name", null != request.spanName ? request.spanName : "").setInt("bucket", bucket).setUUID("start_ts", UUIDs.startOf(startTsMillis)).setUUID("end_ts", UUIDs.endOf(endTsMillis)).setInt("limit_", request.limit);
if (withDuration) {
bound = bound.setLong("start_duration", null != request.minDuration ? request.minDuration : 0).setLong("end_duration", null != request.maxDuration ? request.maxDuration : Long.MAX_VALUE);
futures.add(transform(session.executeAsync(bound), traceIdToTimestamp));
return transform(allAsList(futures), collapseTraceIdMaps);
} catch (RuntimeException | InterruptedException | ExecutionException ex) {
return immediateFailedFuture(ex);
the class Indexer method index.
// Derives index rows (partition key + trace ID -> millisecond-truncated timestamp) from the
// given spans, optionally filters them through entriesThatIncreaseGap when sharedState is set,
// and binds one insert statement per remaining entry.
// NOTE(review): closing braces, one initializer and the tail of the method (executing the bound
// statement and returning `result`) are not visible in this view.
ImmutableSet<ListenableFuture<?>> index(List<Span> spans) {
// First parse each span into partition keys used to support query requests
Builder<PartitionKeyToTraceId, Long> parsed = ImmutableSetMultimap.builder();
for (Span span : spans) {
Long timestamp = guessTimestamp(span);
// NOTE(review): the body of this null check is missing from this view. As written, the loop
// below would become its body and unbox a null timestamp; presumably the original skips the
// span here (e.g. `continue;`) -- confirm.
if (timestamp == null)
for (String partitionKey : index.partitionKeys(span)) {
// Integer division then multiplication by 1000 zeroes the last three decimal digits of the
// timestamp before it is stored.
parsed.put(new PartitionKeyToTraceId(index.table(), partitionKey, span.traceId), // index precision is millis
1000 * (timestamp / 1000));
// The parsed results may include inserts that already occur, or are redundant as they don't
// impact QueryRequest.endTs or QueryRequest.lookback. For example, a parsed timestamp could
// be between timestamps of rows that already exist for a particular trace.
// NOTE(review): the initializer is missing below; presumably the built `parsed` multimap.
ImmutableSetMultimap<PartitionKeyToTraceId, Long> maybeInsert =;
ImmutableSetMultimap<PartitionKeyToTraceId, Long> toInsert;
if (sharedState == null) {
// special-case when caching is disabled.
toInsert = maybeInsert;
} else {
// Optimized results will be smaller when the input includes traces with local spans, or when
// other threads indexed the same trace.
toInsert = entriesThatIncreaseGap(sharedState, maybeInsert);
// Only report when something was actually dropped and debug logging is on.
if (maybeInsert.size() > toInsert.size() && LOG.isDebugEnabled()) {
int delta = maybeInsert.size() - toInsert.size();
LOG.debug("optimized out {}/{} inserts into {}", delta, maybeInsert.size(), index.table());
// For each entry, insert a new row in the index table asynchronously
ImmutableSet.Builder<ListenableFuture<?>> result = ImmutableSet.builder();
for (Map.Entry<PartitionKeyToTraceId, Long> entry : toInsert.entries()) {
// The timestamp is bound as raw bytes produced by timestampCodec rather than via a typed setter.
BoundStatement bound = bindWithName(prepared, boundName).setLong("trace_id", entry.getKey().traceId).setBytesUnsafe("ts", timestampCodec.serialize(entry.getValue()));
// The TTL is only bound when one is configured (indexTtl is nullable here).
if (indexTtl != null) {
bound.setInt("ttl_", indexTtl);
// The index implementation binds its own partition-key column(s) onto the statement.
index.bindPartitionKey(bound, entry.getKey().partitionKey);
the class DeduplicatingExecutorTest method multithreaded.
* This shows that any number of threads perform a computation only once.
// Submits the same statement under two keys ("foo", "bar") from many pooled tasks, collects the
// returned futures, and then checks that later calls return futures not seen before.
// NOTE(review): closing braces and some statements (the end of the task body, any latch
// countDown/await, and the step that expires the result) are not visible in this view.
public void multithreaded() throws Exception {
Session session = mock(Session.class);
// The second constructor argument is one second expressed in milliseconds (presumably the
// deduplication TTL -- confirm against DeduplicatingExecutor).
DeduplicatingExecutor executor = new DeduplicatingExecutor(session, TimeUnit.SECONDS.toMillis(1L));
BoundStatement statement = mock(BoundStatement.class);
// Every executeAsync call on the mock yields a brand-new mock future, so distinct futures
// correspond to distinct session invocations.
when(session.executeAsync(statement)).thenAnswer(invocationOnMock -> mock(ResultSetFuture.class));
int loopCount = 1000;
// NOTE(review): the latch is created here but no countDown()/await() is visible in this view.
CountDownLatch latch = new CountDownLatch(loopCount);
// 1000 tasks are spread over 10 worker threads.
ExecutorService exec = Executors.newFixedThreadPool(10);
// Concurrent collection because the pooled tasks append to it in parallel.
Collection<ListenableFuture<?>> futures = new ConcurrentLinkedDeque<>();
for (int i = 0; i < loopCount; i++) {
exec.execute(() -> {
futures.add(executor.maybeExecuteAsync(statement, "foo"));
futures.add(executor.maybeExecuteAsync(statement, "bar"));
// Copying into a set collapses repeated references to the same future.
ImmutableSet<ListenableFuture<?>> distinctFutures = ImmutableSet.copyOf(futures);
// expire the result
// NOTE(review): no statement that actually waits for expiry is visible here.
// Sanity check: we don't memoize after we should have expired.
assertThat(executor.maybeExecuteAsync(statement, "foo")).isNotIn(distinctFutures);
assertThat(executor.maybeExecuteAsync(statement, "bar")).isNotIn(distinctFutures);
the class ElasticsearchSpanStore method getTraces.
// Builds an Elasticsearch bool filter from the request, collects the matching trace IDs through
// a terms aggregation, then loads those traces with getTracesByIds.
// NOTE(review): closing braces and the bodies of the duration filter are not visible in this view.
public ListenableFuture<List<List<Span>>> getTraces(final QueryRequest request) {
long endMillis = request.endTs;
long beginMillis = endMillis - request.lookback;
// Inclusive time window [endTs - lookback, endTs] on timestamp_millis.
BoolQueryBuilder filter = boolQuery().must(rangeQuery("timestamp_millis").gte(beginMillis).lte(endMillis));
if (request.serviceName != null) {
// The service name may appear on the endpoint of either an annotation or a binary annotation
// (two `should` clauses inside a `must`).
filter.must(boolQuery().should(nestedQuery("annotations", termQuery("annotations.endpoint.serviceName", request.serviceName))).should(nestedQuery("binaryAnnotations", termQuery("binaryAnnotations.endpoint.serviceName", request.serviceName))));
if (request.spanName != null) {
filter.must(termQuery("name", request.spanName));
// Each requested annotation value adds its own nested clause; when a service name is given,
// the same nested annotation must also carry that service name.
for (String annotation : request.annotations) {
BoolQueryBuilder annotationQuery = boolQuery().must(termQuery("annotations.value", annotation));
if (request.serviceName != null) {
annotationQuery.must(termQuery("annotations.endpoint.serviceName", request.serviceName));
filter.must(nestedQuery("annotations", annotationQuery));
// Binary annotations are matched on key and value together within one nested document.
for (Map.Entry<String, String> kv : request.binaryAnnotations.entrySet()) {
// In our index template, we make sure the binaryAnnotation value is indexed as string,
// meaning non-string values won't even be indexed at all. This means that we can only
// match string values here, which happens to be exactly what we want.
BoolQueryBuilder binaryAnnotationQuery = boolQuery().must(termQuery("binaryAnnotations.key", kv.getKey())).must(termQuery("binaryAnnotations.value", kv.getValue()));
if (request.serviceName != null) {
binaryAnnotationQuery.must(termQuery("binaryAnnotations.endpoint.serviceName", request.serviceName));
filter.must(nestedQuery("binaryAnnotations", binaryAnnotationQuery));
// A duration range is only built when a minimum duration is requested.
// NOTE(review): the maxDuration branch body and the statement adding durationQuery to the
// filter are missing from this view; presumably an upper bound is applied and the range is
// added to `filter` -- confirm.
if (request.minDuration != null) {
RangeQueryBuilder durationQuery = rangeQuery("duration").gte(request.minDuration);
if (request.maxDuration != null) {
// Restrict the search to the index name patterns covering the query window.
Set<String> strings = indexNameFormatter.indexNamePatternsForRange(beginMillis, endMillis);
final String[] indices = strings.toArray(new String[0]);
// We need to filter to traces that contain at least one span that matches the request,
// but the zipkin API is supposed to order traces by first span, regardless of if it was
// filtered or not. This is not possible without either multiple, heavyweight queries
// or complex multiple indexing, defeating much of the elegance of using elasticsearch for this.
// So we fudge and order on the first span among the filtered spans - in practice, there should
// be no significant difference in user experience since span start times are usually very
// close to each other in human time.
// Buckets are keyed by traceId, ordered by the minimum timestamp_millis in each bucket, and
// capped at request.limit. NOTE(review): the `false` flag presumably requests descending order
// -- confirm against the Elasticsearch Terms.Order API.
ListenableFuture<List<String>> traceIds = client.collectBucketKeys(indices, boolQuery().must(matchAllQuery()).filter(filter), AggregationBuilders.terms("traceId_agg").field("traceId").subAggregation(AggregationBuilders.min("timestamps_agg").field("timestamp_millis")).order(Order.aggregation("timestamps_agg", false)).size(request.limit));
// Once the trace IDs are known, fetch the traces from the same indices.
return transform(traceIds, new AsyncFunction<List<String>, List<List<Span>>>() {
public ListenableFuture<List<List<Span>>> apply(List<String> input) {
return getTracesByIds(input, indices, request);