use of org.hypertrace.core.spannormalizer.TraceIdentity in project hypertrace-ingester by hypertrace.
the class RawSpansGrouper method buildTopology.
/**
 * Wires the raw-spans-grouper topology into the supplied builder.
 *
 * <p>The input stream is looked up in {@code inputStreams} by the configured input topic name and
 * is created (and registered in that map) only when it is not already present. Two persistent,
 * caching-enabled key-value stores are registered, and the stream is routed through a
 * {@link RawSpansProcessor} whose results are written to the configured output topic.
 *
 * @param properties streams properties; also the source of the job config and default serdes
 * @param streamsBuilder builder the topology is added to
 * @param inputStreams already-created input streams keyed by topic name; may be updated
 * @return the same {@code streamsBuilder} instance that was passed in
 */
public StreamsBuilder buildTopology(Map<String, Object> properties, StreamsBuilder streamsBuilder, Map<String, KStream<?, ?>> inputStreams) {
  Config grouperConfig = getJobConfig(properties);
  String sourceTopic = grouperConfig.getString(INPUT_TOPIC_CONFIG_KEY);
  String sinkTopic = grouperConfig.getString(OUTPUT_TOPIC_CONFIG_KEY);

  // Reuse the stream when another job has already registered one for this topic.
  KStream<TraceIdentity, RawSpan> rawSpans = (KStream<TraceIdentity, RawSpan>) inputStreams.get(sourceTopic);
  if (rawSpans == null) {
    rawSpans = streamsBuilder.stream(sourceTopic);
    inputStreams.put(sourceTopic, rawSpans);
  }

  // Both state stores use the default key/value serdes defined in the streams config.
  Serde storeValueSerde = defaultValueSerde(properties);
  Serde storeKeySerde = defaultKeySerde(properties);

  StoreBuilder<KeyValueStore<SpanIdentity, RawSpan>> spanStore = Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore(SPAN_STATE_STORE_NAME), storeKeySerde, storeValueSerde).withCachingEnabled();
  streamsBuilder.addStateStore(spanStore);

  StoreBuilder<KeyValueStore<TraceIdentity, TraceState>> traceStateStore = Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore(TRACE_STATE_STORE), storeKeySerde, storeValueSerde).withCachingEnabled();
  streamsBuilder.addStateStore(traceStateStore);

  // A null value serde is passed here on purpose, exactly as before; only the key serde is explicit.
  Produced<String, StructuredTrace> tracesProduced = Produced.<String, StructuredTrace>with(Serdes.String(), null).withName(OUTPUT_TOPIC_PRODUCER);

  rawSpans
      .transform(RawSpansProcessor::new, Named.as(RawSpansProcessor.class.getSimpleName()), SPAN_STATE_STORE_NAME, TRACE_STATE_STORE)
      .to(sinkTopic, tracesProduced);
  return streamsBuilder;
}
use of org.hypertrace.core.spannormalizer.TraceIdentity in project hypertrace-ingester by hypertrace.
the class RawSpansProcessor method transform.
/**
 * Buffers one incoming span and updates the bookkeeping of the trace it belongs to.
 *
 * <p>Unless {@code shouldDropSpan} rejects it, the span is written to the span store under its
 * (tenant, trace, span) identity and the trace state for {@code key} is created or extended. Every
 * accepted span pushes the trace's emit timestamp to "now + groupingWindowTimeoutMs". A punctuator
 * is scheduled only when the first span of a trace is seen.
 *
 * @param key identity (tenant and trace) of the trace the span belongs to
 * @param value the span to buffer
 * @return always {@code null}; nothing is forwarded from here because the punctuator emits the
 *     trace
 */
public KeyValue<String, StructuredTrace> transform(TraceIdentity key, RawSpan value) {
  Instant start = Instant.now();
  long currentTimeMs = System.currentTimeMillis();

  TraceState traceState = traceStateStore.get(key);
  boolean firstEntry = (traceState == null);

  // Dropped spans are neither stored nor counted in the processing-latency timer.
  if (shouldDropSpan(key, traceState)) {
    return null;
  }

  String tenantId = key.getTenantId();
  ByteBuffer traceId = value.getTraceId();
  ByteBuffer spanId = value.getEvent().getEventId();
  spanStore.put(new SpanIdentity(tenantId, traceId, spanId), value);

  /*
   the trace emit ts is essentially currentTs + groupingWindowTimeoutMs
   i.e. if there is no span added in the next 'groupingWindowTimeoutMs' interval
   then the trace can be finalized and emitted
  */
  long traceEmitTs = currentTimeMs + groupingWindowTimeoutMs;
  // Guarded so the hex conversion of the trace id is only paid for when debug logging is on.
  if (logger.isDebugEnabled()) {
    logger.debug("Updating trigger_ts=[{}] for tenant_id=[{}], trace_id=[{}]", Instant.ofEpochMilli(traceEmitTs), tenantId, HexUtils.getHex(traceId));
  }

  if (firstEntry) {
    // First span of this trace: create its state and schedule the punctuator for it.
    traceState = fastNewBuilder(TraceState.Builder.class).setTraceStartTimestamp(currentTimeMs).setTraceEndTimestamp(currentTimeMs).setEmitTs(traceEmitTs).setTenantId(tenantId).setTraceId(traceId).setSpanIds(List.of(spanId)).build();
    schedulePunctuator(key);
  } else {
    // Trace already in flight: append the span and extend the inactivity window.
    traceState.getSpanIds().add(spanId);
    traceState.setTraceEndTimestamp(currentTimeMs);
    traceState.setEmitTs(traceEmitTs);
  }
  traceStateStore.put(key, traceState);

  // One latency timer per tenant, registered lazily the first time the tenant is seen.
  tenantToSpansGroupingTimer.computeIfAbsent(value.getCustomerId(), k -> PlatformMetricsRegistry.registerTimer(PROCESSING_LATENCY_TIMER, Map.of("tenantId", k))).record(Duration.between(start, Instant.now()).toMillis(), TimeUnit.MILLISECONDS);
  // the punctuator will emit the trace
  return null;
}
use of org.hypertrace.core.spannormalizer.TraceIdentity in project hypertrace-ingester by hypertrace.
the class RawSpansGrouperTest method whenRawSpansAreReceivedWithInactivityExpectTraceToBeOutput.
/**
 * Drives the grouper topology through a {@link TopologyTestDriver} and checks that traces are
 * emitted only after a period of inactivity, and that the number of spans in an emitted trace is
 * capped (5 asserted for tenant1's trace-3, 6 for tenant2's trace-4).
 *
 * <p>The driver is opened in try-with-resources so that it is closed even when an assertion
 * fails.
 */
@Test
@SetEnvironmentVariable(key = "SERVICE_NAME", value = "raw-spans-grouper")
public void whenRawSpansAreReceivedWithInactivityExpectTraceToBeOutput(@TempDir Path tempDir) {
  File file = tempDir.resolve("state").toFile();
  RawSpansGrouper underTest = new RawSpansGrouper(ConfigClientFactory.getClient());
  Config config = ConfigFactory.parseURL(getClass().getClassLoader().getResource("configs/raw-spans-grouper/application.conf"));
  Map<String, Object> baseProps = underTest.getBaseStreamsConfig();
  Map<String, Object> streamsProps = underTest.getStreamsConfig(config);
  baseProps.forEach(streamsProps::put);
  Map<String, Object> mergedProps = streamsProps;
  mergedProps.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
  mergedProps.put(RawSpanGrouperConstants.RAW_SPANS_GROUPER_JOB_CONFIG, config);
  mergedProps.put(StreamsConfig.STATE_DIR_CONFIG, file.getAbsolutePath());
  StreamsBuilder streamsBuilder = underTest.buildTopology(mergedProps, new StreamsBuilder(), new HashMap<>());
  Properties props = new Properties();
  mergedProps.forEach(props::put);
  Serde defaultValueSerde = new StreamsConfig(mergedProps).defaultValueSerde();
  Serde<TraceIdentity> traceIdentitySerde = new StreamsConfig(mergedProps).defaultKeySerde();

  String tenantId = "tenant1";
  // create spans for trace-1 of tenant1
  RawSpan span1 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-1".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-1", "tenant1")).build();
  RawSpan span2 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-1".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-2", "tenant1")).build();
  RawSpan span3 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-1".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-3", "tenant1")).build();
  // create spans for trace-2 of tenant1
  RawSpan span4 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-2".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-4", "tenant1")).build();
  RawSpan span5 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-2".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-5", "tenant1")).build();
  // create spans for trace-3 of tenant1
  RawSpan span6 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-3".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-6", "tenant1")).build();
  RawSpan span7 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-3".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-7", "tenant1")).build();
  RawSpan span8 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-3".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-8", "tenant1")).build();
  RawSpan span9 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-3".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-9", "tenant1")).build();
  RawSpan span10 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-3".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-10", "tenant1")).build();
  RawSpan span11 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-3".getBytes())).setCustomerId("tenant1").setEvent(createEvent("event-11", "tenant1")).build();
  // create 8 spans for trace-4 of tenant2
  String tenant2 = "tenant2";
  RawSpan span12 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-4".getBytes())).setCustomerId(tenant2).setEvent(createEvent("event-12", tenant2)).build();
  RawSpan span13 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-4".getBytes())).setCustomerId(tenant2).setEvent(createEvent("event-13", tenant2)).build();
  RawSpan span14 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-4".getBytes())).setCustomerId(tenant2).setEvent(createEvent("event-14", tenant2)).build();
  RawSpan span15 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-4".getBytes())).setCustomerId(tenant2).setEvent(createEvent("event-15", tenant2)).build();
  RawSpan span16 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-4".getBytes())).setCustomerId(tenant2).setEvent(createEvent("event-16", tenant2)).build();
  RawSpan span17 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-4".getBytes())).setCustomerId(tenant2).setEvent(createEvent("event-17", tenant2)).build();
  RawSpan span18 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-4".getBytes())).setCustomerId(tenant2).setEvent(createEvent("event-18", tenant2)).build();
  RawSpan span19 = RawSpan.newBuilder().setTraceId(ByteBuffer.wrap("trace-4".getBytes())).setCustomerId(tenant2).setEvent(createEvent("event-19", tenant2)).build();

  // Close the driver when the test ends (pass or fail) so its topology and state stores are
  // released before the temp directory is cleaned up.
  try (TopologyTestDriver td = new TopologyTestDriver(streamsBuilder.build(), props)) {
    TestInputTopic<TraceIdentity, RawSpan> inputTopic = td.createInputTopic(config.getString(RawSpanGrouperConstants.INPUT_TOPIC_CONFIG_KEY), traceIdentitySerde.serializer(), defaultValueSerde.serializer());
    TestOutputTopic outputTopic = td.createOutputTopic(config.getString(RawSpanGrouperConstants.OUTPUT_TOPIC_CONFIG_KEY), Serdes.String().deserializer(), defaultValueSerde.deserializer());

    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-1"), span1);
    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-2"), span4);
    td.advanceWallClockTime(Duration.ofSeconds(1));
    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-1"), span2);

    // select a value < 30s (groupingWindowTimeoutInMs)
    // this shouldn't trigger a punctuate call
    td.advanceWallClockTime(Duration.ofMillis(200));
    assertTrue(outputTopic.isEmpty());

    // the next advance should trigger a punctuate call and emit both pending traces
    td.advanceWallClockTime(Duration.ofSeconds(32));

    // trace-1 should have 2 spans: span1 and span2
    StructuredTrace trace = (StructuredTrace) outputTopic.readValue();
    assertEquals(2, trace.getEventList().size());
    Set<String> traceEventIds = trace.getEventList().stream().map(id -> new String(id.getEventId().array())).collect(Collectors.toSet());
    assertTrue(traceEventIds.contains("event-1"));
    assertTrue(traceEventIds.contains("event-2"));

    // trace-2 should have 1 span: span4
    trace = (StructuredTrace) outputTopic.readValue();
    assertEquals(1, trace.getEventList().size());
    assertEquals("event-4", new String(trace.getEventList().get(0).getEventId().array()));

    // spans arriving after their trace was emitted start a new trace with the same id
    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-1"), span3);
    td.advanceWallClockTime(Duration.ofSeconds(45));
    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-2"), span5);
    // after this advance both re-opened traces must have been emitted, each with a single span
    td.advanceWallClockTime(Duration.ofSeconds(35));

    // trace-1 should have 1 span: span3
    trace = (StructuredTrace) outputTopic.readValue();
    assertEquals(1, trace.getEventList().size());
    assertEquals("event-3", new String(trace.getEventList().get(0).getEventId().array()));

    // trace-2 should have 1 span: span5
    trace = (StructuredTrace) outputTopic.readValue();
    assertEquals(1, trace.getEventList().size());
    assertEquals("event-5", new String(trace.getEventList().get(0).getEventId().array()));

    // input 6 spans of trace-3 for tenant1
    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-3"), span6);
    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-3"), span7);
    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-3"), span8);
    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-3"), span9);
    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-3"), span10);
    inputTopic.pipeInput(createTraceIdentity(tenantId, "trace-3"), span11);
    td.advanceWallClockTime(Duration.ofSeconds(35));

    // trace should be truncated to 5 spans
    trace = (StructuredTrace) outputTopic.readValue();
    assertEquals(5, trace.getEventList().size());

    // input 8 spans of trace-4 for tenant2; the global upper limit applies, so only 6 are emitted
    inputTopic.pipeInput(createTraceIdentity(tenant2, "trace-4"), span12);
    inputTopic.pipeInput(createTraceIdentity(tenant2, "trace-4"), span13);
    inputTopic.pipeInput(createTraceIdentity(tenant2, "trace-4"), span14);
    inputTopic.pipeInput(createTraceIdentity(tenant2, "trace-4"), span15);
    inputTopic.pipeInput(createTraceIdentity(tenant2, "trace-4"), span16);
    inputTopic.pipeInput(createTraceIdentity(tenant2, "trace-4"), span17);
    inputTopic.pipeInput(createTraceIdentity(tenant2, "trace-4"), span18);
    inputTopic.pipeInput(createTraceIdentity(tenant2, "trace-4"), span19);
    td.advanceWallClockTime(Duration.ofSeconds(35));

    trace = (StructuredTrace) outputTopic.readValue();
    assertEquals(6, trace.getEventList().size());
  }
}
use of org.hypertrace.core.spannormalizer.TraceIdentity in project hypertrace-ingester by hypertrace.
the class RawSpansProcessor method restorePunctuators.
/**
 * Re-creates punctuators after a restart. Punctuators are not persisted, so one is scheduled again
 * for every trace key still present in the trace state store; they are meant to run after
 * {@link RawSpansProcessor#groupingWindowTimeoutMs}. The number restored and the time taken are
 * logged at info level.
 */
void restorePunctuators() {
  final Instant restoreStart = Instant.now();
  long restored = 0;
  // The store iterator must be closed, hence try-with-resources.
  try (KeyValueIterator<TraceIdentity, TraceState> pendingTraces = traceStateStore.all()) {
    for (; pendingTraces.hasNext(); restored++) {
      TraceIdentity pendingKey = pendingTraces.next().key;
      schedulePunctuator(pendingKey);
    }
    logger.info("Restored=[{}] punctuators, Duration=[{}]", restored, Duration.between(restoreStart, Instant.now()));
  }
}
use of org.hypertrace.core.spannormalizer.TraceIdentity in project hypertrace-ingester by hypertrace.
the class RawSpansProcessor method init.
/**
 * Initializes the processor: binds the two state stores, reads the job settings from the
 * application config and finally restores the punctuators for traces already in the store.
 *
 * <p>Settings read here:
 * <ul>
 *   <li>grouping window timeout: the configured value multiplied by 1000 (presumably seconds to
 *       milliseconds);
 *   <li>dataflow sampling percent: applied only when present and within (0, 100];
 *   <li>per-key in-flight span limits and the default in-flight span limit, when present.
 * </ul>
 *
 * @param context processor context giving access to state stores and application configs
 */
@Override
public void init(ProcessorContext context) {
  this.context = context;
  this.spanStore = (KeyValueStore<SpanIdentity, RawSpan>) context.getStateStore(SPAN_STATE_STORE_NAME);
  this.traceStateStore = (KeyValueStore<TraceIdentity, TraceState>) context.getStateStore(TRACE_STATE_STORE);

  Config jobConfig = (Config) context.appConfigs().get(RAW_SPANS_GROUPER_JOB_CONFIG);
  this.groupingWindowTimeoutMs = jobConfig.getLong(SPAN_GROUPBY_SESSION_WINDOW_INTERVAL_CONFIG_KEY) * 1000;

  // An out-of-range sampling percent is ignored and the field keeps its previous value.
  if (jobConfig.hasPath(DATAFLOW_SAMPLING_PERCENT_CONFIG_KEY)) {
    double configuredPercent = jobConfig.getDouble(DATAFLOW_SAMPLING_PERCENT_CONFIG_KEY);
    if (configuredPercent > 0 && configuredPercent <= 100) {
      this.dataflowSamplingPercent = configuredPercent;
    }
  }

  // Copy every entry of the per-key limit section into the lookup map.
  if (jobConfig.hasPath(INFLIGHT_TRACE_MAX_SPAN_COUNT)) {
    Config perKeyLimits = jobConfig.getConfig(INFLIGHT_TRACE_MAX_SPAN_COUNT);
    perKeyLimits.entrySet().stream()
        .map(Map.Entry::getKey)
        .forEach(limitKey -> maxSpanCountMap.put(limitKey, perKeyLimits.getLong(limitKey)));
  }

  if (jobConfig.hasPath(DEFAULT_INFLIGHT_TRACE_MAX_SPAN_COUNT)) {
    defaultMaxSpanCountLimit = jobConfig.getLong(DEFAULT_INFLIGHT_TRACE_MAX_SPAN_COUNT);
  }

  this.outputTopic = To.child(OUTPUT_TOPIC_PRODUCER);
  // Must run last: it needs the stores and settings initialized above.
  restorePunctuators();
}
Aggregations