Use of org.apache.kafka.streams.Topology in the kafka project by apache.
Class SuppressScenarioTest, method shouldSuppressIntermediateEventsWithBytesLimit.
/**
 * Exercises a suppression buffer capped at 200 bytes that emits early when it is full.
 *
 * <p>A table read from "input" is re-keyed by its value and counted. The counts are written
 * both suppressed ("output-suppressed") and unsuppressed ("output-raw") so that the two
 * outputs can be compared after each batch of input.
 */
@Test
public void shouldSuppressIntermediateEventsWithBytesLimit() {
    final String rawTopic = "output-raw";
    final String suppressedTopic = "output-suppressed";

    final StreamsBuilder builder = new StreamsBuilder();
    final KTable<String, String> source = builder.table(
        "input",
        Consumed.with(STRING_SERDE, STRING_SERDE),
        Materialized.<String, String, KeyValueStore<Bytes, byte[]>>with(STRING_SERDE, STRING_SERDE)
            .withCachingDisabled()
            .withLoggingDisabled()
    );
    // Swap key and value so that the count is kept per distinct value.
    final KTable<String, Long> countsByValue = source
        .groupBy((k, v) -> new KeyValue<>(v, k), Grouped.with(STRING_SERDE, STRING_SERDE))
        .count();

    countsByValue
        .suppress(untilTimeLimit(ofMillis(Long.MAX_VALUE), maxBytes(200L).emitEarlyWhenFull()))
        .toStream()
        .to(suppressedTopic, Produced.with(STRING_SERDE, Serdes.Long()));
    countsByValue
        .toStream()
        .to(rawTopic, Produced.with(STRING_SERDE, Serdes.Long()));

    final Topology topology = builder.build();
    System.out.println(topology.describe());

    try (final TopologyTestDriver testDriver = new TopologyTestDriver(topology, config)) {
        final TestInputTopic<String, String> input =
            testDriver.createInputTopic("input", STRING_SERIALIZER, STRING_SERIALIZER);

        input.pipeInput("k1", "v1", 0L);
        input.pipeInput("k1", "v2", 1L);
        input.pipeInput("k2", "v1", 2L);

        verify(
            drainProducerRecords(testDriver, rawTopic, STRING_DESERIALIZER, LONG_DESERIALIZER),
            asList(
                new KeyValueTimestamp<>("v1", 1L, 0L),
                new KeyValueTimestamp<>("v1", 0L, 1L),
                new KeyValueTimestamp<>("v2", 1L, 1L),
                new KeyValueTimestamp<>("v1", 1L, 2L)
            )
        );
        verify(
            drainProducerRecords(testDriver, suppressedTopic, STRING_DESERIALIZER, LONG_DESERIALIZER),
            asList(
                // back-to-back updates to v1 are collapsed into the later one
                new KeyValueTimestamp<>("v1", 0L, 1L),
                new KeyValueTimestamp<>("v2", 1L, 1L)
            )
        );

        input.pipeInput("x", "x", 3L);

        verify(
            drainProducerRecords(testDriver, rawTopic, STRING_DESERIALIZER, LONG_DESERIALIZER),
            singletonList(new KeyValueTimestamp<>("x", 1L, 3L))
        );
        verify(
            drainProducerRecords(testDriver, suppressedTopic, STRING_DESERIALIZER, LONG_DESERIALIZER),
            singletonList(
                // the final v1 update shows up now; the update to x stays buffered until it is evicted
                new KeyValueTimestamp<>("v1", 1L, 2L)
            )
        );
    }
}
Use of org.apache.kafka.streams.Topology in the kafka project by apache.
Class SuppressScenarioTest, method shouldSupportFinalResultsForSessionWindows.
/**
 * Checks final-result suppression ({@code untilWindowCloses(unbounded())}) on a session-windowed
 * count with a 5 ms inactivity gap and zero grace.
 *
 * <p>The windowed counts are written both unsuppressed ("output-raw") and suppressed
 * ("output-suppressed"), with the windowed key rendered through {@code toString()}.
 */
@Test
public void shouldSupportFinalResultsForSessionWindows() {
    final String rawTopic = "output-raw";
    final String suppressedTopic = "output-suppressed";

    final StreamsBuilder builder = new StreamsBuilder();
    final KTable<Windowed<String>, Long> sessionCounts = builder
        .stream("input", Consumed.with(STRING_SERDE, STRING_SERDE))
        .groupBy((String k, String v) -> k, Grouped.with(STRING_SERDE, STRING_SERDE))
        .windowedBy(SessionWindows.with(ofMillis(5L)).grace(ofMillis(0L)))
        .count(Materialized.<String, Long, SessionStore<Bytes, byte[]>>as("counts").withCachingDisabled());

    sessionCounts
        .suppress(untilWindowCloses(unbounded()))
        .toStream()
        .map((final Windowed<String> window, final Long count) -> new KeyValue<>(window.toString(), count))
        .to(suppressedTopic, Produced.with(STRING_SERDE, Serdes.Long()));
    sessionCounts
        .toStream()
        .map((final Windowed<String> window, final Long count) -> new KeyValue<>(window.toString(), count))
        .to(rawTopic, Produced.with(STRING_SERDE, Serdes.Long()));

    final Topology topology = builder.build();
    System.out.println(topology.describe());

    try (final TopologyTestDriver testDriver = new TopologyTestDriver(topology, config)) {
        final TestInputTopic<String, String> input =
            testDriver.createInputTopic("input", STRING_SERIALIZER, STRING_SERIALIZER);

        // records belonging to the first session
        input.pipeInput("k1", "v1", 0L);
        input.pipeInput("k1", "v1", 5L);
        // an out-of-order record is still accepted: the window only closes once
        // stream-time > window-end + grace
        input.pipeInput("k1", "v1", 1L);
        // a record for a different key in the same partition still moves stream time forward
        input.pipeInput("k2", "v1", 11L);
        // too late for the first session, which is closed by now; expected to be dropped everywhere
        input.pipeInput("k1", "v1", 5L);
        // only here to advance stream time so the earlier results are flushed
        input.pipeInput("k1", "v1", 30L);

        verify(
            drainProducerRecords(testDriver, rawTopic, STRING_DESERIALIZER, LONG_DESERIALIZER),
            asList(
                new KeyValueTimestamp<>("[k1@0/0]", 1L, 0L),
                new KeyValueTimestamp<>("[k1@0/0]", null, 0L),
                new KeyValueTimestamp<>("[k1@0/5]", 2L, 5L),
                new KeyValueTimestamp<>("[k1@0/5]", null, 5L),
                new KeyValueTimestamp<>("[k1@0/5]", 3L, 5L),
                new KeyValueTimestamp<>("[k2@11/11]", 1L, 11L),
                new KeyValueTimestamp<>("[k1@30/30]", 1L, 30L)
            )
        );
        verify(
            drainProducerRecords(testDriver, suppressedTopic, STRING_DESERIALIZER, LONG_DESERIALIZER),
            asList(
                new KeyValueTimestamp<>("[k1@0/5]", 3L, 5L),
                new KeyValueTimestamp<>("[k2@11/11]", 1L, 11L)
            )
        );
    }
}
Use of org.apache.kafka.streams.Topology in the kafka project by apache.
Class KTableKTableOuterJoinTest, method testNotSendingOldValue.
/**
 * Drives an outer join of two tables through inserts, updates and deletes on both sides and
 * checks the {@code Change} records that reach a mock processor attached downstream of the join.
 *
 * <p>Sending of old values is not enabled on the joined table here, so every expected
 * {@code Change} carries {@code null} as its old value.
 */
@Test
public void testNotSendingOldValue() {
    final StreamsBuilder builder = new StreamsBuilder();
    final int[] expectedKeys = new int[] { 0, 1, 2, 3 };
    final MockApiProcessorSupplier<Integer, String, Void, Void> supplier = new MockApiProcessorSupplier<>();

    final KTable<Integer, String> table1 = builder.table(topic1, consumed);
    final KTable<Integer, String> table2 = builder.table(topic2, consumed);
    final KTable<Integer, String> joined = table1.outerJoin(table2, MockValueJoiner.TOSTRING_JOINER);
    final KTableImpl<?, ?, ?> joinedImpl = (KTableImpl<?, ?, ?>) joined;

    // Hang the capturing processor directly off the join result.
    final Topology topology = builder.build().addProcessor("proc", supplier, joinedImpl.name);

    try (final TopologyTestDriver testDriver = new TopologyTestDriver(topology, props)) {
        final TestInputTopic<Integer, String> leftInput = testDriver.createInputTopic(
            topic1,
            Serdes.Integer().serializer(),
            Serdes.String().serializer(),
            Instant.ofEpochMilli(0L),
            Duration.ZERO
        );
        final TestInputTopic<Integer, String> rightInput = testDriver.createInputTopic(
            topic2,
            Serdes.Integer().serializer(),
            Serdes.String().serializer(),
            Instant.ofEpochMilli(0L),
            Duration.ZERO
        );
        final MockApiProcessor<Integer, String, Void, Void> processor = supplier.theCapturedProcessor();

        assertTrue(((KTableImpl<?, ?, ?>) table1).sendingOldValueEnabled());
        assertTrue(((KTableImpl<?, ?, ?>) table2).sendingOldValueEnabled());
        assertFalse(joinedImpl.sendingOldValueEnabled());

        // Two records on the left side while the right side is still empty.
        for (int idx = 0; idx < 2; idx++) {
            leftInput.pipeInput(expectedKeys[idx], "X" + expectedKeys[idx], 5L + idx);
        }
        // A null-keyed record; it must not appear in the results checked below.
        leftInput.pipeInput(null, "SomeVal", 42L);
        // left  = {0: X0 @5, 1: X1 @6}
        // right = {}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("X0+null", null), 5),
            new KeyValueTimestamp<>(1, new Change<>("X1+null", null), 6)
        );

        // Two records on the right side; two join results expected.
        for (int idx = 0; idx < 2; idx++) {
            rightInput.pipeInput(expectedKeys[idx], "Y" + expectedKeys[idx], 10L * idx);
        }
        // A null-keyed record; it must not appear in the results checked below.
        rightInput.pipeInput(null, "AnotherVal", 73L);
        // left  = {0: X0 @5, 1: X1 @6}
        // right = {0: Y0 @0, 1: Y1 @10}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("X0+Y0", null), 5),
            new KeyValueTimestamp<>(1, new Change<>("X1+Y1", null), 10)
        );

        // All four keys on the left side; four join results expected.
        for (final int key : expectedKeys) {
            leftInput.pipeInput(key, "XX" + key, 7L);
        }
        // left  = {0: XX0 @7, 1: XX1 @7, 2: XX2 @7, 3: XX3 @7}
        // right = {0: Y0 @0, 1: Y1 @10}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("XX0+Y0", null), 7),
            new KeyValueTimestamp<>(1, new Change<>("XX1+Y1", null), 10),
            new KeyValueTimestamp<>(2, new Change<>("XX2+null", null), 7),
            new KeyValueTimestamp<>(3, new Change<>("XX3+null", null), 7)
        );

        // All four keys on the right side; four join results expected.
        for (final int key : expectedKeys) {
            rightInput.pipeInput(key, "YY" + key, key * 5L);
        }
        // left  = {0: XX0 @7, 1: XX1 @7, 2: XX2 @7, 3: XX3 @7}
        // right = {0: YY0 @0, 1: YY1 @5, 2: YY2 @10, 3: YY3 @15}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("XX0+YY0", null), 7),
            new KeyValueTimestamp<>(1, new Change<>("XX1+YY1", null), 7),
            new KeyValueTimestamp<>(2, new Change<>("XX2+YY2", null), 10),
            new KeyValueTimestamp<>(3, new Change<>("XX3+YY3", null), 15)
        );

        // All four keys on the left side again, with an older timestamp; four join results expected.
        for (final int key : expectedKeys) {
            leftInput.pipeInput(key, "XXX" + key, 6L);
        }
        // left  = {0: XXX0 @6, 1: XXX1 @6, 2: XXX2 @6, 3: XXX3 @6}
        // right = {0: YY0 @0, 1: YY1 @5, 2: YY2 @10, 3: YY3 @15}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("XXX0+YY0", null), 6),
            new KeyValueTimestamp<>(1, new Change<>("XXX1+YY1", null), 6),
            new KeyValueTimestamp<>(2, new Change<>("XXX2+YY2", null), 10),
            new KeyValueTimestamp<>(3, new Change<>("XXX3+YY3", null), 15)
        );

        // Delete two keys on the right side with null values; two join results expected.
        rightInput.pipeInput(expectedKeys[0], null, 5L);
        rightInput.pipeInput(expectedKeys[1], null, 7L);
        // left  = {0: XXX0 @6, 1: XXX1 @6, 2: XXX2 @6, 3: XXX3 @6}
        // right = {2: YY2 @10, 3: YY3 @15}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("XXX0+null", null), 6),
            new KeyValueTimestamp<>(1, new Change<>("XXX1+null", null), 7)
        );

        // All four keys on the left side once more; four join results expected.
        for (final int key : expectedKeys) {
            leftInput.pipeInput(key, "XXXX" + key, 13L);
        }
        // left  = {0: XXXX0 @13, 1: XXXX1 @13, 2: XXXX2 @13, 3: XXXX3 @13}
        // right = {2: YY2 @10, 3: YY3 @15}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("XXXX0+null", null), 13),
            new KeyValueTimestamp<>(1, new Change<>("XXXX1+null", null), 13),
            new KeyValueTimestamp<>(2, new Change<>("XXXX2+YY2", null), 13),
            new KeyValueTimestamp<>(3, new Change<>("XXXX3+YY3", null), 15)
        );

        // Delete every key on the left side with null values; four join results expected.
        leftInput.pipeInput(expectedKeys[0], null, 0L);
        leftInput.pipeInput(expectedKeys[1], null, 42L);
        leftInput.pipeInput(expectedKeys[2], null, 5L);
        leftInput.pipeInput(expectedKeys[3], null, 20L);
        // left  = {}
        // right = {2: YY2 @10, 3: YY3 @15}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>(null, null), 0),
            new KeyValueTimestamp<>(1, new Change<>(null, null), 42),
            new KeyValueTimestamp<>(2, new Change<>("null+YY2", null), 10),
            new KeyValueTimestamp<>(3, new Change<>("null+YY3", null), 20)
        );
    }
}
Use of org.apache.kafka.streams.Topology in the kafka project by apache.
Class KTableKTableOuterJoinTest, method testSendingOldValue.
/**
 * Drives an outer join of two tables through inserts, updates and deletes on both sides and
 * checks the {@code Change} records that reach a mock processor attached downstream of the join.
 *
 * <p>Sending of old values is explicitly enabled on the joined table, so each expected
 * {@code Change} carries the previous join result (or {@code null} when there was none) as
 * its old value.
 */
@Test
public void testSendingOldValue() {
    final StreamsBuilder builder = new StreamsBuilder();
    final int[] expectedKeys = new int[] { 0, 1, 2, 3 };
    final MockApiProcessorSupplier<Integer, String, Void, Void> supplier = new MockApiProcessorSupplier<>();

    final KTable<Integer, String> table1 = builder.table(topic1, consumed);
    final KTable<Integer, String> table2 = builder.table(topic2, consumed);
    final KTable<Integer, String> joined = table1.outerJoin(table2, MockValueJoiner.TOSTRING_JOINER);
    final KTableImpl<?, ?, ?> joinedImpl = (KTableImpl<?, ?, ?>) joined;
    joinedImpl.enableSendingOldValues(true);

    // Hang the capturing processor directly off the join result.
    final Topology topology = builder.build().addProcessor("proc", supplier, joinedImpl.name);

    try (final TopologyTestDriver testDriver = new TopologyTestDriver(topology, props)) {
        final TestInputTopic<Integer, String> leftInput = testDriver.createInputTopic(
            topic1,
            Serdes.Integer().serializer(),
            Serdes.String().serializer(),
            Instant.ofEpochMilli(0L),
            Duration.ZERO
        );
        final TestInputTopic<Integer, String> rightInput = testDriver.createInputTopic(
            topic2,
            Serdes.Integer().serializer(),
            Serdes.String().serializer(),
            Instant.ofEpochMilli(0L),
            Duration.ZERO
        );
        final MockApiProcessor<Integer, String, Void, Void> processor = supplier.theCapturedProcessor();

        assertTrue(((KTableImpl<?, ?, ?>) table1).sendingOldValueEnabled());
        assertTrue(((KTableImpl<?, ?, ?>) table2).sendingOldValueEnabled());
        assertTrue(joinedImpl.sendingOldValueEnabled());

        // Two records on the left side while the right side is still empty.
        for (int idx = 0; idx < 2; idx++) {
            leftInput.pipeInput(expectedKeys[idx], "X" + expectedKeys[idx], 5L + idx);
        }
        // A null-keyed record; it must not appear in the results checked below.
        leftInput.pipeInput(null, "SomeVal", 42L);
        // left  = {0: X0 @5, 1: X1 @6}
        // right = {}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("X0+null", null), 5),
            new KeyValueTimestamp<>(1, new Change<>("X1+null", null), 6)
        );

        // Two records on the right side; two join results expected.
        for (int idx = 0; idx < 2; idx++) {
            rightInput.pipeInput(expectedKeys[idx], "Y" + expectedKeys[idx], 10L * idx);
        }
        // A null-keyed record; it must not appear in the results checked below.
        rightInput.pipeInput(null, "AnotherVal", 73L);
        // left  = {0: X0 @5, 1: X1 @6}
        // right = {0: Y0 @0, 1: Y1 @10}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("X0+Y0", "X0+null"), 5),
            new KeyValueTimestamp<>(1, new Change<>("X1+Y1", "X1+null"), 10)
        );

        // All four keys on the left side; four join results expected.
        for (final int key : expectedKeys) {
            leftInput.pipeInput(key, "XX" + key, 7L);
        }
        // left  = {0: XX0 @7, 1: XX1 @7, 2: XX2 @7, 3: XX3 @7}
        // right = {0: Y0 @0, 1: Y1 @10}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("XX0+Y0", "X0+Y0"), 7),
            new KeyValueTimestamp<>(1, new Change<>("XX1+Y1", "X1+Y1"), 10),
            new KeyValueTimestamp<>(2, new Change<>("XX2+null", null), 7),
            new KeyValueTimestamp<>(3, new Change<>("XX3+null", null), 7)
        );

        // All four keys on the right side; four join results expected.
        for (final int key : expectedKeys) {
            rightInput.pipeInput(key, "YY" + key, key * 5L);
        }
        // left  = {0: XX0 @7, 1: XX1 @7, 2: XX2 @7, 3: XX3 @7}
        // right = {0: YY0 @0, 1: YY1 @5, 2: YY2 @10, 3: YY3 @15}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("XX0+YY0", "XX0+Y0"), 7),
            new KeyValueTimestamp<>(1, new Change<>("XX1+YY1", "XX1+Y1"), 7),
            new KeyValueTimestamp<>(2, new Change<>("XX2+YY2", "XX2+null"), 10),
            new KeyValueTimestamp<>(3, new Change<>("XX3+YY3", "XX3+null"), 15)
        );

        // All four keys on the left side again, with an older timestamp; four join results expected.
        for (final int key : expectedKeys) {
            leftInput.pipeInput(key, "XXX" + key, 6L);
        }
        // left  = {0: XXX0 @6, 1: XXX1 @6, 2: XXX2 @6, 3: XXX3 @6}
        // right = {0: YY0 @0, 1: YY1 @5, 2: YY2 @10, 3: YY3 @15}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("XXX0+YY0", "XX0+YY0"), 6),
            new KeyValueTimestamp<>(1, new Change<>("XXX1+YY1", "XX1+YY1"), 6),
            new KeyValueTimestamp<>(2, new Change<>("XXX2+YY2", "XX2+YY2"), 10),
            new KeyValueTimestamp<>(3, new Change<>("XXX3+YY3", "XX3+YY3"), 15)
        );

        // Delete two keys on the right side with null values; two join results expected.
        rightInput.pipeInput(expectedKeys[0], null, 5L);
        rightInput.pipeInput(expectedKeys[1], null, 7L);
        // left  = {0: XXX0 @6, 1: XXX1 @6, 2: XXX2 @6, 3: XXX3 @6}
        // right = {2: YY2 @10, 3: YY3 @15}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("XXX0+null", "XXX0+YY0"), 6),
            new KeyValueTimestamp<>(1, new Change<>("XXX1+null", "XXX1+YY1"), 7)
        );

        // All four keys on the left side once more; four join results expected.
        for (final int key : expectedKeys) {
            leftInput.pipeInput(key, "XXXX" + key, 13L);
        }
        // left  = {0: XXXX0 @13, 1: XXXX1 @13, 2: XXXX2 @13, 3: XXXX3 @13}
        // right = {2: YY2 @10, 3: YY3 @15}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>("XXXX0+null", "XXX0+null"), 13),
            new KeyValueTimestamp<>(1, new Change<>("XXXX1+null", "XXX1+null"), 13),
            new KeyValueTimestamp<>(2, new Change<>("XXXX2+YY2", "XXX2+YY2"), 13),
            new KeyValueTimestamp<>(3, new Change<>("XXXX3+YY3", "XXX3+YY3"), 15)
        );

        // Delete every key on the left side with null values; four join results expected.
        leftInput.pipeInput(expectedKeys[0], null, 0L);
        leftInput.pipeInput(expectedKeys[1], null, 42L);
        leftInput.pipeInput(expectedKeys[2], null, 5L);
        leftInput.pipeInput(expectedKeys[3], null, 20L);
        // left  = {}
        // right = {2: YY2 @10, 3: YY3 @15}
        processor.checkAndClearProcessResult(
            new KeyValueTimestamp<>(0, new Change<>(null, "XXXX0+null"), 0),
            new KeyValueTimestamp<>(1, new Change<>(null, "XXXX1+null"), 42),
            new KeyValueTimestamp<>(2, new Change<>("null+YY2", "XXXX2+YY2"), 10),
            new KeyValueTimestamp<>(3, new Change<>("null+YY3", "XXXX3+YY3"), 20)
        );
    }
}
Use of org.apache.kafka.streams.Topology in the ksql project by confluentinc.
Class PhysicalPlanBuilder, method buildPlanForStructuredOutputNode.
/**
 * Assembles the {@link PersistentQueryMetadata} for a plan whose sink is a structured output node.
 *
 * <p>Steps, in order: put the sink topic into the metastore if {@code getTopic} returns null for
 * its Kafka topic name; create a {@code KsqlTable} or {@code KsqlStream} sink depending on
 * whether {@code schemaKStream} is a {@code SchemaKTable}; when {@code updateMetastore} is set,
 * put {@code cloneWithTimeKeyColumns()} of that sink into the metastore; then build the streams
 * instance and topology and wrap everything in the returned metadata.
 *
 * @param sqlExpression passed through to the sink data source constructor
 * @param schemaKStream the stream or table feeding the sink
 * @param outputNode the output node supplying id, schema, timestamp field and topic
 * @param serviceId first component of the application id
 * @param persistanceQueryPrefix second component of the application id
 * @param statement passed through to the returned metadata
 * @return metadata whose application id is
 *         {@code serviceId + persistanceQueryPrefix + queryId}
 */
private QueryMetadata buildPlanForStructuredOutputNode(
    String sqlExpression,
    final SchemaKStream schemaKStream,
    final KsqlStructuredDataOutputNode outputNode,
    final String serviceId,
    final String persistanceQueryPrefix,
    final String statement
) {
    final boolean sinkIsTable = schemaKStream instanceof SchemaKTable;
    final DataSource.DataSourceType sinkType = sinkIsTable
        ? DataSource.DataSourceType.KTABLE
        : DataSource.DataSourceType.KSTREAM;

    // Only add the sink topic when the metastore has no entry for it.
    if (metaStore.getTopic(outputNode.getKafkaTopicName()) == null) {
        metaStore.putTopic(outputNode.getKsqlTopic());
    }

    final StructuredDataSource sinkDataSource;
    if (sinkIsTable) {
        final SchemaKTable schemaKTable = (SchemaKTable) schemaKStream;
        sinkDataSource = new KsqlTable(
            sqlExpression,
            outputNode.getId().toString(),
            outputNode.getSchema(),
            schemaKStream.getKeyField(),
            outputNode.getTimestampField(),
            outputNode.getKsqlTopic(),
            // state store name: node id followed by the configured suffix
            outputNode.getId().toString() + ksqlConfig.get(KsqlConfig.KSQL_TABLE_STATESTORE_NAME_SUFFIX_CONFIG),
            schemaKTable.isWindowed()
        );
    } else {
        sinkDataSource = new KsqlStream(
            sqlExpression,
            outputNode.getId().toString(),
            outputNode.getSchema(),
            schemaKStream.getKeyField(),
            outputNode.getTimestampField(),
            outputNode.getKsqlTopic()
        );
    }

    if (updateMetastore) {
        metaStore.putSource(sinkDataSource.cloneWithTimeKeyColumns());
    }

    final QueryId queryId = sinkDataSource.getPersistentQueryId();
    final String applicationId = serviceId + persistanceQueryPrefix + queryId;

    final KafkaStreams streams = buildStreams(builder, applicationId, ksqlConfig, overriddenStreamsProperties);
    final Topology topology = builder.build();

    return new PersistentQueryMetadata(
        statement,
        streams,
        outputNode,
        schemaKStream.getExecutionPlan(""),
        queryId,
        sinkType,
        applicationId,
        kafkaTopicClient,
        outputNode.getSchema(),
        sinkDataSource.getKsqlTopic(),
        topology
    );
}
Aggregations