Use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.
The class KafkaTridentSpoutEmitterEmitTest, method testEarliestStrategyWhenTopologyIsRedeployed.
@Test
public void testEarliestStrategyWhenTopologyIsRedeployed() {
    /**
     * EARLIEST should be applied if the emitter is new and the topology has been redeployed (storm id has changed).
     */
    long preRestartEmittedOffset = 20;
    int preRestartEmittedRecords = 10;
    KafkaTridentSpoutBatchMetadata preExecutorRestartLastMeta = new KafkaTridentSpoutBatchMetadata(preRestartEmittedOffset, preRestartEmittedOffset + preRestartEmittedRecords - 1, "Some older topology");
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.EARLIEST);
    TransactionAttempt txid = new TransactionAttempt(0L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    Map<String, Object> meta = emitter.emitPartitionBatchNew(txid, collectorMock, kttp, preExecutorRestartLastMeta.toMap());
    verify(collectorMock, times(recordsInKafka)).emit(emitCaptor.capture());
    List<List<Object>> emits = emitCaptor.getAllValues();
    assertThat(emits.get(0).get(0), is(firstOffsetInKafka));
    assertThat(emits.get(emits.size() - 1).get(0), is(lastOffsetInKafka));
    KafkaTridentSpoutBatchMetadata deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(meta);
    assertThat("The batch should start at the first offset of the polled records", deserializedMeta.getFirstOffset(), is(firstOffsetInKafka));
    assertThat("The batch should end at the last offset of the polled messages", deserializedMeta.getLastOffset(), is(lastOffsetInKafka));
}
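These tests lean on fixtures defined elsewhere in the test class (partition, topologyId, collectorMock, emitCaptor, consumer, firstOffsetInKafka, lastOffsetInKafka, recordsInKafka) plus a createEmitter helper. A minimal sketch of what that shared setup might look like, assuming JUnit 5, Mockito, and Kafka's MockConsumer; the fixture values are assumptions (chosen to be consistent with the assertions in these tests), and the emitter wiring is deliberately elided because the KafkaTridentSpoutEmitter constructor varies across storm-kafka-client versions:

import java.util.Collections;
import java.util.List;
import org.apache.kafka.clients.consumer.MockConsumer;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.kafka.common.TopicPartition;
import org.apache.storm.kafka.spout.FirstPollOffsetStrategy;
import org.apache.storm.kafka.spout.trident.KafkaTridentSpoutEmitter;
import org.apache.storm.trident.operation.TridentCollector;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

@ExtendWith(MockitoExtension.class)
public class EmitterEmitTestFixturesSketch {

    // Hypothetical fixture values; the real test class may use different ones.
    private final TopicPartition partition = new TopicPartition("test", 0);
    private final String topologyId = "topologyId";
    private final long firstOffsetInKafka = 0;
    private final int recordsInKafka = 100;
    private final long lastOffsetInKafka = firstOffsetInKafka + recordsInKafka - 1;

    @Mock
    private TridentCollector collectorMock;
    @Captor
    private ArgumentCaptor<List<Object>> emitCaptor;

    // A MockConsumer stands in for Kafka, so tests can pre-seed records via addRecord().
    private final MockConsumer<String, String> consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST);

    private KafkaTridentSpoutEmitter<String, String> createEmitter(FirstPollOffsetStrategy strategy) {
        consumer.assign(Collections.singleton(partition));
        consumer.updateBeginningOffsets(Collections.singletonMap(partition, firstOffsetInKafka));
        // Wrapping the MockConsumer in a KafkaTridentSpoutEmitter is version-specific
        // (the constructor has changed across releases), so it is omitted from this sketch.
        throw new UnsupportedOperationException("version-specific emitter wiring omitted");
    }
}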
Use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.
The class KafkaTridentSpoutEmitterEmitTest, method testReEmitBatchForOldTopologyWhenIgnoringCommittedOffsets.
@Test
public void testReEmitBatchForOldTopologyWhenIgnoringCommittedOffsets() {
    // In some cases users will want to drop retrying old batches, e.g. if the topology should start over from scratch.
    // If the FirstPollOffsetStrategy ignores committed offsets, we should not retry batches for old topologies.
    // The batch retry should be skipped entirely.
    KafkaTridentSpoutBatchMetadata batchMeta = new KafkaTridentSpoutBatchMetadata(firstOffsetInKafka, lastOffsetInKafka, "a new storm id");
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.EARLIEST);
    TransactionAttempt txid = new TransactionAttempt(10L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    emitter.reEmitPartitionBatch(txid, collectorMock, kttp, batchMeta.toMap());
    verify(collectorMock, never()).emit(anyList());
}
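The skip decision the test exercises hinges on two facts: the strategy ignores committed offsets, and the batch metadata carries a storm id from a different deployment. A hypothetical, self-contained sketch of that rule; the method name and shape are illustrative, not the emitter's actual internals:

import java.util.Objects;
import org.apache.storm.kafka.spout.FirstPollOffsetStrategy;

final class OldBatchSkipRule {

    // A retried batch is dropped when the strategy ignores committed offsets and
    // the batch metadata was written by a different topology deployment.
    static boolean shouldSkipRetry(FirstPollOffsetStrategy strategy, String batchTopologyId, String currentTopologyId) {
        boolean ignoresCommittedOffsets = strategy == FirstPollOffsetStrategy.EARLIEST
            || strategy == FirstPollOffsetStrategy.LATEST
            || strategy == FirstPollOffsetStrategy.TIMESTAMP;
        return ignoresCommittedOffsets && !Objects.equals(batchTopologyId, currentTopologyId);
    }

    public static void main(String[] args) {
        // The test's scenario: EARLIEST strategy, metadata stamped "a new storm id"
        // while the emitter runs under some other topology id, so the retry is skipped.
        System.out.println(shouldSkipRetry(FirstPollOffsetStrategy.EARLIEST, "a new storm id", "current topology id")); // true
    }
}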
Use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.
The class KafkaTridentSpoutEmitterEmitTest, method testLatestStrategyWhenTopologyIsRedeployed.
@Test
public void testLatestStrategyWhenTopologyIsRedeployed() {
    /**
     * LATEST should be applied if the emitter is new and the topology has been redeployed (storm id has changed).
     */
    long preRestartEmittedOffset = 20;
    int preRestartEmittedRecords = 10;
    KafkaTridentSpoutBatchMetadata preExecutorRestartLastMeta = new KafkaTridentSpoutBatchMetadata(preRestartEmittedOffset, preRestartEmittedOffset + preRestartEmittedRecords - 1, "Some older topology");
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.LATEST);
    TransactionAttempt txid = new TransactionAttempt(0L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    Map<String, Object> meta = emitter.emitPartitionBatchNew(txid, collectorMock, kttp, preExecutorRestartLastMeta.toMap());
    verify(collectorMock, never()).emit(anyList());
}
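With LATEST on a redeploy, the emitter positions itself at the end of the partition before its first poll, so the first new batch is empty. A self-contained illustration of that seek-then-poll behavior using Kafka's MockConsumer; the topic name and offsets are invented for the demo:

import java.time.Duration;
import java.util.Collections;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.MockConsumer;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.kafka.common.TopicPartition;

final class LatestSeekDemo {
    public static void main(String[] args) {
        TopicPartition tp = new TopicPartition("test", 0); // invented topic/partition
        MockConsumer<String, String> consumer = new MockConsumer<>(OffsetResetStrategy.LATEST);
        consumer.assign(Collections.singleton(tp));
        consumer.updateBeginningOffsets(Collections.singletonMap(tp, 0L));
        consumer.updateEndOffsets(Collections.singletonMap(tp, 10L)); // pretend 10 records already exist
        consumer.seekToEnd(Collections.singleton(tp)); // LATEST: position past all existing records
        ConsumerRecords<String, String> polled = consumer.poll(Duration.ofMillis(100));
        System.out.println("records polled: " + polled.count()); // 0: nothing to emit
    }
}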
Use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.
The class KafkaTridentSpoutEmitterEmitTest, method testEmitEmptyBatches.
@Test
public void testEmitEmptyBatches() throws Exception {
    // Check that the emitter can handle emitting empty batches on a new partition.
    // If the spout is configured to seek to LATEST, or the partition is empty, the initial batches may be empty.
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.LATEST);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    Map<String, Object> lastBatchMeta = null;
    // Emit 10 empty batches, simulating no new records being present in Kafka.
    for (int i = 0; i < 10; i++) {
        TransactionAttempt txid = new TransactionAttempt((long) i, 0);
        lastBatchMeta = emitter.emitPartitionBatchNew(txid, collectorMock, kttp, lastBatchMeta);
        KafkaTridentSpoutBatchMetadata deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(lastBatchMeta);
        assertThat("Since the first poll strategy is LATEST, the meta should indicate that the last message has already been emitted", deserializedMeta.getFirstOffset(), is(lastOffsetInKafka));
        assertThat("Since the first poll strategy is LATEST, the meta should indicate that the last message has already been emitted", deserializedMeta.getLastOffset(), is(lastOffsetInKafka));
    }
    // Add new records to Kafka, and check that the next batch contains these records.
    long firstNewRecordOffset = lastOffsetInKafka + 1;
    int numNewRecords = 10;
    List<ConsumerRecord<String, String>> newRecords = SpoutWithMockedConsumerSetupHelper.createRecords(partition, firstNewRecordOffset, numNewRecords);
    newRecords.forEach(consumer::addRecord);
    lastBatchMeta = emitter.emitPartitionBatchNew(new TransactionAttempt(11L, 0), collectorMock, kttp, lastBatchMeta);
    verify(collectorMock, times(numNewRecords)).emit(emitCaptor.capture());
    List<List<Object>> emits = emitCaptor.getAllValues();
    assertThat(emits.get(0).get(0), is(firstNewRecordOffset));
    assertThat(emits.get(emits.size() - 1).get(0), is(firstNewRecordOffset + numNewRecords - 1));
    KafkaTridentSpoutBatchMetadata deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(lastBatchMeta);
    assertThat("The batch should start at the first offset of the polled records", deserializedMeta.getFirstOffset(), is(firstNewRecordOffset));
    assertThat("The batch should end at the last offset of the polled messages", deserializedMeta.getLastOffset(), is(firstNewRecordOffset + numNewRecords - 1));
}
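SpoutWithMockedConsumerSetupHelper.createRecords builds the ConsumerRecords fed to the MockConsumer above. A hedged re-creation of what such a helper plausibly does, one record per consecutive offset; the real helper's key/value payloads may differ:

import java.util.ArrayList;
import java.util.List;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.TopicPartition;

final class RecordFixtures {

    // Creates `count` records on `tp` at consecutive offsets starting from `startOffset`,
    // with synthetic keys and values derived from the offset.
    static List<ConsumerRecord<String, String>> createRecords(TopicPartition tp, long startOffset, int count) {
        List<ConsumerRecord<String, String>> records = new ArrayList<>();
        for (int i = 0; i < count; i++) {
            long offset = startOffset + i;
            records.add(new ConsumerRecord<>(tp.topic(), tp.partition(), offset, "key-" + offset, "value-" + offset));
        }
        return records;
    }
}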
Use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.
The class KafkaTridentSpoutEmitterEmitTest, method testUnconditionalStrategyWhenSpoutWorkerIsRestarted.
@ParameterizedTest
@EnumSource(value = FirstPollOffsetStrategy.class, names = { "EARLIEST", "LATEST", "TIMESTAMP" })
public void testUnconditionalStrategyWhenSpoutWorkerIsRestarted(FirstPollOffsetStrategy firstPollOffsetStrategy) {
    /**
     * EARLIEST/LATEST/TIMESTAMP should act like UNCOMMITTED_EARLIEST/LATEST/TIMESTAMP if the emitter is new but the
     * topology has not restarted (storm id has not changed).
     */
    long preRestartEmittedOffset = 20;
    int lastBatchEmittedRecords = 10;
    int preRestartEmittedRecords = 30;
    KafkaTridentSpoutBatchMetadata preExecutorRestartLastMeta = new KafkaTridentSpoutBatchMetadata(preRestartEmittedOffset, preRestartEmittedOffset + lastBatchEmittedRecords - 1, topologyId);
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(firstPollOffsetStrategy);
    TransactionAttempt txid = new TransactionAttempt(0L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    Map<String, Object> meta = emitter.emitPartitionBatchNew(txid, collectorMock, kttp, preExecutorRestartLastMeta.toMap());
    long firstEmittedOffset = preRestartEmittedOffset + lastBatchEmittedRecords;
    int emittedRecords = recordsInKafka - preRestartEmittedRecords;
    verify(collectorMock, times(emittedRecords)).emit(emitCaptor.capture());
    List<List<Object>> emits = emitCaptor.getAllValues();
    assertThat(emits.get(0).get(0), is(firstEmittedOffset));
    assertThat(emits.get(emits.size() - 1).get(0), is(lastOffsetInKafka));
    KafkaTridentSpoutBatchMetadata deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(meta);
    assertThat("The batch should start at the first offset of the polled records", deserializedMeta.getFirstOffset(), is(firstEmittedOffset));
    assertThat("The batch should end at the last offset of the polled messages", deserializedMeta.getLastOffset(), is(lastOffsetInKafka));
}
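The expected emit window in this test follows from simple offset arithmetic: the new batch resumes immediately after the last pre-restart batch's metadata window and drains whatever Kafka still holds. A worked check, assuming fixture values recordsInKafka = 100 and lastOffsetInKafka = 99 (assumptions, chosen to be consistent with the assertions above):

final class OffsetArithmetic {
    public static void main(String[] args) {
        long preRestartEmittedOffset = 20;  // from the test
        int lastBatchEmittedRecords = 10;   // from the test
        int preRestartEmittedRecords = 30;  // from the test
        int recordsInKafka = 100;           // assumed fixture value
        long lastOffsetInKafka = 99;        // assumed fixture value
        // The new batch starts right after the last pre-restart batch's metadata window.
        long firstEmittedOffset = preRestartEmittedOffset + lastBatchEmittedRecords; // 30
        // Everything beyond the records already emitted before the restart is re-polled.
        int emittedRecords = recordsInKafka - preRestartEmittedRecords;              // 70
        long lastEmittedOffset = firstEmittedOffset + emittedRecords - 1;            // 99
        System.out.println(firstEmittedOffset + " .. " + lastEmittedOffset + " == last offset " + lastOffsetInKafka);
    }
}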