Search in sources :

Example 6 with TransactionAttempt

use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.

the class KafkaTridentSpoutEmitterEmitTest method testEarliestStrategyWhenTopologyIsRedeployed.

@Test
public void testEarliestStrategyWhenTopologyIsRedeployed() {
    // Scenario: the emitter is new and the previous batch metadata was written under a different storm id,
    // i.e. the topology has been redeployed. EARLIEST is then expected to apply, so the batch must cover
    // everything from the first to the last offset present in Kafka.
    final long staleFirstOffset = 20;
    final int staleRecordCount = 10;
    final long staleLastOffset = staleFirstOffset + staleRecordCount - 1;
    final KafkaTridentSpoutBatchMetadata staleMeta =
        new KafkaTridentSpoutBatchMetadata(staleFirstOffset, staleLastOffset, "Some older topology");

    final KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.EARLIEST);
    final Map<String, Object> emittedMeta = emitter.emitPartitionBatchNew(
        new TransactionAttempt(0L, 0),
        collectorMock,
        new KafkaTridentSpoutTopicPartition(partition),
        staleMeta.toMap());

    // Every record in Kafka must have been emitted exactly once.
    verify(collectorMock, times(recordsInKafka)).emit(emitCaptor.capture());
    final List<List<Object>> capturedTuples = emitCaptor.getAllValues();
    final List<Object> firstTuple = capturedTuples.get(0);
    final List<Object> lastTuple = capturedTuples.get(capturedTuples.size() - 1);
    assertThat(firstTuple.get(0), is(firstOffsetInKafka));
    assertThat(lastTuple.get(0), is(lastOffsetInKafka));

    // The returned metadata must describe the same offset range as the emitted tuples.
    final KafkaTridentSpoutBatchMetadata roundTrippedMeta = KafkaTridentSpoutBatchMetadata.fromMap(emittedMeta);
    assertThat("The batch should start at the first offset of the polled records", roundTrippedMeta.getFirstOffset(), is(firstOffsetInKafka));
    assertThat("The batch should end at the last offset of the polled messages", roundTrippedMeta.getLastOffset(), is(lastOffsetInKafka));
}
Also used : ArgumentMatchers.anyList(org.mockito.ArgumentMatchers.anyList) List(java.util.List) TransactionAttempt(org.apache.storm.trident.topology.TransactionAttempt) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 7 with TransactionAttempt

use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.

the class KafkaTridentSpoutEmitterEmitTest method testReEmitBatchForOldTopologyWhenIgnoringCommittedOffsets.

@Test
public void testReEmitBatchForOldTopologyWhenIgnoringCommittedOffsets() {
    // Users sometimes want old batches to be dropped rather than retried, e.g. when the topology should
    // start over from scratch. When the FirstPollOffsetStrategy ignores committed offsets, batches that
    // belong to an old topology must not be retried: the re-emit is expected to be skipped entirely.
    final KafkaTridentSpoutBatchMetadata metaToReplay =
        new KafkaTridentSpoutBatchMetadata(firstOffsetInKafka, lastOffsetInKafka, "a new storm id");
    final KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.EARLIEST);

    emitter.reEmitPartitionBatch(
        new TransactionAttempt(10L, 0),
        collectorMock,
        new KafkaTridentSpoutTopicPartition(partition),
        metaToReplay.toMap());

    // Nothing at all may reach the collector.
    verify(collectorMock, never()).emit(anyList());
}
Also used : TransactionAttempt(org.apache.storm.trident.topology.TransactionAttempt) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 8 with TransactionAttempt

use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.

the class KafkaTridentSpoutEmitterEmitTest method testLatestStrategyWhenTopologyIsRedeployed.

@Test
public void testLatestStrategyWhenTopologyIsRedeployed() {
    // LATEST should be applied if the emitter is new and the topology has been redeployed (storm id has changed).
    // The previous batch metadata below was written under a different storm id, so the emitter is expected to
    // disregard it and emit none of the records that are already present in Kafka.
    long preRestartEmittedOffset = 20;
    int preRestartEmittedRecords = 10;
    KafkaTridentSpoutBatchMetadata preExecutorRestartLastMeta = new KafkaTridentSpoutBatchMetadata(preRestartEmittedOffset, preRestartEmittedOffset + preRestartEmittedRecords - 1, "Some older topology");
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.LATEST);
    TransactionAttempt txid = new TransactionAttempt(0L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    // The returned batch metadata is not inspected here; only the absence of emitted tuples is verified.
    emitter.emitPartitionBatchNew(txid, collectorMock, kttp, preExecutorRestartLastMeta.toMap());
    verify(collectorMock, never()).emit(anyList());
}
Also used : TransactionAttempt(org.apache.storm.trident.topology.TransactionAttempt) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 9 with TransactionAttempt

use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.

the class KafkaTridentSpoutEmitterEmitTest method testEmitEmptyBatches.

@Test
public void testEmitEmptyBatches() throws Exception {
    // Verifies that the emitter copes with empty batches on a new partition. When the spout seeks to LATEST,
    // or the partition has no records, the first batches may contain nothing.
    final KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.LATEST);
    final KafkaTridentSpoutTopicPartition topicPartition = new KafkaTridentSpoutTopicPartition(partition);

    // Phase 1: ten consecutive empty batches, simulating that no new records show up in Kafka.
    Map<String, Object> previousMeta = null;
    for (long txNumber = 0; txNumber < 10; txNumber++) {
        previousMeta = emitter.emitPartitionBatchNew(new TransactionAttempt(txNumber, 0), collectorMock, topicPartition, previousMeta);
        final KafkaTridentSpoutBatchMetadata emptyBatchMeta = KafkaTridentSpoutBatchMetadata.fromMap(previousMeta);
        assertThat("Since the first poll strategy is LATEST, the meta should indicate that the last message has already been emitted", emptyBatchMeta.getFirstOffset(), is(lastOffsetInKafka));
        assertThat("Since the first poll strategy is LATEST, the meta should indicate that the last message has already been emitted", emptyBatchMeta.getLastOffset(), is(lastOffsetInKafka));
    }

    // Phase 2: append fresh records to Kafka and make sure the next batch consists of exactly those records.
    long firstNewRecordOffset = lastOffsetInKafka + 1;
    final int numNewRecords = 10;
    final long lastNewRecordOffset = firstNewRecordOffset + numNewRecords - 1;
    final List<ConsumerRecord<String, String>> freshRecords =
        SpoutWithMockedConsumerSetupHelper.createRecords(partition, firstNewRecordOffset, numNewRecords);
    for (ConsumerRecord<String, String> freshRecord : freshRecords) {
        consumer.addRecord(freshRecord);
    }
    previousMeta = emitter.emitPartitionBatchNew(new TransactionAttempt(11L, 0), collectorMock, topicPartition, previousMeta);

    verify(collectorMock, times(numNewRecords)).emit(emitCaptor.capture());
    final List<List<Object>> capturedTuples = emitCaptor.getAllValues();
    final List<Object> firstTuple = capturedTuples.get(0);
    final List<Object> lastTuple = capturedTuples.get(capturedTuples.size() - 1);
    assertThat(firstTuple.get(0), is(firstNewRecordOffset));
    assertThat(lastTuple.get(0), is(lastNewRecordOffset));

    final KafkaTridentSpoutBatchMetadata populatedBatchMeta = KafkaTridentSpoutBatchMetadata.fromMap(previousMeta);
    assertThat("The batch should start at the first offset of the polled records", populatedBatchMeta.getFirstOffset(), is(firstNewRecordOffset));
    assertThat("The batch should end at the last offset of the polled messages", populatedBatchMeta.getLastOffset(), is(lastNewRecordOffset));
}
Also used : ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) ArgumentMatchers.anyList(org.mockito.ArgumentMatchers.anyList) List(java.util.List) TransactionAttempt(org.apache.storm.trident.topology.TransactionAttempt) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 10 with TransactionAttempt

use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.

the class KafkaTridentSpoutEmitterEmitTest method testUnconditionalStrategyWhenSpoutWorkerIsRestarted.

@ParameterizedTest
@EnumSource(value = FirstPollOffsetStrategy.class, names = { "EARLIEST", "LATEST", "TIMESTAMP" })
public void testUnconditionalStrategyWhenSpoutWorkerIsRestarted(FirstPollOffsetStrategy firstPollOffsetStrategy) {
    // Scenario: the emitter is new, but the topology has not been restarted (the storm id is unchanged).
    // EARLIEST/LATEST/TIMESTAMP are then expected to behave like UNCOMMITTED_EARLIEST/LATEST/TIMESTAMP,
    // i.e. emission resumes right after the last batch described by the previous metadata.
    final long lastBatchFirstOffset = 20;
    final int lastBatchSize = 10;
    final int alreadyEmittedRecords = 30;
    final long lastBatchLastOffset = lastBatchFirstOffset + lastBatchSize - 1;
    final KafkaTridentSpoutBatchMetadata lastMetaBeforeRestart =
        new KafkaTridentSpoutBatchMetadata(lastBatchFirstOffset, lastBatchLastOffset, topologyId);

    final KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(firstPollOffsetStrategy);
    final Map<String, Object> emittedMeta = emitter.emitPartitionBatchNew(
        new TransactionAttempt(0L, 0),
        collectorMock,
        new KafkaTridentSpoutTopicPartition(partition),
        lastMetaBeforeRestart.toMap());

    // Expected: emission starts right after the previous batch and covers the remaining records.
    final long expectedFirstOffset = lastBatchFirstOffset + lastBatchSize;
    int expectedEmitCount = recordsInKafka - alreadyEmittedRecords;
    verify(collectorMock, times(expectedEmitCount)).emit(emitCaptor.capture());
    final List<List<Object>> capturedTuples = emitCaptor.getAllValues();
    final List<Object> firstTuple = capturedTuples.get(0);
    final List<Object> lastTuple = capturedTuples.get(capturedTuples.size() - 1);
    assertThat(firstTuple.get(0), is(expectedFirstOffset));
    assertThat(lastTuple.get(0), is(lastOffsetInKafka));

    final KafkaTridentSpoutBatchMetadata roundTrippedMeta = KafkaTridentSpoutBatchMetadata.fromMap(emittedMeta);
    assertThat("The batch should start at the first offset of the polled records", roundTrippedMeta.getFirstOffset(), is(expectedFirstOffset));
    assertThat("The batch should end at the last offset of the polled messages", roundTrippedMeta.getLastOffset(), is(lastOffsetInKafka));
}
Also used : ArgumentMatchers.anyList(org.mockito.ArgumentMatchers.anyList) List(java.util.List) TransactionAttempt(org.apache.storm.trident.topology.TransactionAttempt) EnumSource(org.junit.jupiter.params.provider.EnumSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

TransactionAttempt (org.apache.storm.trident.topology.TransactionAttempt)11 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)8 Test (org.junit.jupiter.api.Test)7 List (java.util.List)6 ArgumentMatchers.anyList (org.mockito.ArgumentMatchers.anyList)6 ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord)3 HashMap (java.util.HashMap)1 OffsetAndTimestamp (org.apache.kafka.clients.consumer.OffsetAndTimestamp)1 TopicPartition (org.apache.kafka.common.TopicPartition)1 FailedException (org.apache.storm.topology.FailedException)1 Values (org.apache.storm.tuple.Values)1 EnumSource (org.junit.jupiter.params.provider.EnumSource)1