Example 1 with TransactionAttempt

Use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.

From the class TridentSpoutExecutor, method execute.

@Override
public void execute(BatchInfo info, Tuple input) {
    // there won't be a BatchInfo for the success stream
    TransactionAttempt attempt = (TransactionAttempt) input.getValue(0);
    if (input.getSourceStreamId().equals(MasterBatchCoordinator.COMMIT_STREAM_ID)) {
        if (attempt.equals(activeBatches.get(attempt.getTransactionId()))) {
            ((ICommitterTridentSpout.Emitter) emitter).commit(attempt);
            activeBatches.remove(attempt.getTransactionId());
        } else {
            throw new FailedException("Received commit for different transaction attempt");
        }
    } else if (input.getSourceStreamId().equals(MasterBatchCoordinator.SUCCESS_STREAM_ID)) {
        // valid to delete before what's been committed since
        // those batches will never be accessed again
        activeBatches.headMap(attempt.getTransactionId()).clear();
        emitter.success(attempt);
    } else {
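        // neither commit nor success: this tuple is from the batch stream, so emit
        // the next batch for this attempt and start tracking it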
        collector.setBatch(info.batchId);
        emitter.emitBatch(attempt, input.getValue(1), collector);
        activeBatches.put(attempt.getTransactionId(), attempt);
    }
}
Also used: FailedException (org.apache.storm.topology.FailedException), TransactionAttempt (org.apache.storm.trident.topology.TransactionAttempt)
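
For context, a minimal standalone sketch (not from the Storm codebase; it assumes the Storm client jar on the classpath, and the class name is ours) of the two mechanics behind execute(): TransactionAttempt equality covers both the transaction id and the attempt number, which is why a commit for a stale attempt fails, and the sorted activeBatches map supports the bulk cleanup on the success stream via headMap.

import java.util.TreeMap;

import org.apache.storm.trident.topology.TransactionAttempt;

public class ActiveBatchesSketch {
    public static void main(String[] args) {
        // Stand-in for TridentSpoutExecutor's sorted map of in-flight batches
        TreeMap<Long, TransactionAttempt> activeBatches = new TreeMap<>();
        activeBatches.put(1L, new TransactionAttempt(1L, 0));
        activeBatches.put(2L, new TransactionAttempt(2L, 0));
        activeBatches.put(3L, new TransactionAttempt(3L, 0));

        // A commit arriving for a different attempt of txid 3 does not match the
        // tracked attempt; this is the case where execute() throws FailedException
        TransactionAttempt staleCommit = new TransactionAttempt(3L, 1);
        System.out.println(staleCommit.equals(activeBatches.get(3L))); // false

        // On the success stream for txid 3, every strictly older batch is dropped in bulk
        activeBatches.headMap(3L).clear();
        System.out.println(activeBatches.keySet()); // [3]
    }
}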

Example 2 with TransactionAttempt

Use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.

From the class KafkaTridentSpoutEmitterEmitTest, method testTimeStampStrategyWhenTopologyIsRedeployed.

@Test
public void testTimeStampStrategyWhenTopologyIsRedeployed() {
    // The TIMESTAMP strategy should be applied when the emitter is new and the topology
    // has been redeployed (the storm id has changed); the offset should be reset to the
    // offset corresponding to startTimeStamp.
    long preRestartEmittedOffset = 20;
    int preRestartEmittedRecords = 10;
    long timeStampStartOffset = 2L;
    long pollTimeout = 1L;
    KafkaTridentSpoutBatchMetadata preExecutorRestartLastMeta = new KafkaTridentSpoutBatchMetadata(preRestartEmittedOffset, preRestartEmittedOffset + preRestartEmittedRecords - 1, "Some older topology");
    KafkaConsumer<String, String> kafkaConsumer = Mockito.mock(KafkaConsumer.class);
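    // Stub the mocked consumer: one assigned partition, with offsetsForTimes
    // resolving startTimeStamp to timeStampStartOffset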
    when(kafkaConsumer.assignment()).thenReturn(Collections.singleton(partition));
    OffsetAndTimestamp offsetAndTimestamp = new OffsetAndTimestamp(timeStampStartOffset, startTimeStamp);
    HashMap<TopicPartition, OffsetAndTimestamp> map = new HashMap<>();
    map.put(partition, offsetAndTimestamp);
    when(kafkaConsumer.offsetsForTimes(Collections.singletonMap(partition, startTimeStamp))).thenReturn(map);
    HashMap<TopicPartition, List<ConsumerRecord<String, String>>> topicPartitionMap = new HashMap<>();
    List<ConsumerRecord<String, String>> newRecords = SpoutWithMockedConsumerSetupHelper.createRecords(partition, timeStampStartOffset, recordsInKafka);
    topicPartitionMap.put(partition, newRecords);
    when(kafkaConsumer.poll(pollTimeout)).thenReturn(new ConsumerRecords<>(topicPartitionMap));
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(kafkaConsumer, FirstPollOffsetStrategy.TIMESTAMP);
    TransactionAttempt txid = new TransactionAttempt(0L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    Map<String, Object> meta = emitter.emitPartitionBatchNew(txid, collectorMock, kttp, preExecutorRestartLastMeta.toMap());
    verify(collectorMock, times(recordsInKafka)).emit(emitCaptor.capture());
    verify(kafkaConsumer, times(1)).seek(partition, timeStampStartOffset);
    List<List<Object>> emits = emitCaptor.getAllValues();
    assertThat(emits.get(0).get(0), is(timeStampStartOffset));
    KafkaTridentSpoutBatchMetadata deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(meta);
    assertThat("The batch should start at the first offset for startTimestamp", deserializedMeta.getFirstOffset(), is(timeStampStartOffset));
}
Also used: HashMap (java.util.HashMap), ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord), TopicPartition (org.apache.kafka.common.TopicPartition), ArgumentMatchers.anyList (org.mockito.ArgumentMatchers.anyList), List (java.util.List), OffsetAndTimestamp (org.apache.kafka.clients.consumer.OffsetAndTimestamp), TransactionAttempt (org.apache.storm.trident.topology.TransactionAttempt), Test (org.junit.jupiter.api.Test), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)
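
Outside the mock, the consumer-side pattern this test verifies looks roughly like the following standalone sketch; the broker address and topic name are assumptions for illustration. offsetsForTimes resolves a timestamp to the earliest offset whose record timestamp is equal or later, and the caller then seeks there.

import java.time.Duration;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.TopicPartition;

public class SeekToTimestampSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // assumed broker address
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        TopicPartition partition = new TopicPartition("demo-topic", 0); // hypothetical topic
        long startTimeStamp = System.currentTimeMillis() - 60_000L; // e.g. one minute back

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.assign(Collections.singleton(partition));
            // Resolve the timestamp to the first offset whose record timestamp is >= startTimeStamp
            Map<TopicPartition, OffsetAndTimestamp> offsets =
                consumer.offsetsForTimes(Collections.singletonMap(partition, startTimeStamp));
            OffsetAndTimestamp resolved = offsets.get(partition);
            if (resolved != null) {
                // The seek the test verifies with verify(kafkaConsumer, times(1)).seek(...)
                consumer.seek(partition, resolved.offset());
            }
            consumer.poll(Duration.ofMillis(100));
        }
    }
}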

Example 3 with TransactionAttempt

Use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.

From the class KafkaTridentSpoutEmitterEmitTest, method testReEmitBatch.

@Test
public void testReEmitBatch() {
    // Check that a re-emit produces exactly the same tuples as the last batch, even if Kafka returns more messages
    long firstEmittedOffset = 50;
    int numEmittedRecords = 10;
    KafkaTridentSpoutBatchMetadata batchMeta = new KafkaTridentSpoutBatchMetadata(firstEmittedOffset, firstEmittedOffset + numEmittedRecords - 1, topologyId);
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.UNCOMMITTED_EARLIEST);
    TransactionAttempt txid = new TransactionAttempt(10L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    emitter.reEmitPartitionBatch(txid, collectorMock, kttp, batchMeta.toMap());
    verify(collectorMock, times(numEmittedRecords)).emit(emitCaptor.capture());
    List<List<Object>> emits = emitCaptor.getAllValues();
    assertThat(emits.get(0).get(0), is(firstEmittedOffset));
    assertThat(emits.get(emits.size() - 1).get(0), is(firstEmittedOffset + numEmittedRecords - 1));
}
Also used: ArgumentMatchers.anyList (org.mockito.ArgumentMatchers.anyList), List (java.util.List), TransactionAttempt (org.apache.storm.trident.topology.TransactionAttempt), Test (org.junit.jupiter.api.Test), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)
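
The emitCaptor pattern used in these tests is plain Mockito; here is a self-contained sketch, with a hypothetical Collector interface standing in for TridentCollector.

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import java.util.Arrays;
import java.util.List;

import org.mockito.ArgumentCaptor;

public class CaptorSketch {
    // Hypothetical stand-in for org.apache.storm.trident.operation.TridentCollector
    interface Collector {
        void emit(List<Object> tuple);
    }

    @SuppressWarnings("unchecked")
    public static void main(String[] args) {
        Collector collectorMock = mock(Collector.class);
        collectorMock.emit(Arrays.asList(50L, "value-50"));
        collectorMock.emit(Arrays.asList(51L, "value-51"));

        // Capture every emitted tuple so its contents can be asserted afterwards
        ArgumentCaptor<List<Object>> captor =
            ArgumentCaptor.forClass((Class<List<Object>>) (Class<?>) List.class);
        verify(collectorMock, times(2)).emit(captor.capture());

        List<List<Object>> emits = captor.getAllValues();
        System.out.println(emits.get(0).get(0)); // 50
        System.out.println(emits.get(emits.size() - 1).get(0)); // 51
    }
}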

Example 4 with TransactionAttempt

Use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.

From the class KafkaTridentSpoutEmitterEmitTest, method doEmitNewBatchTest.

private Map<String, Object> doEmitNewBatchTest(FirstPollOffsetStrategy firstPollOffsetStrategy, TridentCollector collectorMock, TopicPartition tp, Map<String, Object> previousBatchMeta) {
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(firstPollOffsetStrategy);
    TransactionAttempt txid = new TransactionAttempt(10L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(tp);
    return emitter.emitPartitionBatchNew(txid, collectorMock, kttp, previousBatchMeta);
}
Also used: TransactionAttempt (org.apache.storm.trident.topology.TransactionAttempt)
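
The Map<String, Object> flowing through this helper is serialized batch metadata. A minimal round-trip sketch (offsets and topology id are illustrative; storm-kafka-client is assumed on the classpath):

import java.util.Map;

import org.apache.storm.kafka.spout.trident.KafkaTridentSpoutBatchMetadata;

public class BatchMetaRoundTripSketch {
    public static void main(String[] args) {
        // A batch covering offsets 50..59, as in the tests above
        KafkaTridentSpoutBatchMetadata meta =
            new KafkaTridentSpoutBatchMetadata(50, 59, "demo-topology");

        // Trident persists batch metadata in map form, so toMap/fromMap must round-trip
        Map<String, Object> serialized = meta.toMap();
        KafkaTridentSpoutBatchMetadata restored = KafkaTridentSpoutBatchMetadata.fromMap(serialized);

        System.out.println(restored.getFirstOffset()); // 50
        System.out.println(restored.getLastOffset());  // 59
    }
}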

Example 5 with TransactionAttempt

Use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.

From the class KafkaTridentSpoutEmitterEmitTest, method testEmitEmptyFirstBatch.

@Test
public void testEmitEmptyFirstBatch() {
    // Check that when the first batch after a redeploy is empty, the emitter does not
    // restart at the pre-redeploy offset (STORM-3279).
    long firstEmittedOffset = 50;
    int emittedRecords = 10;
    KafkaTridentSpoutBatchMetadata preRedeployLastMeta = new KafkaTridentSpoutBatchMetadata(firstEmittedOffset, firstEmittedOffset + emittedRecords - 1, "an old topology");
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.LATEST);
    TransactionAttempt txid = new TransactionAttempt(0L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    Map<String, Object> meta = emitter.emitPartitionBatchNew(txid, collectorMock, kttp, preRedeployLastMeta.toMap());
    verify(collectorMock, never()).emit(anyList());
    KafkaTridentSpoutBatchMetadata deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(meta);
    assertThat(deserializedMeta.getFirstOffset(), is(lastOffsetInKafka));
    assertThat(deserializedMeta.getLastOffset(), is(lastOffsetInKafka));
    long firstNewRecordOffset = lastOffsetInKafka + 1;
    int numNewRecords = 10;
    List<ConsumerRecord<String, String>> newRecords = SpoutWithMockedConsumerSetupHelper.createRecords(partition, firstNewRecordOffset, numNewRecords);
    newRecords.forEach(consumer::addRecord);
    meta = emitter.emitPartitionBatchNew(txid, collectorMock, kttp, meta);
    verify(collectorMock, times(numNewRecords)).emit(emitCaptor.capture());
    List<List<Object>> emits = emitCaptor.getAllValues();
    assertThat(emits.get(0).get(0), is(firstNewRecordOffset));
    assertThat(emits.get(emits.size() - 1).get(0), is(firstNewRecordOffset + numNewRecords - 1));
    deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(meta);
    assertThat("The batch should start at the first offset of the polled records", deserializedMeta.getFirstOffset(), is(firstNewRecordOffset));
    assertThat("The batch should end at the last offset of the polled messages", deserializedMeta.getLastOffset(), is(firstNewRecordOffset + numNewRecords - 1));
}
Also used: ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord), ArgumentMatchers.anyList (org.mockito.ArgumentMatchers.anyList), List (java.util.List), TransactionAttempt (org.apache.storm.trident.topology.TransactionAttempt), Test (org.junit.jupiter.api.Test), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)
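
The first half of the test pins down the empty-batch convention: with nothing new to emit, the metadata records first == last == the last offset already in Kafka, so the follow-up batch resumes one past it. A minimal sketch of that invariant (offsets and topology id are illustrative):

import org.apache.storm.kafka.spout.trident.KafkaTridentSpoutBatchMetadata;

public class EmptyBatchSketch {
    public static void main(String[] args) {
        long lastOffsetInKafka = 9L; // assume the partition currently holds offsets 0..9

        // An empty batch pins first == last == the last offset already present, instead
        // of carrying the stale pre-redeploy metadata forward (see STORM-3279)
        KafkaTridentSpoutBatchMetadata emptyBatch =
            new KafkaTridentSpoutBatchMetadata(lastOffsetInKafka, lastOffsetInKafka, "demo-topology");

        // The next batch therefore resumes at lastOffset + 1, matching the
        // firstNewRecordOffset assertion in the test above
        System.out.println(emptyBatch.getLastOffset() + 1); // 10
    }
}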

Aggregations

TransactionAttempt (org.apache.storm.trident.topology.TransactionAttempt): 11 usages
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 8 usages
Test (org.junit.jupiter.api.Test): 7 usages
List (java.util.List): 6 usages
ArgumentMatchers.anyList (org.mockito.ArgumentMatchers.anyList): 6 usages
ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 3 usages
HashMap (java.util.HashMap): 1 usage
OffsetAndTimestamp (org.apache.kafka.clients.consumer.OffsetAndTimestamp): 1 usage
TopicPartition (org.apache.kafka.common.TopicPartition): 1 usage
FailedException (org.apache.storm.topology.FailedException): 1 usage
Values (org.apache.storm.tuple.Values): 1 usage
EnumSource (org.junit.jupiter.params.provider.EnumSource): 1 usage