use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.
the class TridentSpoutExecutor method execute.
@Override
public void execute(BatchInfo info, Tuple input) {
    // there won't be a BatchInfo for the success stream
    TransactionAttempt attempt = (TransactionAttempt) input.getValue(0);
    if (input.getSourceStreamId().equals(MasterBatchCoordinator.COMMIT_STREAM_ID)) {
        if (attempt.equals(activeBatches.get(attempt.getTransactionId()))) {
            ((ICommitterTridentSpout.Emitter) emitter).commit(attempt);
            activeBatches.remove(attempt.getTransactionId());
        } else {
            throw new FailedException("Received commit for different transaction attempt");
        }
    } else if (input.getSourceStreamId().equals(MasterBatchCoordinator.SUCCESS_STREAM_ID)) {
        // valid to delete before what's been committed since
        // those batches will never be accessed again
        activeBatches.headMap(attempt.getTransactionId()).clear();
        emitter.success(attempt);
    } else {
        collector.setBatch(info.batchId);
        emitter.emitBatch(attempt, input.getValue(1), collector);
        activeBatches.put(attempt.getTransactionId(), attempt);
    }
}
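For context on the commit branch above: a TransactionAttempt pairs a transaction id with an attempt number, and the executor only commits when the attempt it tracked for that transaction id equals the one arriving on the commit stream. Below is a minimal sketch of that check; the class name TransactionAttemptSketch and the literal ids are illustrative, and it assumes TransactionAttempt.equals() compares both the transaction id and the attempt number, which is what the FailedException path above relies on.

import java.util.TreeMap;
import org.apache.storm.trident.topology.TransactionAttempt;

public class TransactionAttemptSketch {
    public static void main(String[] args) {
        TreeMap<Long, TransactionAttempt> activeBatches = new TreeMap<>();

        // Track the first attempt of transaction 42, as the emit branch above does.
        TransactionAttempt first = new TransactionAttempt(42L, 0);
        activeBatches.put(first.getTransactionId(), first);

        // A commit arriving for a later attempt of the same transaction does not
        // equal the tracked attempt, so the commit branch above would throw
        // FailedException rather than commit it.
        TransactionAttempt retry = new TransactionAttempt(42L, 1);
        System.out.println(retry.equals(activeBatches.get(retry.getTransactionId()))); // false
    }
}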
use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.
the class KafkaTridentSpoutEmitterEmitTest method testTimeStampStrategyWhenTopologyIsRedeployed.
@Test
public void testTimeStampStrategyWhenTopologyIsRedeployed() {
    /**
     * The TIMESTAMP strategy should be applied if the emitter is new and the topology has been redeployed (storm id has changed).
     * The offset should be reset to the offset corresponding to startTimeStamp.
     */
    long preRestartEmittedOffset = 20;
    int preRestartEmittedRecords = 10;
    long timeStampStartOffset = 2L;
    long pollTimeout = 1L;
    KafkaTridentSpoutBatchMetadata preExecutorRestartLastMeta = new KafkaTridentSpoutBatchMetadata(preRestartEmittedOffset, preRestartEmittedOffset + preRestartEmittedRecords - 1, "Some older topology");
    KafkaConsumer<String, String> kafkaConsumer = Mockito.mock(KafkaConsumer.class);
    when(kafkaConsumer.assignment()).thenReturn(Collections.singleton(partition));
    OffsetAndTimestamp offsetAndTimestamp = new OffsetAndTimestamp(timeStampStartOffset, startTimeStamp);
    HashMap<TopicPartition, OffsetAndTimestamp> map = new HashMap<>();
    map.put(partition, offsetAndTimestamp);
    when(kafkaConsumer.offsetsForTimes(Collections.singletonMap(partition, startTimeStamp))).thenReturn(map);
    HashMap<TopicPartition, List<ConsumerRecord<String, String>>> topicPartitionMap = new HashMap<>();
    List<ConsumerRecord<String, String>> newRecords = SpoutWithMockedConsumerSetupHelper.createRecords(partition, timeStampStartOffset, recordsInKafka);
    topicPartitionMap.put(partition, newRecords);
    when(kafkaConsumer.poll(pollTimeout)).thenReturn(new ConsumerRecords<>(topicPartitionMap));
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(kafkaConsumer, FirstPollOffsetStrategy.TIMESTAMP);
    TransactionAttempt txid = new TransactionAttempt(0L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    Map<String, Object> meta = emitter.emitPartitionBatchNew(txid, collectorMock, kttp, preExecutorRestartLastMeta.toMap());
    verify(collectorMock, times(recordsInKafka)).emit(emitCaptor.capture());
    verify(kafkaConsumer, times(1)).seek(partition, timeStampStartOffset);
    List<List<Object>> emits = emitCaptor.getAllValues();
    assertThat(emits.get(0).get(0), is(timeStampStartOffset));
    KafkaTridentSpoutBatchMetadata deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(meta);
    assertThat("The batch should start at the first offset for startTimestamp", deserializedMeta.getFirstOffset(), is(timeStampStartOffset));
}
use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.
the class KafkaTridentSpoutEmitterEmitTest method testReEmitBatch.
@Test
public void testReEmitBatch() {
    // Check that a re-emit emits exactly the same tuples as the last batch, even if Kafka returns more messages
    long firstEmittedOffset = 50;
    int numEmittedRecords = 10;
    KafkaTridentSpoutBatchMetadata batchMeta = new KafkaTridentSpoutBatchMetadata(firstEmittedOffset, firstEmittedOffset + numEmittedRecords - 1, topologyId);
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.UNCOMMITTED_EARLIEST);
    TransactionAttempt txid = new TransactionAttempt(10L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    emitter.reEmitPartitionBatch(txid, collectorMock, kttp, batchMeta.toMap());
    verify(collectorMock, times(numEmittedRecords)).emit(emitCaptor.capture());
    List<List<Object>> emits = emitCaptor.getAllValues();
    assertThat(emits.get(0).get(0), is(firstEmittedOffset));
    assertThat(emits.get(emits.size() - 1).get(0), is(firstEmittedOffset + numEmittedRecords - 1));
}
use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.
the class KafkaTridentSpoutEmitterEmitTest method doEmitNewBatchTest.
private Map<String, Object> doEmitNewBatchTest(FirstPollOffsetStrategy firstPollOffsetStrategy, TridentCollector collectorMock, TopicPartition tp, Map<String, Object> previousBatchMeta) {
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(firstPollOffsetStrategy);
    TransactionAttempt txid = new TransactionAttempt(10L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(tp);
    return emitter.emitPartitionBatchNew(txid, collectorMock, kttp, previousBatchMeta);
}
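A hypothetical caller of this helper might look like the sketch below. The test name, the metadata values, and the expected first offset of 0L under the EARLIEST strategy are assumptions for illustration; collectorMock, partition, and the metadata helpers are the same ones used in the surrounding tests.

@Test
public void testEarliestStrategySketch() {
    // Metadata left behind by an older topology, as in the redeploy tests above.
    KafkaTridentSpoutBatchMetadata preRedeployLastMeta = new KafkaTridentSpoutBatchMetadata(50, 59, "an old topology");
    Map<String, Object> meta = doEmitNewBatchTest(FirstPollOffsetStrategy.EARLIEST, collectorMock, partition, preRedeployLastMeta.toMap());
    // Assumed behavior: with EARLIEST the new batch ignores the old metadata and starts from offset 0.
    KafkaTridentSpoutBatchMetadata deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(meta);
    assertThat(deserializedMeta.getFirstOffset(), is(0L));
}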
use of org.apache.storm.trident.topology.TransactionAttempt in project storm by apache.
the class KafkaTridentSpoutEmitterEmitTest method testEmitEmptyFirstBatch.
@Test
public void testEmitEmptyFirstBatch() {
    /**
     * Check that when the first batch after a redeploy is empty, the emitter does not restart at the pre-redeploy offset. STORM-3279.
     */
    long firstEmittedOffset = 50;
    int emittedRecords = 10;
    KafkaTridentSpoutBatchMetadata preRedeployLastMeta = new KafkaTridentSpoutBatchMetadata(firstEmittedOffset, firstEmittedOffset + emittedRecords - 1, "an old topology");
    KafkaTridentSpoutEmitter<String, String> emitter = createEmitter(FirstPollOffsetStrategy.LATEST);
    TransactionAttempt txid = new TransactionAttempt(0L, 0);
    KafkaTridentSpoutTopicPartition kttp = new KafkaTridentSpoutTopicPartition(partition);
    Map<String, Object> meta = emitter.emitPartitionBatchNew(txid, collectorMock, kttp, preRedeployLastMeta.toMap());
    verify(collectorMock, never()).emit(anyList());
    KafkaTridentSpoutBatchMetadata deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(meta);
    assertThat(deserializedMeta.getFirstOffset(), is(lastOffsetInKafka));
    assertThat(deserializedMeta.getLastOffset(), is(lastOffsetInKafka));
    long firstNewRecordOffset = lastOffsetInKafka + 1;
    int numNewRecords = 10;
    List<ConsumerRecord<String, String>> newRecords = SpoutWithMockedConsumerSetupHelper.createRecords(partition, firstNewRecordOffset, numNewRecords);
    newRecords.forEach(consumer::addRecord);
    meta = emitter.emitPartitionBatchNew(txid, collectorMock, kttp, meta);
    verify(collectorMock, times(numNewRecords)).emit(emitCaptor.capture());
    List<List<Object>> emits = emitCaptor.getAllValues();
    assertThat(emits.get(0).get(0), is(firstNewRecordOffset));
    assertThat(emits.get(emits.size() - 1).get(0), is(firstNewRecordOffset + numNewRecords - 1));
    deserializedMeta = KafkaTridentSpoutBatchMetadata.fromMap(meta);
    assertThat("The batch should start at the first offset of the polled records", deserializedMeta.getFirstOffset(), is(firstNewRecordOffset));
    assertThat("The batch should end at the last offset of the polled messages", deserializedMeta.getLastOffset(), is(firstNewRecordOffset + numNewRecords - 1));
}
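The assertions in these tests depend on KafkaTridentSpoutBatchMetadata surviving a toMap()/fromMap() round trip. A minimal sketch of that round trip, with illustrative offsets and topology id:

KafkaTridentSpoutBatchMetadata original = new KafkaTridentSpoutBatchMetadata(50, 59, "some topology");
Map<String, Object> serialized = original.toMap();
KafkaTridentSpoutBatchMetadata restored = KafkaTridentSpoutBatchMetadata.fromMap(serialized);
// The batch boundaries are expected to survive the round trip unchanged.
assert restored.getFirstOffset() == original.getFirstOffset();
assert restored.getLastOffset() == original.getLastOffset();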