Use of co.cask.cdap.data2.transaction.stream.StreamConsumer in project cdap by caskdata.
The class HBaseStreamFileConsumerFactory, method create.
@Override
protected StreamConsumer create(TableId tableId, StreamConfig streamConfig, ConsumerConfig consumerConfig, StreamConsumerStateStore stateStore, StreamConsumerState beginConsumerState, FileReader<StreamEventOffset, Iterable<StreamFileOffset>> reader, @Nullable ReadFilter extraFilter) throws IOException {
int splits = cConf.getInt(Constants.Stream.CONSUMER_TABLE_PRESPLITS);
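// Salt row keys with a one-byte hash prefix so writes spread evenly across the pre-split regions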
AbstractRowKeyDistributor distributor = new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(splits));
byte[][] splitKeys = HBaseTableUtil.getSplitKeys(splits, splits, distributor);
TableId hBaseTableId = tableUtil.createHTableId(new NamespaceId(tableId.getNamespace()), tableId.getTableName());
TableDescriptorBuilder tdBuilder = HBaseTableUtil.getTableDescriptorBuilder(hBaseTableId, cConf);
ColumnFamilyDescriptorBuilder cfdBuilder = HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(QueueEntryRow.COLUMN_FAMILY), hConf);
tdBuilder.addColumnFamily(cfdBuilder.build());
tdBuilder.addProperty(QueueConstants.DISTRIBUTOR_BUCKETS, Integer.toString(splits));
try (HBaseDDLExecutor ddlExecutor = ddlExecutorFactory.get()) {
ddlExecutor.createTableIfNotExists(tdBuilder.build(), splitKeys);
}
HTable hTable = tableUtil.createHTable(hConf, hBaseTableId);
hTable.setWriteBufferSize(Constants.Stream.HBASE_WRITE_BUFFER_SIZE);
hTable.setAutoFlushTo(false);
return new HBaseStreamFileConsumer(cConf, streamConfig, consumerConfig, tableUtil, hTable, reader, stateStore, beginConsumerState, extraFilter, createKeyDistributor(hTable.getTableDescriptor()));
}
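For illustration, here is a minimal, self-contained sketch of the one-byte hash-prefix salting idea behind RowKeyDistributorByHashPrefix.OneByteSimpleHash used above. The bucket math is an assumption for demonstration only, not the library's exact hash.
import java.util.Arrays;
// Illustrative sketch only: salts row keys with a one-byte bucket prefix so
// writes spread across pre-split regions. The real OneByteSimpleHash may use
// a different hash function.
final class OneBytePrefixSketch {
  private final int buckets;
  OneBytePrefixSketch(int buckets) {
    // should match the number of pre-splits used when creating the table
    this.buckets = buckets;
  }
  byte[] getDistributedKey(byte[] originalKey) {
    // deterministic bucket in [0, buckets), prepended as a single salt byte
    byte prefix = (byte) Math.floorMod(Arrays.hashCode(originalKey), buckets);
    byte[] salted = new byte[originalKey.length + 1];
    salted[0] = prefix;
    System.arraycopy(originalKey, 0, salted, 1, originalKey.length);
    return salted;
  }
  byte[] getOriginalKey(byte[] distributedKey) {
    // drop the salt byte to recover the caller's key
    return Arrays.copyOfRange(distributedKey, 1, distributedKey.length);
  }
}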
Use of co.cask.cdap.data2.transaction.stream.StreamConsumer in project cdap by caskdata.
The class AbstractStreamFileConsumerFactory, method create.
@Override
public final StreamConsumer create(StreamId streamId, String namespace, ConsumerConfig consumerConfig) throws IOException {
StreamConfig streamConfig = StreamUtils.ensureExists(streamAdmin, streamId);
TableId tableId = getTableId(streamId, namespace);
StreamConsumerStateStore stateStore = stateStoreFactory.create(streamConfig);
StreamConsumerState consumerState = stateStore.get(consumerConfig.getGroupId(), consumerConfig.getInstanceId());
return create(tableId, streamConfig, consumerConfig, stateStore, consumerState, createReader(streamConfig, consumerState), new TTLReadFilter(streamConfig.getTTL()));
}
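This method is the template half of a template-method pattern: the public final create(...) resolves the stream config and consumer state once, then hands off to the protected create(...) hook that subclasses such as HBaseStreamFileConsumerFactory implement. A hedged sketch of that shape, using simplified stand-in types (Config, State, Consumer are assumptions for the sketch, not the actual CDAP classes):
// Template-method sketch: shared setup in the final method, backend work in the hook.
abstract class ConsumerFactorySketch {
  // Template method: shared setup lives here and cannot be overridden.
  public final Consumer create(String streamName) {
    Config config = loadConfig(streamName);
    State state = loadState(config);
    return create(config, state); // backend-specific hook (e.g. HBase, file)
  }
  // Hook that each storage backend implements.
  protected abstract Consumer create(Config config, State state);
  private Config loadConfig(String streamName) { return new Config(streamName); }
  private State loadState(Config config) { return new State(); }
  static final class Config { final String name; Config(String name) { this.name = name; } }
  static final class State { }
  interface Consumer { }
}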
Use of co.cask.cdap.data2.transaction.stream.StreamConsumer in project cdap by caskdata.
The class FlowletProgramRunner, method processSpecificationFactory.
private ProcessSpecificationFactory processSpecificationFactory(final BasicFlowletContext flowletContext, final DataFabricFacade dataFabricFacade, final QueueReaderFactory queueReaderFactory, final String flowletName, final Table<Node, String, Set<QueueSpecification>> queueSpecs, final ImmutableList.Builder<ConsumerSupplier<?>> queueConsumerSupplierBuilder, final SchemaCache schemaCache) {
return new ProcessSpecificationFactory() {
@Override
public <T> ProcessSpecification create(Set<String> inputNames, Schema schema, TypeToken<T> dataType, ProcessMethod<T> method, ConsumerConfig consumerConfig, int batchSize, Tick tickAnnotation) throws Exception {
List<QueueReader<T>> queueReaders = Lists.newLinkedList();
for (Map.Entry<Node, Set<QueueSpecification>> entry : queueSpecs.column(flowletName).entrySet()) {
for (QueueSpecification queueSpec : entry.getValue()) {
final QueueName queueName = queueSpec.getQueueName();
if (queueSpec.getInputSchema().equals(schema) && (inputNames.contains(queueName.getSimpleName()) || inputNames.contains(FlowletDefinition.ANY_INPUT))) {
Node sourceNode = entry.getKey();
if (sourceNode.getType() == FlowletConnection.Type.STREAM) {
ConsumerSupplier<StreamConsumer> consumerSupplier = ConsumerSupplier.create(flowletContext.getOwners(), runtimeUsageRegistry, dataFabricFacade, queueName, consumerConfig);
queueConsumerSupplierBuilder.add(consumerSupplier);
// No decoding is needed, since a process method consuming a stream can only take StreamEvent as its input type
Function<StreamEvent, T> decoder = wrapInputDecoder(flowletContext, null, queueName, new Function<StreamEvent, T>() {
@Override
@SuppressWarnings("unchecked")
public T apply(StreamEvent input) {
return (T) input;
}
});
queueReaders.add(queueReaderFactory.createStreamReader(queueName.toStreamId(), consumerSupplier, batchSize, decoder));
} else {
int numGroups = getNumGroups(Iterables.concat(queueSpecs.row(entry.getKey()).values()), queueName);
Function<ByteBuffer, T> decoder = wrapInputDecoder(flowletContext,
entry.getKey().getName(), // the name of the producer flowlet
queueName, createInputDatumDecoder(dataType, schema, schemaCache));
ConsumerSupplier<QueueConsumer> consumerSupplier = ConsumerSupplier.create(flowletContext.getOwners(), runtimeUsageRegistry, dataFabricFacade, queueName, consumerConfig, numGroups);
queueConsumerSupplierBuilder.add(consumerSupplier);
queueReaders.add(queueReaderFactory.createQueueReader(consumerSupplier, batchSize, decoder));
}
}
}
}
// If input is needed but no input queue is available, return null
if (!inputNames.isEmpty() && queueReaders.isEmpty()) {
return null;
}
return new ProcessSpecification<>(new RoundRobinQueueReader<>(queueReaders), method, tickAnnotation);
}
};
}
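The readers collected above are wrapped in a RoundRobinQueueReader, which cycles through the per-queue readers so that no single input starves the others. A minimal sketch of that rotation follows; the real CDAP class also handles empty results and blocking, which this omits, and the Supplier-based reader type is an assumption for the sketch.
import java.util.List;
import java.util.function.Supplier;
// Sketch of round-robin dequeueing across several input queues.
final class RoundRobinSketch<T> {
  private final List<Supplier<T>> readers;
  private int next = 0;
  RoundRobinSketch(List<Supplier<T>> readers) {
    this.readers = readers;
  }
  // Polls the next reader in rotation; a null return means that queue was empty.
  T dequeue() {
    if (readers.isEmpty()) {
      return null;
    }
    Supplier<T> reader = readers.get(next);
    next = (next + 1) % readers.size(); // advance the rotation for fairness
    return reader.get();
  }
}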
Use of co.cask.cdap.data2.transaction.stream.StreamConsumer in project cdap by caskdata.
The class StreamConsumerTestBase, method testNamespacedStreamConsumers.
@Test
public void testNamespacedStreamConsumers() throws Exception {
// Test two consumers for two streams with the same name but in different namespaces. Their
// consumption should be independent of each other.
String stream = "testNamespacedStreamConsumers";
StreamId streamId = TEST_NAMESPACE.stream(stream);
StreamId otherStreamId = OTHER_NAMESPACE.stream(stream);
StreamAdmin streamAdmin = getStreamAdmin();
streamAdmin.create(streamId);
streamAdmin.create(otherStreamId);
StreamConfig streamConfig = streamAdmin.getConfig(streamId);
StreamConfig otherStreamConfig = streamAdmin.getConfig(otherStreamId);
// Write 5 events to each stream
writeEvents(streamConfig, "Testing ", 5);
writeEvents(otherStreamConfig, "Testing ", 5);
streamAdmin.configureInstances(streamId, 0L, 1);
streamAdmin.configureInstances(otherStreamId, 0L, 1);
StreamConsumerFactory consumerFactory = getConsumerFactory();
StreamConsumer consumer = consumerFactory.create(streamId, "fifo.rollback", new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
StreamConsumer otherConsumer = consumerFactory.create(otherStreamId, "fifo.rollback", new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
// Try to dequeue using both consumers
TransactionContext context = createTxContext(consumer);
TransactionContext otherContext = createTxContext(otherConsumer);
context.start();
otherContext.start();
// Consume events from the stream in the default namespace
DequeueResult<StreamEvent> result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
context.finish();
context.start();
result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
context.finish();
context.start();
result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
Assert.assertEquals("Testing 2", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
context.finish();
context.start();
// Even though the consumer of the same-named stream has already consumed 3 events, otherConsumer
// reads from a stream in a different namespace, so it is still positioned at the first event.
DequeueResult<StreamEvent> result1 = otherConsumer.poll(1, 1, TimeUnit.SECONDS);
Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
otherContext.finish();
otherContext.start();
result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
result1 = otherConsumer.poll(1, 1, TimeUnit.SECONDS);
Assert.assertEquals("Testing 3", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
// Commit both
context.finish();
otherContext.finish();
consumer.close();
otherConsumer.close();
}
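The test repeats the same start / poll / finish cycle several times. A hedged helper capturing that pattern, assuming the Tephra TransactionContext and the StreamConsumer.poll(...) call shown above (error handling simplified; this helper is not part of the test):
// Dequeues one event inside its own transaction and returns its body as text.
String pollOneEvent(TransactionContext context, StreamConsumer consumer) throws Exception {
  context.start(); // open a new transaction
  try {
    DequeueResult<StreamEvent> result = consumer.poll(1, 1, TimeUnit.SECONDS);
    String body = Charsets.UTF_8.decode(result.iterator().next().getBody()).toString();
    context.finish(); // commit, advancing the consumer's position
    return body;
  } catch (Exception e) {
    context.abort(); // roll back so the event stays pending
    throw e;
  }
}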
Use of co.cask.cdap.data2.transaction.stream.StreamConsumer in project cdap by caskdata.
The class StreamConsumerTestBase, method testTTLStartingFile.
@Category(SlowTests.class)
@Test
public void testTTLStartingFile() throws Exception {
String stream = "testTTLStartingFile";
StreamId streamId = TEST_NAMESPACE.stream(stream);
StreamAdmin streamAdmin = getStreamAdmin();
// Create a stream with a TTL of 3 seconds and a partition duration of 3 seconds
final long ttl = TimeUnit.SECONDS.toMillis(3);
Properties streamProperties = new Properties();
streamProperties.setProperty(Constants.Stream.TTL, Long.toString(ttl));
streamProperties.setProperty(Constants.Stream.PARTITION_DURATION, Long.toString(ttl));
streamAdmin.create(streamId, streamProperties);
StreamConfig streamConfig = streamAdmin.getConfig(streamId);
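// Configure two consumer groups (IDs 0 and 1), each with a single instance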
streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
StreamConsumerFactory consumerFactory = getConsumerFactory();
StreamConsumer consumer = consumerFactory.create(streamId, stream, new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
StreamConsumer newConsumer;
Set<StreamEvent> expectedEvents = Sets.newTreeSet(STREAM_EVENT_COMPARATOR);
try {
// Create a new consumer for second-consumer verification.
// The consumer must be created before writing events because in HBase, consumer creation can take a couple of seconds.
newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
// Write 20 events to a partition that will expire once we sleep for the TTL
writeEvents(streamConfig, "Phase 0 expired event ", 20);
Thread.sleep(ttl);
verifyEvents(consumer, expectedEvents);
// also verify for a new consumer
try {
verifyEvents(newConsumer, expectedEvents);
} finally {
newConsumer.close();
}
// Create a new consumer for second-consumer verification (with clean state).
// The consumer must be created before writing events because in HBase, consumer creation can take a couple of seconds.
streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
// Write 20 events to a partition and read them back immediately; they shouldn't be expired.
expectedEvents.addAll(writeEvents(streamConfig, "Phase 1 non-expired event ", 20));
verifyEvents(consumer, expectedEvents);
// also verify for a new consumer
try {
verifyEvents(newConsumer, expectedEvents);
} finally {
newConsumer.close();
}
// Create a new consumer for second-consumer verification (with clean state).
// The consumer must be created before writing events because in HBase, consumer creation can take a couple of seconds.
streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
// Write 20 events to a partition that will expire once we sleep for the TTL.
// This writes to a new partition, different from the one used by the first batch.
// Also, because we sleep for the TTL, the earlier batch expires as well.
expectedEvents.clear();
writeEvents(streamConfig, "Phase 2 expired event ", 20);
Thread.sleep(ttl);
verifyEvents(consumer, expectedEvents);
// also verify for a new consumer
try {
verifyEvents(newConsumer, expectedEvents);
} finally {
newConsumer.close();
}
// Create a new consumer for second-consumer verification (with clean state).
// The consumer must be created before writing events because in HBase, consumer creation can take a couple of seconds.
streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
// Write 20 events to a partition and read them back immediately; they shouldn't expire.
expectedEvents.addAll(writeEvents(streamConfig, "Phase 3 non-expired event ", 20));
verifyEvents(consumer, expectedEvents);
// also verify for a new consumer
try {
verifyEvents(newConsumer, expectedEvents);
} finally {
newConsumer.close();
}
// There should be no more pending events
expectedEvents.clear();
verifyEvents(consumer, expectedEvents);
} finally {
consumer.close();
}
}
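The expiry behaviour exercised here comes from TTL-based read filtering; see the TTLReadFilter passed to the consumer in AbstractStreamFileConsumerFactory above. A hedged sketch of the rule it enforces, not the actual implementation:
// An event is visible only while (now - eventTimestamp) < ttl.
final class TtlFilterSketch {
  private final long ttlMillis;
  TtlFilterSketch(long ttlMillis) {
    this.ttlMillis = ttlMillis;
  }
  // Returns true if an event written at eventTimestamp is still live at 'now'.
  boolean accept(long eventTimestamp, long now) {
    return now - eventTimestamp < ttlMillis;
  }
}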