Example 1 with StreamConsumer

Use of co.cask.cdap.data2.transaction.stream.StreamConsumer in project cdap by caskdata.

The class HBaseStreamFileConsumerFactory, method create:

@Override
protected StreamConsumer create(TableId tableId, StreamConfig streamConfig, ConsumerConfig consumerConfig, StreamConsumerStateStore stateStore, StreamConsumerState beginConsumerState, FileReader<StreamEventOffset, Iterable<StreamFileOffset>> reader, @Nullable ReadFilter extraFilter) throws IOException {
    int splits = cConf.getInt(Constants.Stream.CONSUMER_TABLE_PRESPLITS);
    AbstractRowKeyDistributor distributor = new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(splits));
    byte[][] splitKeys = HBaseTableUtil.getSplitKeys(splits, splits, distributor);
    TableId hBaseTableId = tableUtil.createHTableId(new NamespaceId(tableId.getNamespace()), tableId.getTableName());
    TableDescriptorBuilder tdBuilder = HBaseTableUtil.getTableDescriptorBuilder(hBaseTableId, cConf);
    ColumnFamilyDescriptorBuilder cfdBuilder = HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(QueueEntryRow.COLUMN_FAMILY), hConf);
    tdBuilder.addColumnFamily(cfdBuilder.build());
    tdBuilder.addProperty(QueueConstants.DISTRIBUTOR_BUCKETS, Integer.toString(splits));
    try (HBaseDDLExecutor ddlExecutor = ddlExecutorFactory.get()) {
        ddlExecutor.createTableIfNotExists(tdBuilder.build(), splitKeys);
    }
    HTable hTable = tableUtil.createHTable(hConf, hBaseTableId);
    hTable.setWriteBufferSize(Constants.Stream.HBASE_WRITE_BUFFER_SIZE);
    hTable.setAutoFlushTo(false);
    return new HBaseStreamFileConsumer(cConf, streamConfig, consumerConfig, tableUtil, hTable, reader, stateStore, beginConsumerState, extraFilter, createKeyDistributor(hTable.getTableDescriptor()));
}
Also used: TableId (co.cask.cdap.data2.util.TableId), HBaseDDLExecutor (co.cask.cdap.spi.hbase.HBaseDDLExecutor), RowKeyDistributorByHashPrefix (co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix), AbstractRowKeyDistributor (co.cask.cdap.hbase.wd.AbstractRowKeyDistributor), ColumnFamilyDescriptorBuilder (co.cask.cdap.data2.util.hbase.ColumnFamilyDescriptorBuilder), TableDescriptorBuilder (co.cask.cdap.data2.util.hbase.TableDescriptorBuilder), NamespaceId (co.cask.cdap.proto.id.NamespaceId), HTable (org.apache.hadoop.hbase.client.HTable)
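
The factory above pre-splits the consumer table and salts every row key with a one-byte hash prefix so writes spread evenly across the splits. Below is a minimal, self-contained sketch of that one-byte hash-prefix idea; the class and method names are illustrative, not CDAP's RowKeyDistributorByHashPrefix API.

import java.util.Arrays;

// Sketch of one-byte hash-prefix salting, the technique behind
// RowKeyDistributorByHashPrefix.OneByteSimpleHash. Names are hypothetical.
public final class OneByteSaltSketch {

    private final int buckets;  // number of hash buckets, matching the pre-split count

    public OneByteSaltSketch(int buckets) {
        this.buckets = buckets;
    }

    /** Prefix the original key with one deterministic hash byte. */
    public byte[] salt(byte[] originalKey) {
        byte prefix = (byte) ((Arrays.hashCode(originalKey) & 0x7fffffff) % buckets);
        byte[] salted = new byte[originalKey.length + 1];
        salted[0] = prefix;
        System.arraycopy(originalKey, 0, salted, 1, originalKey.length);
        return salted;
    }

    /** Drop the salt byte to recover the original key on read. */
    public byte[] unsalt(byte[] saltedKey) {
        return Arrays.copyOfRange(saltedKey, 1, saltedKey.length);
    }

    /** One split boundary per bucket: {0x01}, {0x02}, ... so each salt value gets its own region. */
    public byte[][] splitKeys() {
        byte[][] keys = new byte[buckets - 1][];
        for (int i = 1; i < buckets; i++) {
            keys[i - 1] = new byte[] { (byte) i };
        }
        return keys;
    }
}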

Example 2 with StreamConsumer

Use of co.cask.cdap.data2.transaction.stream.StreamConsumer in project cdap by caskdata.

The class AbstractStreamFileConsumerFactory, method create:

@Override
public final StreamConsumer create(StreamId streamId, String namespace, ConsumerConfig consumerConfig) throws IOException {
    StreamConfig streamConfig = StreamUtils.ensureExists(streamAdmin, streamId);
    TableId tableId = getTableId(streamId, namespace);
    StreamConsumerStateStore stateStore = stateStoreFactory.create(streamConfig);
    StreamConsumerState consumerState = stateStore.get(consumerConfig.getGroupId(), consumerConfig.getInstanceId());
    return create(tableId, streamConfig, consumerConfig, stateStore, consumerState, createReader(streamConfig, consumerState), new TTLReadFilter(streamConfig.getTTL()));
}
Also used: TableId (co.cask.cdap.data2.util.TableId), TTLReadFilter (co.cask.cdap.data.file.filter.TTLReadFilter)
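
The factory passes a TTLReadFilter built from the stream's TTL, so expired events are skipped at read time rather than deleted eagerly. A minimal sketch of that filtering rule, under an assumed accept-by-timestamp interface (the interface and names are illustrative, not CDAP's ReadFilter API):

import java.util.concurrent.TimeUnit;

// Illustrative TTL filter: accept an event only while it is younger than the TTL.
interface TimestampFilter {
    boolean accept(long eventTimestampMillis);
}

final class TtlFilterSketch implements TimestampFilter {

    private final long ttlMillis;

    TtlFilterSketch(long ttlMillis) {
        this.ttlMillis = ttlMillis;
    }

    @Override
    public boolean accept(long eventTimestampMillis) {
        // An event expires once its age exceeds the TTL.
        return System.currentTimeMillis() - eventTimestampMillis < ttlMillis;
    }

    public static void main(String[] args) {
        TimestampFilter filter = new TtlFilterSketch(TimeUnit.SECONDS.toMillis(3));
        long now = System.currentTimeMillis();
        System.out.println(filter.accept(now));                                // true: fresh event
        System.out.println(filter.accept(now - TimeUnit.SECONDS.toMillis(5))); // false: already expired
    }
}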

Example 3 with StreamConsumer

Use of co.cask.cdap.data2.transaction.stream.StreamConsumer in project cdap by caskdata.

The class FlowletProgramRunner, method processSpecificationFactory:

private ProcessSpecificationFactory processSpecificationFactory(final BasicFlowletContext flowletContext, final DataFabricFacade dataFabricFacade, final QueueReaderFactory queueReaderFactory, final String flowletName, final Table<Node, String, Set<QueueSpecification>> queueSpecs, final ImmutableList.Builder<ConsumerSupplier<?>> queueConsumerSupplierBuilder, final SchemaCache schemaCache) {
    return new ProcessSpecificationFactory() {

        @Override
        public <T> ProcessSpecification create(Set<String> inputNames, Schema schema, TypeToken<T> dataType, ProcessMethod<T> method, ConsumerConfig consumerConfig, int batchSize, Tick tickAnnotation) throws Exception {
            List<QueueReader<T>> queueReaders = Lists.newLinkedList();
            for (Map.Entry<Node, Set<QueueSpecification>> entry : queueSpecs.column(flowletName).entrySet()) {
                for (QueueSpecification queueSpec : entry.getValue()) {
                    final QueueName queueName = queueSpec.getQueueName();
                    if (queueSpec.getInputSchema().equals(schema) && (inputNames.contains(queueName.getSimpleName()) || inputNames.contains(FlowletDefinition.ANY_INPUT))) {
                        Node sourceNode = entry.getKey();
                        if (sourceNode.getType() == FlowletConnection.Type.STREAM) {
                            ConsumerSupplier<StreamConsumer> consumerSupplier = ConsumerSupplier.create(flowletContext.getOwners(), runtimeUsageRegistry, dataFabricFacade, queueName, consumerConfig);
                            queueConsumerSupplierBuilder.add(consumerSupplier);
                            // No decoding is needed, since a process method consuming from a stream can only take StreamEvent as its input type
                            Function<StreamEvent, T> decoder = wrapInputDecoder(flowletContext, null, queueName, new Function<StreamEvent, T>() {

                                @Override
                                @SuppressWarnings("unchecked")
                                public T apply(StreamEvent input) {
                                    return (T) input;
                                }
                            });
                            queueReaders.add(queueReaderFactory.createStreamReader(queueName.toStreamId(), consumerSupplier, batchSize, decoder));
                        } else {
                            int numGroups = getNumGroups(Iterables.concat(queueSpecs.row(entry.getKey()).values()), queueName);
                            Function<ByteBuffer, T> decoder = wrapInputDecoder(
                                flowletContext,
                                entry.getKey().getName(),  // the producer flowlet
                                queueName, createInputDatumDecoder(dataType, schema, schemaCache));
                            ConsumerSupplier<QueueConsumer> consumerSupplier = ConsumerSupplier.create(flowletContext.getOwners(), runtimeUsageRegistry, dataFabricFacade, queueName, consumerConfig, numGroups);
                            queueConsumerSupplierBuilder.add(consumerSupplier);
                            queueReaders.add(queueReaderFactory.createQueueReader(consumerSupplier, batchSize, decoder));
                        }
                    }
                }
            }
            // If input is needed but no input queue is available, return null
            if (!inputNames.isEmpty() && queueReaders.isEmpty()) {
                return null;
            }
            return new ProcessSpecification<>(new RoundRobinQueueReader<>(queueReaders), method, tickAnnotation);
        }
    };
}
Also used: QueueReader (co.cask.cdap.app.queue.QueueReader), RoundRobinQueueReader (co.cask.cdap.internal.app.queue.RoundRobinQueueReader), Set (java.util.Set), ImmutableSet (com.google.common.collect.ImmutableSet), Schema (co.cask.cdap.api.data.schema.Schema), Node (co.cask.cdap.app.queue.QueueSpecificationGenerator.Node), ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig), Tick (co.cask.cdap.api.annotation.Tick), QueueName (co.cask.cdap.common.queue.QueueName), StreamConsumer (co.cask.cdap.data2.transaction.stream.StreamConsumer), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), ByteBuffer (java.nio.ByteBuffer), QueueConsumer (co.cask.cdap.data2.queue.QueueConsumer), TypeToken (com.google.common.reflect.TypeToken), QueueSpecification (co.cask.cdap.app.queue.QueueSpecification), Map (java.util.Map)
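
All matching readers end up wrapped in a RoundRobinQueueReader, so a flowlet with several inputs drains them fairly instead of starving any one queue. A simplified, self-contained sketch of round-robin dequeueing (the types here are placeholders, not CDAP's QueueReader API):

import java.util.List;
import java.util.Optional;
import java.util.function.Supplier;

// Simplified round-robin reader: cycle through the underlying readers and
// return the first non-empty result. Types are illustrative only.
final class RoundRobinReaderSketch<T> {

    private final List<Supplier<Optional<T>>> readers;
    private int next = 0;

    RoundRobinReaderSketch(List<Supplier<Optional<T>>> readers) {
        this.readers = readers;
    }

    /** Try each reader once, starting after the reader that served the last call. */
    Optional<T> dequeue() {
        for (int i = 0; i < readers.size(); i++) {
            int idx = (next + i) % readers.size();
            Optional<T> result = readers.get(idx).get();
            if (result.isPresent()) {
                next = (idx + 1) % readers.size();  // resume after this reader next time
                return result;
            }
        }
        return Optional.empty();  // every input was empty this round
    }
}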

Example 4 with StreamConsumer

Use of co.cask.cdap.data2.transaction.stream.StreamConsumer in project cdap by caskdata.

The class StreamConsumerTestBase, method testNamespacedStreamConsumers:

@Test
public void testNamespacedStreamConsumers() throws Exception {
    // Test two consumers for two streams with the same name but in different namespaces. Each consumer's
    // consumption should be independent of the other's.
    String stream = "testNamespacedStreamConsumers";
    StreamId streamId = TEST_NAMESPACE.stream(stream);
    StreamId otherStreamId = OTHER_NAMESPACE.stream(stream);
    StreamAdmin streamAdmin = getStreamAdmin();
    streamAdmin.create(streamId);
    streamAdmin.create(otherStreamId);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    StreamConfig otherStreamConfig = streamAdmin.getConfig(otherStreamId);
    // Writes 5 events to both streams
    writeEvents(streamConfig, "Testing ", 5);
    writeEvents(otherStreamConfig, "Testing ", 5);
    streamAdmin.configureInstances(streamId, 0L, 1);
    streamAdmin.configureInstances(otherStreamId, 0L, 1);
    StreamConsumerFactory consumerFactory = getConsumerFactory();
    StreamConsumer consumer = consumerFactory.create(streamId, "fifo.rollback", new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
    StreamConsumer otherConsumer = consumerFactory.create(otherStreamId, "fifo.rollback", new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
    // Try to dequeue using both consumers
    TransactionContext context = createTxContext(consumer);
    TransactionContext otherContext = createTxContext(otherConsumer);
    context.start();
    otherContext.start();
    // Consume events from the stream in TEST_NAMESPACE
    DequeueResult<StreamEvent> result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    context.finish();
    context.start();
    result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    context.finish();
    context.start();
    result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 2", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    context.finish();
    context.start();
    // Even though the consumer of the same-named stream has already consumed 3 events, otherConsumer reads a
    // stream in a different namespace, so it is still on the initial event.
    DequeueResult<StreamEvent> result1 = otherConsumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
    otherContext.finish();
    otherContext.start();
    result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    result1 = otherConsumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 3", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
    // Commit both
    context.finish();
    otherContext.finish();
    consumer.close();
    otherConsumer.close();
}
Also used: StreamId (co.cask.cdap.proto.id.StreamId), TransactionContext (org.apache.tephra.TransactionContext), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig), Test (org.junit.Test)
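
Each dequeue in the test runs in its own Tephra transaction: start, poll, finish (or abort on failure). A small helper capturing that pattern, using the same TransactionContext and StreamConsumer calls as the test; the helper name pollOne is hypothetical:

// Hypothetical helper: perform one transactional poll and return the event body.
private static String pollOne(TransactionContext context, StreamConsumer consumer) throws Exception {
    context.start();
    try {
        DequeueResult<StreamEvent> result = consumer.poll(1, 1, TimeUnit.SECONDS);
        String body = Charsets.UTF_8.decode(result.iterator().next().getBody()).toString();
        // Committing advances the consumer state past the dequeued event.
        context.finish();
        return body;
    } catch (Exception e) {
        // Rolling back leaves the event available for a later poll.
        context.abort();
        throw e;
    }
}

With such a helper, the first three dequeues above would collapse to calls like Assert.assertEquals("Testing 0", pollOne(context, consumer)).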

Example 5 with StreamConsumer

Use of co.cask.cdap.data2.transaction.stream.StreamConsumer in project cdap by caskdata.

The class StreamConsumerTestBase, method testTTLStartingFile:

@Category(SlowTests.class)
@Test
public void testTTLStartingFile() throws Exception {
    String stream = "testTTLStartingFile";
    StreamId streamId = TEST_NAMESPACE.stream(stream);
    StreamAdmin streamAdmin = getStreamAdmin();
    // Create a stream with a TTL of 3 seconds and a partition duration of 3 seconds
    final long ttl = TimeUnit.SECONDS.toMillis(3);
    Properties streamProperties = new Properties();
    streamProperties.setProperty(Constants.Stream.TTL, Long.toString(ttl));
    streamProperties.setProperty(Constants.Stream.PARTITION_DURATION, Long.toString(ttl));
    streamAdmin.create(streamId, streamProperties);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
    StreamConsumerFactory consumerFactory = getConsumerFactory();
    StreamConsumer consumer = consumerFactory.create(streamId, stream, new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
    StreamConsumer newConsumer;
    Set<StreamEvent> expectedEvents = Sets.newTreeSet(STREAM_EVENT_COMPARATOR);
    try {
        // Create a new consumer for second-consumer verification.
        // The consumer must be created before writing events because in HBase, consumer creation can take a couple of seconds.
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events to a partition that will expire once we sleep past the TTL
        writeEvents(streamConfig, "Phase 0 expired event ", 20);
        Thread.sleep(ttl);
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Create a new consumer for second-consumer verification (with clean state)
        // The consumer must be created before writing events because in HBase, consumer creation can take a couple of seconds.
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events to a partition and read them back immediately. They shouldn't have expired.
        expectedEvents.addAll(writeEvents(streamConfig, "Phase 1 non-expired event ", 20));
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Create a new consumer for second-consumer verification (with clean state)
        // The consumer must be created before writing events because in HBase, consumer creation can take a couple of seconds.
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events to a partition that will expire once we sleep past the TTL.
        // This writes to a new partition, different from the one used by the first batch.
        // Also, because we sleep for the TTL, the previous batch expires as well.
        expectedEvents.clear();
        writeEvents(streamConfig, "Phase 2 expired event ", 20);
        Thread.sleep(ttl);
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Create a new consumer for second-consumer verification (with clean state)
        // The consumer must be created before writing events because in HBase, consumer creation can take a couple of seconds.
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events to a partition and read them back immediately. They shouldn't expire.
        expectedEvents.addAll(writeEvents(streamConfig, "Phase 3 non-expired event ", 20));
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // There should be no more pending events
        expectedEvents.clear();
        verifyEvents(consumer, expectedEvents);
    } finally {
        consumer.close();
    }
}
Also used: StreamId (co.cask.cdap.proto.id.StreamId), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig), Properties (java.util.Properties), Category (org.junit.experimental.categories.Category), Test (org.junit.Test)
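
The expiry behavior above comes down to timestamp arithmetic: an event lands in the fixed-duration partition containing its timestamp and expires once its age exceeds the TTL. A worked sketch of those two rules, under assumed semantics (partitionStart and isExpired are illustrative helpers, not CDAP's API):

import java.util.concurrent.TimeUnit;

// Assumed semantics of stream partitions and TTL, for illustration only.
public final class TtlPartitionSketch {

    /** Start of the fixed-duration partition containing the given timestamp. */
    static long partitionStart(long timestampMillis, long partitionDurationMillis) {
        return timestampMillis / partitionDurationMillis * partitionDurationMillis;
    }

    /** An event is expired once it is older than the TTL. */
    static boolean isExpired(long eventTimestampMillis, long nowMillis, long ttlMillis) {
        return nowMillis - eventTimestampMillis > ttlMillis;
    }

    public static void main(String[] args) {
        long ttl = TimeUnit.SECONDS.toMillis(3);  // TTL == partition duration, as in the test
        long t0 = partitionStart(System.currentTimeMillis(), ttl);

        // An event written at the start of a partition, checked after sleeping past the TTL:
        long now = t0 + ttl + 1;
        System.out.println(isExpired(t0, now, ttl));       // true: like the Phase 0 and Phase 2 batches
        System.out.println(isExpired(now - 1, now, ttl));  // false: a just-written event survives
    }
}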

Aggregations

ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig): 8 uses
StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 7 uses
StreamId (co.cask.cdap.proto.id.StreamId): 7 uses
Test (org.junit.Test): 6 uses
TransactionContext (org.apache.tephra.TransactionContext): 5 uses
QueueConsumer (co.cask.cdap.data2.queue.QueueConsumer): 3 uses
StreamConsumer (co.cask.cdap.data2.transaction.stream.StreamConsumer): 3 uses
Properties (java.util.Properties): 3 uses
QueueName (co.cask.cdap.common.queue.QueueName): 2 uses
TableId (co.cask.cdap.data2.util.TableId): 2 uses
Tick (co.cask.cdap.api.annotation.Tick): 1 use
Schema (co.cask.cdap.api.data.schema.Schema): 1 use
QueueReader (co.cask.cdap.app.queue.QueueReader): 1 use
QueueSpecification (co.cask.cdap.app.queue.QueueSpecification): 1 use
Node (co.cask.cdap.app.queue.QueueSpecificationGenerator.Node): 1 use
ProgramContext (co.cask.cdap.data.ProgramContext): 1 use
TTLReadFilter (co.cask.cdap.data.file.filter.TTLReadFilter): 1 use
LevelDBTableCore (co.cask.cdap.data2.dataset2.lib.table.leveldb.LevelDBTableCore): 1 use
ForwardingStreamConsumer (co.cask.cdap.data2.transaction.stream.ForwardingStreamConsumer): 1 use
QueueToStreamConsumer (co.cask.cdap.data2.transaction.stream.QueueToStreamConsumer): 1 use