Example 6 with StreamConfig

use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.

the class HBaseStreamConsumerStateStoreFactory method create.

@Override
public synchronized StreamConsumerStateStore create(StreamConfig streamConfig) throws IOException {
    NamespaceId namespace = streamConfig.getStreamId().getParent();
    TableId streamStateStoreTableId = StreamUtils.getStateStoreTableId(namespace);
    TableId hbaseTableId = tableUtil.createHTableId(new NamespaceId(streamStateStoreTableId.getNamespace()), streamStateStoreTableId.getTableName());
    boolean tableExist;
    try (HBaseAdmin admin = new HBaseAdmin(hConf)) {
        tableExist = tableUtil.tableExists(admin, hbaseTableId);
    }
    if (!tableExist) {
        try (HBaseDDLExecutor ddlExecutor = ddlExecutorFactory.get()) {
            TableDescriptorBuilder tdBuilder = HBaseTableUtil.getTableDescriptorBuilder(hbaseTableId, cConf);
            ColumnFamilyDescriptorBuilder cfdBuilder = HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(QueueEntryRow.COLUMN_FAMILY), hConf);
            tdBuilder.addColumnFamily(cfdBuilder.build());
            ddlExecutor.createTableIfNotExists(tdBuilder.build(), null);
        }
    }
    HTable hTable = tableUtil.createHTable(hConf, hbaseTableId);
    hTable.setWriteBufferSize(Constants.Stream.HBASE_WRITE_BUFFER_SIZE);
    hTable.setAutoFlushTo(false);
    return new HBaseStreamConsumerStateStore(streamConfig, hTable);
}
Also used : TableId(co.cask.cdap.data2.util.TableId) HBaseDDLExecutor(co.cask.cdap.spi.hbase.HBaseDDLExecutor) HBaseAdmin(org.apache.hadoop.hbase.client.HBaseAdmin) ColumnFamilyDescriptorBuilder(co.cask.cdap.data2.util.hbase.ColumnFamilyDescriptorBuilder) TableDescriptorBuilder(co.cask.cdap.data2.util.hbase.TableDescriptorBuilder) NamespaceId(co.cask.cdap.proto.id.NamespaceId) HTable(org.apache.hadoop.hbase.client.HTable)
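
A minimal sketch of driving this factory from a caller (the helper below is hypothetical, not part of the CDAP sources; the create and get calls mirror AbstractStreamFileConsumerFactory in Example 7):

// Hypothetical helper: resolve the state store for a stream, then load one
// consumer's saved state. create() provisions the backing HBase table lazily,
// as shown in the factory above.
StreamConsumerState loadState(StreamConsumerStateStoreFactory factory, StreamConfig streamConfig,
                              long groupId, int instanceId) throws IOException {
    StreamConsumerStateStore stateStore = factory.create(streamConfig);
    return stateStore.get(groupId, instanceId);
}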

Example 7 with StreamConfig

use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.

the class AbstractStreamFileConsumerFactory method create.

@Override
public final StreamConsumer create(StreamId streamId, String namespace, ConsumerConfig consumerConfig) throws IOException {
    StreamConfig streamConfig = StreamUtils.ensureExists(streamAdmin, streamId);
    TableId tableId = getTableId(streamId, namespace);
    StreamConsumerStateStore stateStore = stateStoreFactory.create(streamConfig);
    StreamConsumerState consumerState = stateStore.get(consumerConfig.getGroupId(), consumerConfig.getInstanceId());
    return create(tableId, streamConfig, consumerConfig, stateStore, consumerState, createReader(streamConfig, consumerState), new TTLReadFilter(streamConfig.getTTL()));
}
Also used : TableId(co.cask.cdap.data2.util.TableId) TTLReadFilter(co.cask.cdap.data.file.filter.TTLReadFilter)
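
For reference, a call site for this factory looks like the one in Example 10; a minimal sketch, with the consumer-state namespace string and group settings assumed:

// Assumed values throughout; the five ConsumerConfig arguments are
// (groupId, instanceId, groupSize, dequeue strategy, hash key).
StreamConsumer consumer = consumerFactory.create(
    streamId,          // stream to consume
    "fifo.rollback",   // namespace for this consumer's state
    new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));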

Example 8 with StreamConfig

use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.

the class MultiLiveStreamFileReaderTestBase method testMultiFileReader.

@Test
public void testMultiFileReader() throws Exception {
    String streamName = "multiReader";
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    Location location = getLocationFactory().create(streamName);
    location.mkdirs();
    // Create a stream with 1 partition.
    StreamConfig config = new StreamConfig(streamId, Long.MAX_VALUE, 10000, Long.MAX_VALUE, location, null, 1000);
    // Write out 1000 events across 5 files (200 per file), with interleaved timestamps
    List<FileWriter<StreamEvent>> writers = Lists.newArrayList();
    for (int i = 0; i < 5; i++) {
        FileWriter<StreamEvent> writer = createWriter(config, "bucket" + i);
        writers.add(writer);
        for (int j = 0; j < 200; j++) {
            long timestamp = j * 5 + i;
            writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
        }
    }
    // Flush all writers.
    for (FileWriter<StreamEvent> writer : writers) {
        writer.flush();
    }
    // Create a multi-stream file reader
    List<StreamFileOffset> sources = Lists.newArrayList();
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), 0, Long.MAX_VALUE);
    for (int i = 0; i < 5; i++) {
        Location eventFile = StreamUtils.createStreamLocation(partitionLocation, "bucket" + i, 0, StreamFileType.EVENT);
        sources.add(new StreamFileOffset(eventFile, 0L, 0));
    }
    // Reads all events written so far.
    MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(config, sources);
    List<StreamEvent> events = Lists.newArrayList();
    long expectedTimestamp = 0L;
    for (int i = 0; i < 10; i++) {
        Assert.assertEquals(100, reader.read(events, 100, 0, TimeUnit.SECONDS));
        Assert.assertEquals(100, events.size());
        for (StreamEvent event : events) {
            Assert.assertEquals(expectedTimestamp, event.getTimestamp());
            Assert.assertEquals("Testing " + expectedTimestamp, Charsets.UTF_8.decode(event.getBody()).toString());
            expectedTimestamp++;
        }
        events.clear();
    }
    Assert.assertEquals(0, reader.read(events, 1, 1, TimeUnit.SECONDS));
    // Write 10 more events to each of the first three writers.
    for (int i = 0; i < 3; i++) {
        FileWriter<StreamEvent> writer = writers.get(i);
        for (int j = 0; j < 10; j++) {
            long timestamp = 1000 + j * 3 + i;
            writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
        }
    }
    // Close all writers
    for (FileWriter<StreamEvent> writer : writers) {
        writer.close();
    }
    // Continue to read
    Assert.assertEquals(30, reader.read(events, 30, 2, TimeUnit.SECONDS));
    Assert.assertEquals(30, events.size());
    for (StreamEvent event : events) {
        Assert.assertEquals(expectedTimestamp, event.getTimestamp());
        Assert.assertEquals("Testing " + expectedTimestamp, Charsets.UTF_8.decode(event.getBody()).toString());
        expectedTimestamp++;
    }
    // Should get no more events.
    Assert.assertEquals(0, reader.read(events, 1, 1, TimeUnit.SECONDS));
    reader.close();
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) FileWriter(co.cask.cdap.data.file.FileWriter) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
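
Why the read loop above sees timestamps 0 through 999 in order: writer i emits timestamps j * 5 + i for j = 0..199, i.e. the residue class i mod 5, so the five writers together cover every integer in [0, 1000) exactly once, and the multi-file reader merges them back into timestamp order. A dependency-free check of that arithmetic:

// Mark each generated timestamp; 1000 writes into 1000 slots means every
// slot is hit exactly once if all end up marked.
boolean[] seen = new boolean[1000];
for (int i = 0; i < 5; i++) {
    for (int j = 0; j < 200; j++) {
        seen[j * 5 + i] = true;
    }
}
for (boolean s : seen) {
    assert s; // holds: every timestamp 0..999 is produced
}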

Example 9 with StreamConfig

use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.

the class StreamDataFileTestBase method testLiveStream.

/**
   * Tests the live stream reader with new partitions and/or sequence files being created over time.
   */
@Category(SlowTests.class)
@Test
public void testLiveStream() throws Exception {
    String streamName = "live";
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    final String filePrefix = "prefix";
    // 5 seconds
    long partitionDuration = 5000;
    Location location = getLocationFactory().create(streamName);
    location.mkdirs();
    final StreamConfig config = new StreamConfig(streamId, partitionDuration, 10000, Long.MAX_VALUE, location, null, 1000);
    // Create a thread that writes 10 events per second
    final AtomicInteger eventsWritten = new AtomicInteger();
    final List<Closeable> closeables = Lists.newArrayList();
    Thread writerThread = new Thread() {

        @Override
        public void run() {
            try {
                while (!interrupted()) {
                    FileWriter<StreamEvent> writer = createWriter(config, filePrefix);
                    closeables.add(writer);
                    for (int i = 0; i < 10; i++) {
                        long ts = System.currentTimeMillis();
                        writer.append(StreamFileTestUtils.createEvent(ts, "Testing"));
                        eventsWritten.getAndIncrement();
                    }
                    writer.flush();
                    TimeUnit.SECONDS.sleep(1);
                }
            } catch (IOException e) {
                LOG.error(e.getMessage(), e);
                throw Throwables.propagate(e);
            } catch (InterruptedException e) {
            // No-op
            }
        }
    };
    // Create a live reader starting one partition earlier than the current time.
    long partitionStart = StreamUtils.getPartitionStartTime(System.currentTimeMillis() - config.getPartitionDuration(), config.getPartitionDuration());
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), partitionStart, config.getPartitionDuration());
    Location eventLocation = StreamUtils.createStreamLocation(partitionLocation, filePrefix, 0, StreamFileType.EVENT);
    // Create a live stream reader that checks for new sequence files every 100 millis.
    FileReader<PositionStreamEvent, StreamFileOffset> reader = new LiveStreamFileReader(config, new StreamFileOffset(eventLocation, 0L, 0), 100);
    List<StreamEvent> events = Lists.newArrayList();
    // Try to read; since the writer thread hasn't started yet, it should get nothing
    Assert.assertEquals(0, reader.read(events, 1, 2, TimeUnit.SECONDS));
    // Start the writer thread.
    writerThread.start();
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.start();
    while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && reader.read(events, 1, 1, TimeUnit.SECONDS) == 0) {
    // Empty
    }
    stopwatch.stop();
    // Should be able to read an event
    Assert.assertEquals(1, events.size());
    TimeUnit.MILLISECONDS.sleep(partitionDuration * 2);
    writerThread.interrupt();
    writerThread.join();
    LOG.info("Writer stopped with {} events written.", eventsWritten.get());
    // reset() also stops the stopwatch; restart it so the 10-second timeout guard works.
    stopwatch.reset();
    stopwatch.start();
    while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != eventsWritten.get()) {
        reader.read(events, eventsWritten.get(), 0, TimeUnit.SECONDS);
    }
    // Should see all events written
    Assert.assertEquals(eventsWritten.get(), events.size());
    // Take a snapshot of the offset.
    StreamFileOffset offset = new StreamFileOffset(reader.getPosition());
    reader.close();
    for (Closeable c : closeables) {
        Closeables.closeQuietly(c);
    }
    // Now create a new writer to write 10 more events across two partitions, skipping one partition in between.
    try (FileWriter<StreamEvent> writer = createWriter(config, filePrefix)) {
        for (int i = 0; i < 5; i++) {
            long ts = System.currentTimeMillis();
            writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
        }
        TimeUnit.MILLISECONDS.sleep(partitionDuration * 3 / 2);
        for (int i = 0; i < 5; i++) {
            long ts = System.currentTimeMillis();
            writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
        }
    }
    // Create a new reader with the previous offset
    reader = new LiveStreamFileReader(config, offset, 100);
    events.clear();
    stopwatch.reset();
    stopwatch.start();
    while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != 10) {
        reader.read(events, 10, 0, TimeUnit.SECONDS);
    }
    Assert.assertEquals(10, events.size());
    // Try to read more; should get nothing
    reader.read(events, 10, 2, TimeUnit.SECONDS);
    reader.close();
    for (Closeable c : closeables) {
        c.close();
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) Closeable(java.io.Closeable) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Stopwatch(com.google.common.base.Stopwatch) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Location(org.apache.twill.filesystem.Location) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
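
The snapshot-and-resume pattern exercised above, condensed (a sketch; the 100 is the millisecond interval at which the live reader polls for new files, as in the test):

// Capture the reader's current position, close it, and later resume a new
// live reader from the copied offset.
StreamFileOffset snapshot = new StreamFileOffset(reader.getPosition());
reader.close();
// ... later, possibly after a restart ...
FileReader<PositionStreamEvent, StreamFileOffset> resumed = new LiveStreamFileReader(config, snapshot, 100);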

Example 10 with StreamConfig

use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.

the class StreamConsumerTestBase method testNamespacedStreamConsumers.

@Test
public void testNamespacedStreamConsumers() throws Exception {
    // Test two consumers for two streams with the same name but in different namespaces. Their consumption should be
    // independent of each other.
    String stream = "testNamespacedStreamConsumers";
    StreamId streamId = TEST_NAMESPACE.stream(stream);
    StreamId otherStreamId = OTHER_NAMESPACE.stream(stream);
    StreamAdmin streamAdmin = getStreamAdmin();
    streamAdmin.create(streamId);
    streamAdmin.create(otherStreamId);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    StreamConfig otherStreamConfig = streamAdmin.getConfig(otherStreamId);
    // Writes 5 events to both streams
    writeEvents(streamConfig, "Testing ", 5);
    writeEvents(otherStreamConfig, "Testing ", 5);
    streamAdmin.configureInstances(streamId, 0L, 1);
    streamAdmin.configureInstances(otherStreamId, 0L, 1);
    StreamConsumerFactory consumerFactory = getConsumerFactory();
    StreamConsumer consumer = consumerFactory.create(streamId, "fifo.rollback", new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
    StreamConsumer otherConsumer = consumerFactory.create(otherStreamId, "fifo.rollback", new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
    // Try to dequeue using both consumers
    TransactionContext context = createTxContext(consumer);
    TransactionContext otherContext = createTxContext(otherConsumer);
    context.start();
    otherContext.start();
    // Consume events from the stream in the default namespace
    DequeueResult<StreamEvent> result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    context.finish();
    context.start();
    result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    context.finish();
    context.start();
    result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 2", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    context.finish();
    context.start();
    // Even though 3 events have already been consumed from the stream with the same name, otherConsumer reads from a
    // stream in a different namespace, so it is still on the initial event.
    DequeueResult<StreamEvent> result1 = otherConsumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
    otherContext.finish();
    otherContext.start();
    result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    result1 = otherConsumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 3", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
    // Commit both
    context.finish();
    otherContext.finish();
    consumer.close();
    otherConsumer.close();
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) TransactionContext(org.apache.tephra.TransactionContext) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ConsumerConfig(co.cask.cdap.data2.queue.ConsumerConfig) Test(org.junit.Test)
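
The per-event transaction pattern repeated above, factored into a helper (a sketch; pollOne and its placement are hypothetical, while the start/poll/finish sequence is exactly what the test does). Running each poll in its own short transaction is what lets the two consumers commit their state independently:

// Hypothetical helper: dequeue one event inside its own transaction and
// return its body as a string.
private String pollOne(TransactionContext context, StreamConsumer consumer) throws Exception {
    context.start();
    DequeueResult<StreamEvent> result = consumer.poll(1, 1, TimeUnit.SECONDS);
    String body = Charsets.UTF_8.decode(result.iterator().next().getBody()).toString();
    context.finish();
    return body;
}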

Aggregations

StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig)18 StreamId (co.cask.cdap.proto.id.StreamId)18 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)15 Test (org.junit.Test)14 Location (org.apache.twill.filesystem.Location)10 IOException (java.io.IOException)7 ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig)6 NamespaceId (co.cask.cdap.proto.id.NamespaceId)6 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)5 TableId (co.cask.cdap.data2.util.TableId)5 TransactionContext (org.apache.tephra.TransactionContext)5 NotificationFeedException (co.cask.cdap.notifications.feeds.NotificationFeedException)3 FileNotFoundException (java.io.FileNotFoundException)3 Properties (java.util.Properties)3 StreamSpecification (co.cask.cdap.api.data.stream.StreamSpecification)2 FileWriter (co.cask.cdap.data.file.FileWriter)2 LevelDBTableCore (co.cask.cdap.data2.dataset2.lib.table.leveldb.LevelDBTableCore)2 ColumnFamilyDescriptorBuilder (co.cask.cdap.data2.util.hbase.ColumnFamilyDescriptorBuilder)2 TableDescriptorBuilder (co.cask.cdap.data2.util.hbase.TableDescriptorBuilder)2 HBaseDDLExecutor (co.cask.cdap.spi.hbase.HBaseDDLExecutor)2