Search in sources :

Example 26 with StreamConfig

use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.

the class StreamHandler method createContentWriterFactory.

/**
 * Builds the {@link ContentWriterFactory} used by the batch endpoint for the given stream.
 *
 * <p>The request's content type is copied into the event headers under the {@code content.type} key, and the
 * resulting headers are handed to a {@link LengthBasedContentWriterFactory} together with the stream's
 * current configuration and the configured batch buffer threshold.
 *
 * @param streamId the stream the batch request targets
 * @param request the incoming batch HTTP request
 * @return a factory for writing the content of the batch request
 * @throws IOException declared by this method; presumably raised when the stream configuration cannot be read
 */
private ContentWriterFactory createContentWriterFactory(StreamId streamId, HttpRequest request) throws IOException {
    // Falls back to "" when the header is absent; per the original note, the batch request itself
    // fails earlier if no content type is supplied, so the value is expected to be non-empty here.
    String requestContentType = HttpHeaders.getHeader(request, HttpHeaders.Names.CONTENT_TYPE, "");

    ImmutableMap.Builder<String, String> headerBuilder = ImmutableMap.builder();
    headerBuilder.put("content.type", requestContentType);
    Map<String, String> eventHeaders = getHeaders(request, streamId.getEntityName(), headerBuilder);

    return new LengthBasedContentWriterFactory(streamAdmin.getConfig(streamId), streamWriter, eventHeaders,
                                               batchBufferThreshold, impersonator);
}
Also used : LengthBasedContentWriterFactory(co.cask.cdap.data.stream.service.upload.LengthBasedContentWriterFactory) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig)

Example 27 with StreamConfig

use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.

the class StreamCoordinatorTestBase method testConfig.

/**
 * Updates one stream's TTL and notification threshold concurrently from two threads and verifies that
 * both the registered property listener and the stored {@link StreamConfig} end up with the last values.
 */
@Test
public void testConfig() throws Exception {
    final StreamAdmin streamAdmin = getStreamAdmin();
    final StreamId streamId = NamespaceId.DEFAULT.stream("testConfig");
    streamAdmin.create(streamId);

    StreamCoordinatorClient coordinator = getStreamCoordinator();

    // Every notification the listener receives is recorded so the final values can be inspected later.
    final BlockingDeque<Integer> thresholds = new LinkedBlockingDeque<>();
    final BlockingDeque<Long> ttls = new LinkedBlockingDeque<>();
    StreamPropertyListener recorder = new StreamPropertyListener() {

        @Override
        public void thresholdChanged(StreamId streamId, int threshold) {
            thresholds.add(threshold);
        }

        @Override
        public void ttlChanged(StreamId streamId, long ttl) {
            ttls.add(ttl);
        }
    };
    coordinator.addListener(streamId, recorder);

    // Two updaters: the first one only changes the ttl, the second one only changes the threshold.
    final int updaterCount = 2;
    final CyclicBarrier startBarrier = new CyclicBarrier(updaterCount);
    final CountDownLatch doneLatch = new CountDownLatch(updaterCount);
    for (int updater = 0; updater < updaterCount; updater++) {
        final boolean updatesTtl = (updater == 0);
        Runnable updateTask = new Runnable() {

            @Override
            public void run() {
                try {
                    // Wait for the other updater so that both start issuing updates together.
                    startBarrier.await();
                    for (int round = 0; round < 100; round++) {
                        Long ttl = null;
                        Integer threshold = null;
                        if (updatesTtl) {
                            ttl = (long) (round * 1000);
                        } else {
                            threshold = round;
                        }
                        streamAdmin.updateConfig(streamId, new StreamProperties(ttl, null, threshold));
                    }
                    doneLatch.countDown();
                } catch (Exception e) {
                    throw Throwables.propagate(e);
                }
            }
        };
        new Thread(updateTask).start();
    }
    Assert.assertTrue(doneLatch.await(60, TimeUnit.SECONDS));

    // Only the last threshold and ttl are checked. The listener is not required to observe every
    // intermediate update, just the latest value (that's what a ZK watch guarantees).
    Assert.assertTrue(validateLastElement(thresholds, 99));
    Assert.assertTrue(validateLastElement(ttls, 99000L));

    // The stored configuration must reflect the final update of each thread.
    StreamConfig finalConfig = streamAdmin.getConfig(streamId);
    Assert.assertEquals(99, finalConfig.getNotificationThresholdMB());
    Assert.assertEquals(99000L, finalConfig.getTTL());
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) LinkedBlockingDeque(java.util.concurrent.LinkedBlockingDeque) StreamProperties(co.cask.cdap.proto.StreamProperties) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) CountDownLatch(java.util.concurrent.CountDownLatch) IOException(java.io.IOException) CyclicBarrier(java.util.concurrent.CyclicBarrier) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) Test(org.junit.Test)

Example 28 with StreamConfig

use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.

the class StreamFileJanitorTestBase method testCleanupTTL.

/**
 * Verifies that {@link StreamFileJanitor#clean} removes partitions according to the stream TTL:
 * a first pass at time 10000 leaves 3 of 5 partitions, a second pass at time 16000 leaves none.
 */
@Test
public void testCleanupTTL() throws Exception {
    // Create a stream with 5 seconds TTL, partition duration of 2 seconds
    String streamName = "testCleanupTTL";
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    StreamAdmin streamAdmin = getStreamAdmin();
    StreamFileJanitor janitor = new StreamFileJanitor(getCConfiguration(), getStreamAdmin(), getNamespacedLocationFactory(), getNamespaceAdmin(), impersonator);
    Properties properties = new Properties();
    properties.setProperty(Constants.Stream.PARTITION_DURATION, "2000");
    properties.setProperty(Constants.Stream.TTL, "5000");
    streamAdmin.create(streamId, properties);
    // Truncate to increment generation to 1. This makes the verification condition easier (won't affect correctness).
    streamAdmin.truncate(streamId);
    StreamConfig config = streamAdmin.getConfig(streamId);
    // Write data with different timestamps (0 to 9000) that span across 5 partitions.
    // try-with-resources guarantees the writer is closed even if an append fails.
    try (FileWriter<StreamEvent> writer = createWriter(streamId)) {
        for (int i = 0; i < 10; i++) {
            writer.append(StreamFileTestUtils.createEvent(i * 1000, "Testing " + i));
        }
    }
    // Should see 5 partitions
    Location generationLocation = StreamUtils.createGenerationLocation(config.getLocation(), 1);
    Assert.assertEquals(5, generationLocation.list().size());
    // Perform clean with current time = 10000 (10 seconds since epoch).
    // Since TTL = 5 seconds, 2 partitions will be removed (Ends at 2000 and ends at 4000).
    janitor.clean(config.getLocation(), config.getTTL(), 10000);
    Assert.assertEquals(3, generationLocation.list().size());
    // Cleanup again with current time = 16000, all partitions should be deleted.
    janitor.clean(config.getLocation(), config.getTTL(), 16000);
    Assert.assertTrue(generationLocation.list().isEmpty());
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) Properties(java.util.Properties) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 29 with StreamConfig

use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.

the class MultiLiveStreamFileReaderTestBase method testOffsets.

/**
 * Verifies that a {@link MultiLiveStreamFileReader} can be resumed from captured offsets: reads the first
 * 250 of 1000 events, snapshots the reader position, then reads the remaining 750 events with a second
 * reader created from that snapshot, checking timestamps and bodies stay in sequence throughout.
 */
@Test
public void testOffsets() throws Exception {
    String streamName = "offsets";
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    Location location = getLocationFactory().create(streamName);
    location.mkdirs();
    // Create a stream with 1 partition.
    StreamConfig config = new StreamConfig(streamId, Long.MAX_VALUE, 10000, Long.MAX_VALUE, location, null, 1000);
    // Write out 200 events in each of 5 files (1000 events in total), with interleaving timestamps.
    for (int i = 0; i < 5; i++) {
        // try-with-resources guarantees each writer is closed even if an append fails.
        try (FileWriter<StreamEvent> writer = createWriter(config, "bucket" + i)) {
            for (int j = 0; j < 200; j++) {
                long timestamp = j * 5 + i;
                writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
            }
        }
    }
    // Create a multi reader
    List<StreamFileOffset> sources = Lists.newArrayList();
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), 0, Long.MAX_VALUE);
    for (int i = 0; i < 5; i++) {
        Location eventFile = StreamUtils.createStreamLocation(partitionLocation, "bucket" + i, 0, StreamFileType.EVENT);
        sources.add(new StreamFileOffset(eventFile, 0L, 0));
    }
    List<StreamEvent> events = Lists.newArrayList();
    long expectedTimestamp = 0L;
    List<StreamFileOffset> offsets;
    MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(config, sources);
    try {
        // Read 250 events, in batch size of 10.
        expectedTimestamp = assertSequentialBatches(reader, events, 25, expectedTimestamp);
        // Capture the offsets. Each one is copied so the snapshot does not share objects with the reader.
        offsets = ImmutableList.copyOf(Iterables.transform(reader.getPosition(), new Function<StreamFileOffset, StreamFileOffset>() {

            @Override
            public StreamFileOffset apply(StreamFileOffset input) {
                return new StreamFileOffset(input);
            }
        }));
    } finally {
        // Close the reader even when an assertion above fails.
        reader.close();
    }
    // Create another multi reader with the offsets
    sources.clear();
    sources.addAll(offsets);
    reader = new MultiLiveStreamFileReader(config, sources);
    try {
        // Read 750 events, in batch size of 10.
        assertSequentialBatches(reader, events, 75, expectedTimestamp);
        // All 1000 events are consumed, so a further read (waiting up to 2 seconds) must return nothing.
        Assert.assertEquals(0, reader.read(events, 10, 2, TimeUnit.SECONDS));
    } finally {
        reader.close();
    }
}

/**
 * Reads {@code batches} batches of 10 events from the reader and asserts that timestamps increase by one
 * per event starting at {@code expectedTimestamp}, and that each body is {@code "Testing " + timestamp}.
 *
 * @param reader the reader to consume from
 * @param events scratch list used to receive each batch; it is cleared after every batch
 * @param batches number of 10-event batches to read
 * @param expectedTimestamp timestamp expected on the first event read
 * @return the timestamp expected on the next event after the ones consumed here
 */
private long assertSequentialBatches(MultiLiveStreamFileReader reader, List<StreamEvent> events,
                                     int batches, long expectedTimestamp) throws Exception {
    for (int i = 0; i < batches; i++) {
        Assert.assertEquals(10, reader.read(events, 10, 0, TimeUnit.SECONDS));
        Assert.assertEquals(10, events.size());
        for (StreamEvent event : events) {
            Assert.assertEquals(expectedTimestamp, event.getTimestamp());
            Assert.assertEquals("Testing " + expectedTimestamp, Charsets.UTF_8.decode(event.getBody()).toString());
            expectedTimestamp++;
        }
        events.clear();
    }
    return expectedTimestamp;
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) Function(com.google.common.base.Function) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 30 with StreamConfig

use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.

the class MultiLiveStreamFileReaderTestBase method testLiveFileReader.

/**
 * Writes five events at timestamps 0-4 and five more at timestamps 15-19 under the same file name prefix,
 * then checks that a single {@link LiveStreamFileReader} started at the first event file reads all ten,
 * in timestamp order, without ever returning an empty read.
 */
@Test
public void testLiveFileReader() throws Exception {
    final String bucket = "live.0";
    String streamName = "liveReader";
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    Location location = getLocationFactory().create(streamName);
    location.mkdirs();

    // Create a stream with 5 seconds partition.
    StreamConfig config = new StreamConfig(streamId, 5000, 1000, Long.MAX_VALUE, location, null, 1000);

    // Write 5 events in the first partition (ts = 0 to 4)
    try (FileWriter<StreamEvent> firstWriter = createWriter(config, bucket)) {
        for (int ts = 0; ts < 5; ts++) {
            firstWriter.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
        }
    }

    // Write 5 events in the fourth partition (ts = 15 to 19)
    try (FileWriter<StreamEvent> laterWriter = createWriter(config, bucket)) {
        for (int ts = 15; ts < 20; ts++) {
            laterWriter.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
        }
    }

    // Create a LiveStreamFileReader to read 10 events. It should be able to read them all.
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), 0, config.getPartitionDuration());
    Location eventLocation = StreamUtils.createStreamLocation(partitionLocation, bucket, 0, StreamFileType.EVENT);
    StreamFileOffset startOffset = new StreamFileOffset(eventLocation, 0, 0);
    List<StreamEvent> events = new ArrayList<>();
    try (LiveStreamFileReader reader = new LiveStreamFileReader(config, startOffset)) {
        while (events.size() < 10) {
            // It shouldn't have empty read.
            int eventsRead = reader.read(events, Integer.MAX_VALUE, 0, TimeUnit.SECONDS);
            Assert.assertTrue(eventsRead > 0);
        }
    }
    Assert.assertEquals(10, events.size());

    // First 5 events must have timestamps 0-4, next 5 events must have timestamps 15-19
    long[] expectedTimestamps = {0, 1, 2, 3, 4, 15, 16, 17, 18, 19};
    for (int index = 0; index < expectedTimestamps.length; index++) {
        Assert.assertEquals(expectedTimestamps[index], events.get(index).getTimestamp());
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ArrayList(java.util.ArrayList) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Aggregations

StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig)18 StreamId (co.cask.cdap.proto.id.StreamId)18 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)15 Test (org.junit.Test)14 Location (org.apache.twill.filesystem.Location)10 IOException (java.io.IOException)7 ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig)6 NamespaceId (co.cask.cdap.proto.id.NamespaceId)6 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)5 TableId (co.cask.cdap.data2.util.TableId)5 TransactionContext (org.apache.tephra.TransactionContext)5 NotificationFeedException (co.cask.cdap.notifications.feeds.NotificationFeedException)3 FileNotFoundException (java.io.FileNotFoundException)3 Properties (java.util.Properties)3 StreamSpecification (co.cask.cdap.api.data.stream.StreamSpecification)2 FileWriter (co.cask.cdap.data.file.FileWriter)2 LevelDBTableCore (co.cask.cdap.data2.dataset2.lib.table.leveldb.LevelDBTableCore)2 ColumnFamilyDescriptorBuilder (co.cask.cdap.data2.util.hbase.ColumnFamilyDescriptorBuilder)2 TableDescriptorBuilder (co.cask.cdap.data2.util.hbase.TableDescriptorBuilder)2 HBaseDDLExecutor (co.cask.cdap.spi.hbase.HBaseDDLExecutor)2