
Example 21 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

In class StreamDataFileTestBase, method testLiveStream.

/**
 * Test the live stream reader with new partitions and/or sequence files being created over time.
 */
@Category(SlowTests.class)
@Test
public void testLiveStream() throws Exception {
    String streamName = "live";
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    final String filePrefix = "prefix";
    // 5 seconds
    long partitionDuration = 5000;
    Location location = getLocationFactory().create(streamName);
    location.mkdirs();
    final StreamConfig config = new StreamConfig(streamId, partitionDuration, 10000, Long.MAX_VALUE, location, null, 1000);
    // Create a thread that writes 10 events per second
    final AtomicInteger eventsWritten = new AtomicInteger();
    final List<Closeable> closeables = Lists.newArrayList();
    Thread writerThread = new Thread() {

        @Override
        public void run() {
            try {
                while (!interrupted()) {
                    FileWriter<StreamEvent> writer = createWriter(config, filePrefix);
                    closeables.add(writer);
                    for (int i = 0; i < 10; i++) {
                        long ts = System.currentTimeMillis();
                        writer.append(StreamFileTestUtils.createEvent(ts, "Testing"));
                        eventsWritten.getAndIncrement();
                    }
                    writer.flush();
                    TimeUnit.SECONDS.sleep(1);
                }
            } catch (IOException e) {
                LOG.error(e.getMessage(), e);
                throw Throwables.propagate(e);
            } catch (InterruptedException e) {
                // No-op
            }
        }
    };
    // Create a live reader starting one partition earlier than the current time.
    long partitionStart = StreamUtils.getPartitionStartTime(System.currentTimeMillis() - config.getPartitionDuration(), config.getPartitionDuration());
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), partitionStart, config.getPartitionDuration());
    Location eventLocation = StreamUtils.createStreamLocation(partitionLocation, filePrefix, 0, StreamFileType.EVENT);
    // Creates a live stream reader that checks for a new sequence file every 100 millis.
    FileReader<PositionStreamEvent, StreamFileOffset> reader = new LiveStreamFileReader(config, new StreamFileOffset(eventLocation, 0L, 0), 100);
    List<StreamEvent> events = Lists.newArrayList();
    // Try to read; since the writer thread has not started, it should get nothing
    Assert.assertEquals(0, reader.read(events, 1, 2, TimeUnit.SECONDS));
    // Start the writer thread.
    writerThread.start();
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.start();
    while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && reader.read(events, 1, 1, TimeUnit.SECONDS) == 0) {
        // Empty
    }
    stopwatch.stop();
    // Should be able to read an event
    Assert.assertEquals(1, events.size());
    TimeUnit.MILLISECONDS.sleep(partitionDuration * 2);
    writerThread.interrupt();
    writerThread.join();
    LOG.info("Writer stopped with {} events written.", eventsWritten.get());
    // reset() leaves the stopwatch stopped, so restart it; otherwise
    // elapsedTime() stays at zero and the 10-second guard below never fires.
    stopwatch.reset().start();
    while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != eventsWritten.get()) {
        reader.read(events, eventsWritten.get(), 0, TimeUnit.SECONDS);
    }
    // Should see all events written
    Assert.assertEquals(eventsWritten.get(), events.size());
    // Take a snapshot of the offset.
    StreamFileOffset offset = new StreamFileOffset(reader.getPosition());
    reader.close();
    for (Closeable c : closeables) {
        Closeables.closeQuietly(c);
    }
    // Now create a new writer to write 10 more events across two partitions, skipping a partition in between.
    try (FileWriter<StreamEvent> writer = createWriter(config, filePrefix)) {
        for (int i = 0; i < 5; i++) {
            long ts = System.currentTimeMillis();
            writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
        }
        TimeUnit.MILLISECONDS.sleep(partitionDuration * 3 / 2);
        for (int i = 0; i < 5; i++) {
            long ts = System.currentTimeMillis();
            writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
        }
    }
    // Create a new reader with the previous offset
    reader = new LiveStreamFileReader(config, offset, 100);
    events.clear();
    // Restart the stopwatch again so the timeout guard is effective.
    stopwatch.reset().start();
    while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != 10) {
        reader.read(events, 10, 0, TimeUnit.SECONDS);
    }
    Assert.assertEquals(10, events.size());
    // Try to read more; should get nothing
    reader.read(events, 10, 2, TimeUnit.SECONDS);
    reader.close();
    for (Closeable c : closeables) {
        c.close();
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) Closeable(java.io.Closeable) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Stopwatch(com.google.common.base.Stopwatch) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Location(org.apache.twill.filesystem.Location) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
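
The live reader above starts one partition earlier than the current time. A minimal sketch of the partition arithmetic this relies on, assuming StreamUtils.getPartitionStartTime simply floors a timestamp to the partition boundary (inferred from how the test uses it, not from the CDAP sources):

public class PartitionMath {
    public static void main(String[] args) {
        long partitionDuration = 5000L; // 5 seconds, matching the test config
        long now = System.currentTimeMillis();
        // Floor the timestamp to the partition boundary; assumed to mirror
        // what the test obtains via StreamUtils.getPartitionStartTime.
        long partitionStart = now - (now % partitionDuration);
        // The test starts the live reader one partition earlier than "now".
        long readerStart = partitionStart - partitionDuration;
        System.out.printf("current partition starts at %d, reader starts at %d%n", partitionStart, readerStart);
    }
}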

Example 22 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

In class StreamDataFileTestBase, method testBasicReadWrite.

/**
 * Test for basic read/write, verifying that data is encoded and decoded correctly.
 */
@Test
public void testBasicReadWrite() throws Exception {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location eventFile = dir.getTempFile(".dat");
    Location indexFile = dir.getTempFile(".idx");
    StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L);
    // Write 100 events to the stream: 20 distinct even timestamps with 5 events each
    for (int i = 0; i < 40; i += 2) {
        for (int j = 0; j < 5; j++) {
            writer.append(StreamFileTestUtils.createEvent(i, "Basic test " + i));
        }
    }
    writer.close();
    // Create a reader that starts from beginning.
    StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
    List<StreamEvent> events = Lists.newArrayList();
    Assert.assertEquals(100, reader.read(events, 100, 1, TimeUnit.SECONDS));
    Assert.assertEquals(-1, reader.read(events, 100, 1, TimeUnit.SECONDS));
    reader.close();
    // Collect the events in a multimap for verification
    Multimap<Long, String> messages = LinkedListMultimap.create();
    for (StreamEvent event : events) {
        messages.put(event.getTimestamp(), Charsets.UTF_8.decode(event.getBody()).toString());
    }
    // 20 timestamps
    Assert.assertEquals(20, messages.keySet().size());
    for (Map.Entry<Long, Collection<String>> entry : messages.asMap().entrySet()) {
        // Each timestamp has 5 messages
        Assert.assertEquals(5, entry.getValue().size());
        // All 5 messages for a timestamp are the same
        Assert.assertEquals(1, ImmutableSet.copyOf(entry.getValue()).size());
        // Message is "Basic test " + timestamp
        Assert.assertEquals("Basic test " + entry.getKey(), entry.getValue().iterator().next());
    }
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Collection(java.util.Collection) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
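
The verification loop above decodes each event body back to a string. A self-contained sketch of that round trip, using the StreamEvent constructor as it appears in Examples 24 and 25 (requires the CDAP API and Guava on the classpath):

import co.cask.cdap.api.flow.flowlet.StreamEvent;
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;

public class StreamEventRoundTrip {
    public static void main(String[] args) {
        // Construct an event as the examples do: (headers, body, timestamp).
        StreamEvent event = new StreamEvent(ImmutableMap.<String, String>of(), Charsets.UTF_8.encode("Basic test 42"), 42L);
        // Decode the body back to a String, as the verification loop does.
        String body = Charsets.UTF_8.decode(event.getBody()).toString();
        System.out.println(event.getTimestamp() + " -> " + body);
    }
}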

Example 23 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

In class StreamDataFileTestBase, method testTail.

@Test
public void testTail() throws Exception {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    final Location eventFile = dir.getTempFile(".dat");
    final Location indexFile = dir.getTempFile(".idx");
    final CountDownLatch writerStarted = new CountDownLatch(1);
    // Create a thread for writing 10 events, 1 event per 200 milliseconds.
    // It pauses after writing 5 events.
    final CountDownLatch waitLatch = new CountDownLatch(1);
    Thread writerThread = new Thread() {

        @Override
        public void run() {
            try {
                StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L);
                writerStarted.countDown();
                for (int i = 0; i < 10; i++) {
                    writer.append(StreamFileTestUtils.createEvent(i, "Testing " + i));
                    writer.flush();
                    TimeUnit.MILLISECONDS.sleep(200);
                    if (i == 4) {
                        waitLatch.await();
                    }
                }
                writer.close();
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }
    };
    StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
    List<StreamEvent> events = Lists.newArrayList();
    writerThread.start();
    writerStarted.await();
    // Expect 10 events in total (read in two batches of 5), followed by EOF.
    Assert.assertEquals(5, reader.read(events, 5, 2000, TimeUnit.MILLISECONDS));
    waitLatch.countDown();
    Assert.assertEquals(5, reader.read(events, 5, 2000, TimeUnit.MILLISECONDS));
    Assert.assertEquals(-1, reader.read(events, 1, 500, TimeUnit.MILLISECONDS));
    Assert.assertEquals(10, events.size());
    // Verify the ordering of events
    int ts = 0;
    for (StreamEvent event : events) {
        Assert.assertEquals(ts, event.getTimestamp());
        Assert.assertEquals("Testing " + ts, Charsets.UTF_8.decode(event.getBody()).toString());
        ts++;
    }
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) CountDownLatch(java.util.concurrent.CountDownLatch) IOException(java.io.IOException) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
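
The assertions above pin down the read() contract: it returns the number of events read, 0 when the timeout expires with no new data, and -1 once the writer has closed the file. A tailing loop built on that contract might look like the sketch below; the import path for StreamDataFileReader and the handle callback are assumptions, not taken from the CDAP sources:

import co.cask.cdap.api.flow.flowlet.StreamEvent;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeUnit;

public class TailLoop {
    // Tails a reader until EOF. The reader type is the StreamDataFileReader
    // used in the examples above (import path assumed).
    static void tail(co.cask.cdap.data.stream.StreamDataFileReader reader) throws IOException, InterruptedException {
        List<StreamEvent> events = Lists.newArrayList();
        // read(...) returns the event count, 0 on timeout, -1 at EOF.
        while (reader.read(events, 100, 500, TimeUnit.MILLISECONDS) != -1) {
            for (StreamEvent event : events) {
                handle(event); // no-op iteration when the read timed out
            }
            events.clear();
        }
    }

    // Hypothetical handler; stands in for application logic.
    static void handle(StreamEvent event) {
        System.out.println(event.getTimestamp());
    }
}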

Example 24 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

In class StreamDataFileTestBase, method testLargeDataBlock.

@Test
public void testLargeDataBlock() throws Exception {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location eventFile = dir.getTempFile(".dat");
    Location indexFile = dir.getTempFile(".idx");
    StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L);
    // Write 1200 events in one data block, each event with a 150-byte body.
    // This makes sure the block crosses the 128K read buffer boundary that is
    // observed in HDFS. The StreamDataFileWriter has an internal data block
    // buffer size of 256K, hence writing a ~175K data block shouldn't go over
    // the flush limit in the writer, keeping all events in one data block.
    // Strings.repeat takes a String, not a char literal.
    ByteBuffer body = Charsets.UTF_8.encode(Strings.repeat("0", 150));
    for (int i = 0; i < 1200; i++) {
        writer.append(new StreamEvent(ImmutableMap.<String, String>of(), body.duplicate(), 0));
    }
    writer.close();
    // Read events one by one
    StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
    List<StreamEvent> events = Lists.newArrayList();
    for (int i = 0; i < 1200; i++) {
        Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
        Assert.assertEquals(body, events.get(0).getBody());
        events.clear();
    }
    Assert.assertEquals(-1, reader.read(events, 1, 0, TimeUnit.SECONDS));
    reader.close();
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ByteBuffer(java.nio.ByteBuffer) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
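
To make the sizing comment concrete, here is the arithmetic as a runnable check. The 3-byte per-event framing (2-byte body length plus 1-byte empty header map) is an assumption carried over from the position check in Example 25:

public class BlockSizeMath {
    public static void main(String[] args) {
        int events = 1200;
        int bodyBytes = 150;
        // Body bytes alone: 180,000 B, roughly the "~175K" cited above.
        int bodiesOnly = events * bodyBytes;
        // With the assumed per-event framing from Example 25.
        int framed = events * (2 + bodyBytes + 1);
        System.out.printf("bodies only: %.1f KiB, framed: %.1f KiB%n", bodiesOnly / 1024.0, framed / 1024.0);
        System.out.println("crosses 128 KiB read buffer: " + (framed > 128 * 1024));
        System.out.println("fits in 256 KiB writer buffer: " + (framed < 256 * 1024));
    }
}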

Example 25 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

In class StreamDataFileTestBase, method testAppendAll.

/**
 * Validates that a batch write with the same timestamp lands in the same data block.
 */
@Test
public void testAppendAll() throws Exception {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location eventFile = dir.getTempFile(".dat");
    Location indexFile = dir.getTempFile(".idx");
    // Creates a stream file
    try (StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L)) {
        final CountDownLatch writeCompleted = new CountDownLatch(1);
        final CountDownLatch readAttempted = new CountDownLatch(1);
        // Write 1000 events using appendAll from a separate thread.
        // It writes 1000 events of 300 bytes each, all with the same timestamp,
        // and waits for a signal before ending. This makes sure the data block
        // is not yet written out (the internal buffer is 256K unless the writer
        // flushes), hence the reader shouldn't be seeing it.
        Thread t = new Thread() {

            @Override
            public void run() {
                try {
                    writer.appendAll(new AbstractIterator<StreamEvent>() {

                        int count = 1000;

                        long timestamp = System.currentTimeMillis();

                        Map<String, String> headers = ImmutableMap.of();

                        @Override
                        protected StreamEvent computeNext() {
                            if (count-- > 0) {
                                return new StreamEvent(headers, Charsets.UTF_8.encode(String.format("%0300d", count)), timestamp);
                            }
                            writeCompleted.countDown();
                            Uninterruptibles.awaitUninterruptibly(readAttempted);
                            Flushables.flushQuietly(writer);
                            return endOfData();
                        }
                    });
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            }
        };
        t.start();
        // Create a reader
        try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
            List<PositionStreamEvent> events = Lists.newArrayList();
            // Wait for the writer completion
            Assert.assertTrue(writeCompleted.await(20, TimeUnit.SECONDS));
            // Try to read an event; nothing should be read
            Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS));
            // Now signal writer to flush
            readAttempted.countDown();
            // Now it should be possible to read all 1000 events
            t.join(10000);
            Assert.assertEquals(1000, reader.read(events, 1000, 0, TimeUnit.SECONDS));
            int size = events.size();
            long lastStart = -1;
            for (int i = 0; i < size; i++) {
                PositionStreamEvent event = events.get(i);
                Assert.assertEquals(String.format("%0300d", size - i - 1), Charsets.UTF_8.decode(event.getBody()).toString());
                if (lastStart > 0) {
                    // The position difference between two consecutive events should be 303:
                    // 2 bytes for body length, 300 bytes body, 1 byte header map (value == 0)
                    Assert.assertEquals(303L, event.getStart() - lastStart);
                }
                lastStart = event.getStart();
            }
        }
    }
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
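
appendAll consumes an Iterator<StreamEvent>, as the anonymous AbstractIterator above shows. A simpler sketch feeding it a pre-built batch of same-timestamp events; the StreamDataFileWriter import path and the appendAll(Iterator) parameter type are assumptions based on the usage above:

import co.cask.cdap.api.flow.flowlet.StreamEvent;
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.List;

public class BatchAppend {
    // Appends a batch of same-timestamp events in one appendAll call, which
    // per the test above should land them in the same data block.
    static void appendBatch(co.cask.cdap.data.stream.StreamDataFileWriter writer, int count) throws IOException {
        long timestamp = System.currentTimeMillis();
        List<StreamEvent> batch = Lists.newArrayList();
        for (int i = 0; i < count; i++) {
            batch.add(new StreamEvent(ImmutableMap.<String, String>of(), Charsets.UTF_8.encode("event-" + i), timestamp));
        }
        writer.appendAll(batch.iterator());
        writer.flush();
    }
}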

Aggregations

StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 84
Test (org.junit.Test): 65
Location (org.apache.twill.filesystem.Location): 27
StreamId (co.cask.cdap.proto.id.StreamId): 24
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 19
FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 17
Schema (co.cask.cdap.api.data.schema.Schema): 10
IOException (java.io.IOException): 9
StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig): 8
ByteBuffer (java.nio.ByteBuffer): 8
ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig): 7
StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin): 6
TransactionContext (org.apache.tephra.TransactionContext): 6
BinaryDecoder (co.cask.cdap.common.io.BinaryDecoder): 5
TypeToken (com.google.common.reflect.TypeToken): 5
StreamEventCodec (co.cask.cdap.common.stream.StreamEventCodec): 4
IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder): 4
File (java.io.File): 4
SchemaHash (co.cask.cdap.api.data.schema.SchemaHash): 3
QueueName (co.cask.cdap.common.queue.QueueName): 3