Search in sources :

Example 16 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class ConcurrentStreamWriterTestBase method testConcurrentAppendFile.

/**
 * Exercises a {@link ConcurrentStreamWriter} with half of the worker threads appending
 * pre-generated files and the other half writing events one by one, then reads back every
 * event file of the stream and hands the collected events to {@code verifyEvents}.
 *
 * @throws Exception if stream setup, writing, or reading fails
 */
@Test
public void testConcurrentAppendFile() throws Exception {
    final String streamName = "testConcurrentFile";
    NamespaceId namespace = new NamespaceId("namespace");
    StreamId streamId = namespace.stream(streamName);
    StreamAdmin streamAdmin = new TestStreamAdmin(getNamespacedLocationFactory(), Long.MAX_VALUE, 1000);
    int threads = Runtime.getRuntime().availableProcessors() * 4;
    StreamFileWriterFactory fileWriterFactory = createStreamFileWriterFactory();
    final ConcurrentStreamWriter streamWriter = createStreamWriter(streamId, streamAdmin, threads, fileWriterFactory);
    int msgCount = 10000;
    NamespacedLocationFactory locationFactory = getNamespacedLocationFactory();
    // Half of the threads will be calling appendFile, the other half append events one by one.
    // Prepare the files first, each file has msgCount events.
    final List<FileInfo> fileInfos = Lists.newArrayList();
    for (int i = 0; i < threads / 2; i++) {
        fileInfos.add(generateFile(locationFactory, i, msgCount));
    }
    // Append files and write events. All tasks block on startLatch so they start together.
    final CountDownLatch startLatch = new CountDownLatch(1);
    final CountDownLatch completion = new CountDownLatch(threads);
    ExecutorService executor = Executors.newFixedThreadPool(threads);
    try {
        for (int i = 0; i < threads / 2; i++) {
            executor.execute(createAppendFileTask(streamId, streamWriter, fileInfos.get(i), startLatch, completion));
        }
        for (int i = threads / 2; i < threads; i++) {
            executor.execute(createWriterTask(streamId, streamWriter, i, msgCount, 50, startLatch, completion));
        }
        startLatch.countDown();
        Assert.assertTrue(completion.await(4, TimeUnit.MINUTES));
    } finally {
        // Release the pool's threads even when the await times out or a submission fails.
        // shutdown() (not shutdownNow()) is used so tasks already running are never interrupted.
        executor.shutdown();
    }
    // Verify all events are written.
    // There should be only one partition
    Location partitionLocation = streamAdmin.getConfig(streamId).getLocation().list().get(0);
    List<Location> files = partitionLocation.list();
    List<StreamEvent> events = Lists.newArrayListWithCapacity(threads * msgCount);
    for (Location location : files) {
        // Only create reader for the event file
        if (StreamFileType.getType(location.getName()) != StreamFileType.EVENT) {
            continue;
        }
        StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(location));
        try {
            reader.read(events, Integer.MAX_VALUE, 0, TimeUnit.SECONDS);
        } finally {
            // One reader is opened per event file; close it so file handles are not leaked.
            reader.close();
        }
    }
    Assert.assertTrue(verifyEvents(threads, msgCount, events));
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) NamespacedLocationFactory(co.cask.cdap.common.namespace.NamespacedLocationFactory) CountDownLatch(java.util.concurrent.CountDownLatch) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) NoopStreamAdmin(co.cask.cdap.data.stream.NoopStreamAdmin) StreamFileWriterFactory(co.cask.cdap.data.stream.StreamFileWriterFactory) LocationStreamFileWriterFactory(co.cask.cdap.data.runtime.LocationStreamFileWriterFactory) ExecutorService(java.util.concurrent.ExecutorService) NamespaceId(co.cask.cdap.proto.id.NamespaceId) StreamDataFileReader(co.cask.cdap.data.stream.StreamDataFileReader) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 17 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamDataFileTestBase method testOffset.

/**
 * Writes 100 events, then for every entry in the index file opens a reader positioned one
 * byte before the indexed position and checks that the first event read carries the
 * timestamp recorded in that index entry.
 *
 * @throws Exception if writing or reading the stream files fails
 */
@Test
public void testOffset() throws Exception {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location eventFile = dir.getTempFile(".dat");
    Location indexFile = dir.getTempFile(".idx");
    // Write 100 events with different timestamps.
    StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10L);
    for (int i = 0; i < 100; i++) {
        writer.append(StreamFileTestUtils.createEvent(i, "Testing " + i));
    }
    writer.close();
    StreamDataFileIndex index = new StreamDataFileIndex(Locations.newInputSupplier(indexFile));
    StreamDataFileIndexIterator iterator = index.indexIterator();
    while (iterator.nextIndexEntry()) {
        // Start just before the indexed position; the reader is expected to land on the indexed event.
        StreamDataFileReader reader = StreamDataFileReader.createWithOffset(Locations.newInputSupplier(eventFile), Locations.newInputSupplier(indexFile), iterator.currentPosition() - 1);
        try {
            List<StreamEvent> events = Lists.newArrayList();
            Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
            Assert.assertEquals(iterator.currentTimestamp(), events.get(0).getTimestamp());
        } finally {
            // A fresh reader is opened per index entry; close it so handles are not leaked.
            reader.close();
        }
    }
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 18 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamDataFileTestBase method testFilter.

/**
 * Reads a single-event file through a {@link ReadFilter} that rejects every timestamp and
 * records whether it was consulted.
 *
 * <p>The first read must return 0 events with the filter having been invoked; after the
 * filter is reset, a second read must again return 0 events without invoking the filter.
 * NOTE(review): presumably the second read does not consult the filter because the reader
 * has already moved past the only event - confirm against the reader implementation.
 *
 * @throws Exception if writing or reading the stream files fails
 */
@Test
public void testFilter() throws Exception {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    final Location eventFile = dir.getTempFile(".dat");
    final Location indexFile = dir.getTempFile(".idx");
    StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L);
    try {
        writer.append(StreamFileTestUtils.createEvent(0, "Message 1"));
        // Flush (not close) so the reader sees the event while the writer is still open.
        writer.flush();
        StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
        try {
            List<StreamEvent> events = Lists.newArrayList();
            final AtomicBoolean active = new AtomicBoolean(false);
            ReadFilter filter = new ReadFilter() {

                private long nextTimestamp = -1L;

                @Override
                public void reset() {
                    active.set(false);
                    nextTimestamp = -1L;
                }

                @Override
                public boolean acceptTimestamp(long timestamp) {
                    // Record that the filter was consulted, hint the next timestamp, reject the event.
                    active.set(true);
                    nextTimestamp = timestamp + 1;
                    return false;
                }

                @Override
                public long getNextTimestampHint() {
                    return nextTimestamp;
                }
            };
            Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS, filter));
            Assert.assertTrue(active.get());
            filter.reset();
            Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS, filter));
            Assert.assertFalse(active.get());
        } finally {
            // Close the reader even when an assertion fails.
            reader.close();
        }
    } finally {
        // Close the writer last, matching the original reader-then-writer order.
        writer.close();
    }
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ReadFilter(co.cask.cdap.data.file.ReadFilter) TTLReadFilter(co.cask.cdap.data.file.filter.TTLReadFilter) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 19 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamDataFileTestBase method testEndOfFile.

/**
 * Opens a reader whose start time lies beyond the last event in the file and expects the
 * first read to return -1.
 *
 * @throws Exception if writing or reading the stream files fails
 */
@Test
public void testEndOfFile() throws Exception {
    Location tempDir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location dataFile = tempDir.getTempFile(".dat");
    Location idxFile = tempDir.getTempFile(".idx");

    // Populate the file with 5 events (createEvent is called with 0..4).
    StreamDataFileWriter eventWriter = new StreamDataFileWriter(
        Locations.newOutputSupplier(dataFile),
        Locations.newOutputSupplier(idxFile),
        10000L);
    int written = 0;
    while (written < 5) {
        eventWriter.append(StreamFileTestUtils.createEvent(written, "Testing " + written));
        written++;
    }
    eventWriter.close();

    // Open a reader with a start time larger than that of all events in the file.
    StreamDataFileReader eventReader = StreamDataFileReader.createByStartTime(
        Locations.newInputSupplier(dataFile),
        Locations.newInputSupplier(idxFile),
        10L);
    List<StreamEvent> collected = Lists.newArrayList();
    int readResult = eventReader.read(collected, 10, 1, TimeUnit.SECONDS);
    Assert.assertEquals(-1, readResult);
    eventReader.close();
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 20 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamDataFileTestBase method testEventTemplate.

/**
 * Covers the v2 file format, which supports defaulting values in a stream event
 * (timestamp and headers): the file is written with the uni-timestamp property and a
 * default header, and every event read back must carry the close timestamp and that header.
 */
@Test
public void testEventTemplate() throws IOException, InterruptedException {
    Location tempDir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location dataFile = tempDir.getTempFile(".dat");
    Location idxFile = tempDir.getTempFile(".idx");

    // Stream file configured with the uni timestamp property and a default header (key=value).
    StreamDataFileWriter eventWriter = new StreamDataFileWriter(
        Locations.newOutputSupplier(dataFile),
        Locations.newOutputSupplier(idxFile),
        10000L,
        ImmutableMap.of(
            StreamDataFileConstants.Property.Key.UNI_TIMESTAMP,
            StreamDataFileConstants.Property.Value.CLOSE_TIMESTAMP,
            StreamDataFileConstants.Property.Key.EVENT_HEADER_PREFIX + "key",
            "value"));

    // Write 1000 events, each created with a different timestamp.
    int written = 0;
    while (written < 1000) {
        eventWriter.append(StreamFileTestUtils.createEvent(written, "Message " + written));
        written++;
    }

    // Asking for the close timestamp before the file is closed must be rejected.
    boolean rejectedBeforeClose = false;
    try {
        eventWriter.getCloseTimestamp();
    } catch (IllegalStateException expected) {
        rejectedBeforeClose = true;
    }
    if (!rejectedBeforeClose) {
        Assert.fail();
    }
    eventWriter.close();

    // After closing, the close timestamp is available; it is used for the assertions below.
    long closeTimestamp = eventWriter.getCloseTimestamp();

    // Read everything back: every event must carry the close timestamp and the default header.
    StreamDataFileReader fullReader = StreamDataFileReader.create(Locations.newInputSupplier(dataFile));
    List<StreamEvent> collected = Lists.newArrayList();
    int firstRead = fullReader.read(collected, 1000, 0, TimeUnit.SECONDS);
    Assert.assertEquals(1000, firstRead);
    for (StreamEvent streamEvent : collected) {
        Assert.assertEquals(closeTimestamp, streamEvent.getTimestamp());
        Assert.assertEquals("value", streamEvent.getHeaders().get("key"));
    }

    // Nothing is left to read.
    int secondRead = fullReader.read(collected, 1, 0, TimeUnit.SECONDS);
    Assert.assertEquals(-1, secondRead);
    fullReader.close();

    // A second reader with a filter that rejects every timestamp must also report -1.
    StreamDataFileReader filteredReader = StreamDataFileReader.create(Locations.newInputSupplier(dataFile));
    ReadFilter rejectAll = new ReadFilter() {

        @Override
        public boolean acceptTimestamp(long timestamp) {
            return false;
        }
    };
    Assert.assertEquals(-1, filteredReader.read(collected, 1, 0, TimeUnit.SECONDS, rejectAll));
    filteredReader.close();
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ReadFilter(co.cask.cdap.data.file.ReadFilter) TTLReadFilter(co.cask.cdap.data.file.filter.TTLReadFilter) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Aggregations

StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)84 Test (org.junit.Test)65 Location (org.apache.twill.filesystem.Location)27 StreamId (co.cask.cdap.proto.id.StreamId)24 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)19 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)17 Schema (co.cask.cdap.api.data.schema.Schema)10 IOException (java.io.IOException)9 StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig)8 ByteBuffer (java.nio.ByteBuffer)8 ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig)7 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)6 TransactionContext (org.apache.tephra.TransactionContext)6 BinaryDecoder (co.cask.cdap.common.io.BinaryDecoder)5 TypeToken (com.google.common.reflect.TypeToken)5 StreamEventCodec (co.cask.cdap.common.stream.StreamEventCodec)4 IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder)4 File (java.io.File)4 SchemaHash (co.cask.cdap.api.data.schema.SchemaHash)3 QueueName (co.cask.cdap.common.queue.QueueName)3