Search in sources :

Example 1 with StreamDataFileReader

use of co.cask.cdap.data.stream.StreamDataFileReader in project cdap by caskdata.

the class ConcurrentStreamWriterTestBase method testConcurrentAppendFile.

/**
 * Runs append-file tasks and event-writer tasks concurrently against a single
 * {@link ConcurrentStreamWriter}, then reads every event file in the stream's
 * partition back and checks the result with {@code verifyEvents}.
 *
 * <p>The writer, the thread pool and every reader are released in {@code finally}
 * blocks so that a failed assertion does not leak them into subsequent tests.
 *
 * @throws Exception if setup, writing, reading or cleanup fails
 */
@Test
public void testConcurrentAppendFile() throws Exception {
    final String streamName = "testConcurrentFile";
    NamespaceId namespace = new NamespaceId("namespace");
    StreamId streamId = namespace.stream(streamName);
    NamespacedLocationFactory locationFactory = getNamespacedLocationFactory();
    StreamAdmin streamAdmin = new TestStreamAdmin(locationFactory, Long.MAX_VALUE, 1000);
    int threads = Runtime.getRuntime().availableProcessors() * 4;
    StreamFileWriterFactory fileWriterFactory = createStreamFileWriterFactory();
    final ConcurrentStreamWriter streamWriter = createStreamWriter(streamId, streamAdmin, threads, fileWriterFactory);
    try {
        int msgCount = 10000;
        // Half of the threads submit append-file tasks; the other half submit writer tasks.
        // Prepare the files first, one per append-file task, each generated with msgCount events.
        final List<FileInfo> fileInfos = Lists.newArrayList();
        for (int i = 0; i < threads / 2; i++) {
            fileInfos.add(generateFile(locationFactory, i, msgCount));
        }
        // Append files and write events
        final CountDownLatch startLatch = new CountDownLatch(1);
        final CountDownLatch completion = new CountDownLatch(threads);
        ExecutorService executor = Executors.newFixedThreadPool(threads);
        try {
            for (int i = 0; i < threads / 2; i++) {
                executor.execute(createAppendFileTask(streamId, streamWriter, fileInfos.get(i), startLatch, completion));
            }
            for (int i = threads / 2; i < threads; i++) {
                executor.execute(createWriterTask(streamId, streamWriter, i, msgCount, 50, startLatch, completion));
            }
            // Release all tasks at once
            startLatch.countDown();
            Assert.assertTrue(completion.await(4, TimeUnit.MINUTES));
        } finally {
            // Always stop the pool; on timeout this also interrupts tasks that are still running.
            executor.shutdownNow();
        }
        // Verify all events are written.
        // There should be only one partition
        Location partitionLocation = streamAdmin.getConfig(streamId).getLocation().list().get(0);
        List<Location> files = partitionLocation.list();
        List<StreamEvent> events = Lists.newArrayListWithCapacity(threads * msgCount);
        for (Location location : files) {
            // Only create reader for the event file
            if (StreamFileType.getType(location.getName()) != StreamFileType.EVENT) {
                continue;
            }
            StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(location));
            try {
                reader.read(events, Integer.MAX_VALUE, 0, TimeUnit.SECONDS);
            } finally {
                // One reader is opened per event file; close each to avoid leaking file handles.
                reader.close();
            }
        }
        Assert.assertTrue(verifyEvents(threads, msgCount, events));
    } finally {
        streamWriter.close();
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) NamespacedLocationFactory(co.cask.cdap.common.namespace.NamespacedLocationFactory) CountDownLatch(java.util.concurrent.CountDownLatch) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) NoopStreamAdmin(co.cask.cdap.data.stream.NoopStreamAdmin) StreamFileWriterFactory(co.cask.cdap.data.stream.StreamFileWriterFactory) LocationStreamFileWriterFactory(co.cask.cdap.data.runtime.LocationStreamFileWriterFactory) ExecutorService(java.util.concurrent.ExecutorService) NamespaceId(co.cask.cdap.proto.id.NamespaceId) StreamDataFileReader(co.cask.cdap.data.stream.StreamDataFileReader) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 2 with StreamDataFileReader

use of co.cask.cdap.data.stream.StreamDataFileReader in project cdap by caskdata.

the class ConcurrentStreamWriterTestBase method testConcurrentWrite.

/**
 * Runs writer tasks concurrently against a single {@link ConcurrentStreamWriter},
 * half created with a batch-size argument of 1 and half with 10, then reads the
 * single expected event file back and checks both the event count and the
 * result of {@code verifyEvents}.
 *
 * <p>The writer, the thread pool and the reader are released in {@code finally}
 * blocks so that a failed assertion does not leak them into subsequent tests.
 *
 * @throws Exception if setup, writing, reading or cleanup fails
 */
@Test
public void testConcurrentWrite() throws Exception {
    final String streamName = "testConcurrentWrite";
    NamespaceId namespace = new NamespaceId("namespace");
    StreamId streamId = namespace.stream(streamName);
    StreamAdmin streamAdmin = new TestStreamAdmin(getNamespacedLocationFactory(), Long.MAX_VALUE, 1000);
    int threads = Runtime.getRuntime().availableProcessors() * 4;
    StreamFileWriterFactory fileWriterFactory = createStreamFileWriterFactory();
    final ConcurrentStreamWriter streamWriter = createStreamWriter(streamId, streamAdmin, threads, fileWriterFactory);
    try {
        // Start n threads that write events through the stream writer; each thread writes 1000 events
        final int msgPerThread = 1000;
        final CountDownLatch startLatch = new CountDownLatch(1);
        final CountDownLatch completion = new CountDownLatch(threads);
        ExecutorService executor = Executors.newFixedThreadPool(threads);
        try {
            // Half of the threads write events one by one, the other half write in batches of size 10
            for (int i = 0; i < threads / 2; i++) {
                executor.execute(createWriterTask(streamId, streamWriter, i, msgPerThread, 1, startLatch, completion));
            }
            for (int i = threads / 2; i < threads; i++) {
                executor.execute(createWriterTask(streamId, streamWriter, i, msgPerThread, 10, startLatch, completion));
            }
            // Release all tasks at once
            startLatch.countDown();
            Assert.assertTrue(completion.await(120, TimeUnit.SECONDS));
        } finally {
            // Always stop the pool; on timeout this also interrupts tasks that are still running.
            executor.shutdownNow();
        }
        // Verify all events are written.
        // There should be only one partition and one file inside
        Location partitionLocation = streamAdmin.getConfig(streamId).getLocation().list().get(0);
        Location streamLocation = StreamUtils.createStreamLocation(partitionLocation, fileWriterFactory.getFileNamePrefix(), 0, StreamFileType.EVENT);
        StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(streamLocation));
        try {
            List<StreamEvent> events = Lists.newArrayListWithCapacity(threads * msgPerThread);
            // Should read all messages
            Assert.assertEquals(threads * msgPerThread, reader.read(events, Integer.MAX_VALUE, 0, TimeUnit.SECONDS));
            // Verify all messages as expected
            Assert.assertTrue(verifyEvents(threads, msgPerThread, events));
        } finally {
            // Close even when an assertion above fails
            reader.close();
        }
    } finally {
        streamWriter.close();
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) CountDownLatch(java.util.concurrent.CountDownLatch) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) NoopStreamAdmin(co.cask.cdap.data.stream.NoopStreamAdmin) StreamFileWriterFactory(co.cask.cdap.data.stream.StreamFileWriterFactory) LocationStreamFileWriterFactory(co.cask.cdap.data.runtime.LocationStreamFileWriterFactory) ExecutorService(java.util.concurrent.ExecutorService) NamespaceId(co.cask.cdap.proto.id.NamespaceId) StreamDataFileReader(co.cask.cdap.data.stream.StreamDataFileReader) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Aggregations

StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)2 LocationStreamFileWriterFactory (co.cask.cdap.data.runtime.LocationStreamFileWriterFactory)2 NoopStreamAdmin (co.cask.cdap.data.stream.NoopStreamAdmin)2 StreamDataFileReader (co.cask.cdap.data.stream.StreamDataFileReader)2 StreamFileWriterFactory (co.cask.cdap.data.stream.StreamFileWriterFactory)2 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)2 NamespaceId (co.cask.cdap.proto.id.NamespaceId)2 StreamId (co.cask.cdap.proto.id.StreamId)2 CountDownLatch (java.util.concurrent.CountDownLatch)2 ExecutorService (java.util.concurrent.ExecutorService)2 Location (org.apache.twill.filesystem.Location)2 Test (org.junit.Test)2 NamespacedLocationFactory (co.cask.cdap.common.namespace.NamespacedLocationFactory)1