Search in sources :

Example 66 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class MultiLiveStreamFileReaderTestBase method testMultiFileReader.

/**
 * Checks that a {@link MultiLiveStreamFileReader} reading five event files of one partition
 * returns the events in ascending timestamp order, and that it picks up events appended
 * after the first round of reads.
 */
@Test
public void testMultiFileReader() throws Exception {
    String name = "multiReader";
    StreamId id = NamespaceId.DEFAULT.stream(name);
    Location streamDir = getLocationFactory().create(name);
    streamDir.mkdirs();

    // Create a stream with 1 partition.
    StreamConfig streamConfig = new StreamConfig(id, Long.MAX_VALUE, 10000, Long.MAX_VALUE, streamDir, null, 1000);

    // Write 200 events into each of 5 files (1000 events in total).
    // File number b receives timestamps b, b + 5, b + 10, ... so timestamps interleave across the files.
    List<FileWriter<StreamEvent>> fileWriters = Lists.newArrayList();
    for (int bucket = 0; bucket < 5; bucket++) {
        FileWriter<StreamEvent> fileWriter = createWriter(streamConfig, "bucket" + bucket);
        fileWriters.add(fileWriter);
        for (int seq = 0; seq < 200; seq++) {
            long ts = seq * 5 + bucket;
            fileWriter.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
        }
    }

    // Flush every writer so that the events written so far are readable.
    for (int idx = 0; idx < fileWriters.size(); idx++) {
        fileWriters.get(idx).flush();
    }

    // Build one starting offset (position 0) per event file and create the multi file reader from them.
    Location partitionDir = StreamUtils.createPartitionLocation(streamConfig.getLocation(), 0, Long.MAX_VALUE);
    List<StreamFileOffset> offsets = Lists.newArrayList();
    for (int bucket = 0; bucket < 5; bucket++) {
        Location bucketFile = StreamUtils.createStreamLocation(partitionDir, "bucket" + bucket, 0, StreamFileType.EVENT);
        offsets.add(new StreamFileOffset(bucketFile, 0L, 0));
    }
    MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(streamConfig, offsets);

    // Read everything written so far in 10 batches of 100 events, expecting timestamps 0, 1, 2, ...
    List<StreamEvent> batch = Lists.newArrayList();
    long nextTimestamp = 0L;
    for (int round = 0; round < 10; round++) {
        Assert.assertEquals(100, reader.read(batch, 100, 0, TimeUnit.SECONDS));
        Assert.assertEquals(100, batch.size());
        for (int k = 0; k < batch.size(); k++) {
            StreamEvent received = batch.get(k);
            Assert.assertEquals(nextTimestamp, received.getTimestamp());
            String body = Charsets.UTF_8.decode(received.getBody()).toString();
            Assert.assertEquals("Testing " + nextTimestamp, body);
            nextTimestamp++;
        }
        batch.clear();
    }
    // Nothing more is available at this point.
    Assert.assertEquals(0, reader.read(batch, 1, 1, TimeUnit.SECONDS));

    // Append 10 more events to each of the first three writers; together they cover timestamps 1000..1029.
    for (int bucket = 0; bucket < 3; bucket++) {
        FileWriter<StreamEvent> fileWriter = fileWriters.get(bucket);
        for (int seq = 0; seq < 10; seq++) {
            long ts = 1000 + seq * 3 + bucket;
            fileWriter.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
        }
    }

    // Close all writers.
    for (int idx = 0; idx < fileWriters.size(); idx++) {
        fileWriters.get(idx).close();
    }

    // Continue reading: the 30 new events are expected, still in ascending timestamp order.
    Assert.assertEquals(30, reader.read(batch, 30, 2, TimeUnit.SECONDS));
    Assert.assertEquals(30, batch.size());
    for (int k = 0; k < batch.size(); k++) {
        StreamEvent received = batch.get(k);
        Assert.assertEquals(nextTimestamp, received.getTimestamp());
        String body = Charsets.UTF_8.decode(received.getBody()).toString();
        Assert.assertEquals("Testing " + nextTimestamp, body);
        nextTimestamp++;
    }

    // Should get no more events.
    Assert.assertEquals(0, reader.read(batch, 1, 1, TimeUnit.SECONDS));
    reader.close();
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) FileWriter(co.cask.cdap.data.file.FileWriter) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 67 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class StreamDataFileTestBase method testLiveStream.

/**
 * Tests the live stream reader while new partitions and/or sequence files are being created over time.
 *
 * <p>A background thread writes 10 events per second, creating a new writer on every iteration.
 * The test checks that a {@link LiveStreamFileReader} eventually sees every event written, and that
 * a second reader created from the saved offset sees the 10 events written afterwards.
 */
@Category(SlowTests.class)
@Test
public void testLiveStream() throws Exception {
    String streamName = "live";
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    final String filePrefix = "prefix";
    // 5 seconds
    long partitionDuration = 5000;
    Location location = getLocationFactory().create(streamName);
    location.mkdirs();
    final StreamConfig config = new StreamConfig(streamId, partitionDuration, 10000, Long.MAX_VALUE, location, null, 1000);
    // Create a thread that will write 10 events per second
    final AtomicInteger eventsWritten = new AtomicInteger();
    final List<Closeable> closeables = Lists.newArrayList();
    Thread writerThread = new Thread() {

        @Override
        public void run() {
            try {
                while (!interrupted()) {
                    // A fresh writer is created on every iteration; all of them are closed by the main thread.
                    FileWriter<StreamEvent> writer = createWriter(config, filePrefix);
                    closeables.add(writer);
                    for (int i = 0; i < 10; i++) {
                        long ts = System.currentTimeMillis();
                        writer.append(StreamFileTestUtils.createEvent(ts, "Testing"));
                        eventsWritten.getAndIncrement();
                    }
                    writer.flush();
                    TimeUnit.SECONDS.sleep(1);
                }
            } catch (IOException e) {
                LOG.error(e.getMessage(), e);
                throw Throwables.propagate(e);
            } catch (InterruptedException e) {
                // No-op: the interrupt is how the main thread asks this thread to stop writing.
            }
        }
    };
    // Create a live reader that starts one partition earlier than the current time.
    long partitionStart = StreamUtils.getPartitionStartTime(System.currentTimeMillis() - config.getPartitionDuration(), config.getPartitionDuration());
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), partitionStart, config.getPartitionDuration());
    Location eventLocation = StreamUtils.createStreamLocation(partitionLocation, filePrefix, 0, StreamFileType.EVENT);
    // Creates a live stream reader that checks for a new sequence file every 100 millis.
    FileReader<PositionStreamEvent, StreamFileOffset> reader = new LiveStreamFileReader(config, new StreamFileOffset(eventLocation, 0L, 0), 100);
    List<StreamEvent> events = Lists.newArrayList();
    // Try to read; since the writer thread is not started, it should get nothing
    Assert.assertEquals(0, reader.read(events, 1, 2, TimeUnit.SECONDS));
    // Start the writer thread.
    writerThread.start();
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.start();
    // Poll for up to 10 seconds until the first event shows up.
    while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && reader.read(events, 1, 1, TimeUnit.SECONDS) == 0) {
    // Empty
    }
    stopwatch.stop();
    // Should be able to read an event
    Assert.assertEquals(1, events.size());
    // Let the writer run across at least two partition durations before stopping it.
    TimeUnit.MILLISECONDS.sleep(partitionDuration * 2);
    writerThread.interrupt();
    writerThread.join();
    LOG.info("Writer stopped with {} events written.", eventsWritten.get());
    // reset() leaves the stopwatch stopped, so it must be started again; otherwise the elapsed time
    // stays at zero and the 10 second limit below would never trigger.
    stopwatch.reset();
    stopwatch.start();
    while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != eventsWritten.get()) {
        reader.read(events, eventsWritten.get(), 0, TimeUnit.SECONDS);
    }
    stopwatch.stop();
    // Should see all events written
    Assert.assertEquals(eventsWritten.get(), events.size());
    // Take a snapshot of the offset.
    StreamFileOffset offset = new StreamFileOffset(reader.getPosition());
    reader.close();
    for (Closeable c : closeables) {
        Closeables.closeQuietly(c);
    }
    // Now create a new writer to write 10 more events: 5 now, and 5 after sleeping for
    // one and a half partition durations.
    try (FileWriter<StreamEvent> writer = createWriter(config, filePrefix)) {
        for (int i = 0; i < 5; i++) {
            long ts = System.currentTimeMillis();
            writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
        }
        TimeUnit.MILLISECONDS.sleep(partitionDuration * 3 / 2);
        for (int i = 0; i < 5; i++) {
            long ts = System.currentTimeMillis();
            writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
        }
    }
    // Create a new reader with the previous offset
    reader = new LiveStreamFileReader(config, offset, 100);
    events.clear();
    // Restart the stopwatch (see above) so that this loop is also bounded to 10 seconds.
    stopwatch.reset();
    stopwatch.start();
    while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != 10) {
        reader.read(events, 10, 0, TimeUnit.SECONDS);
    }
    stopwatch.stop();
    Assert.assertEquals(10, events.size());
    // Try to read more; nothing further is expected (the result is not asserted here)
    reader.read(events, 10, 2, TimeUnit.SECONDS);
    reader.close();
    // The writers were already closed quietly above; this closes them once more, non-quietly.
    for (Closeable c : closeables) {
        c.close();
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) Closeable(java.io.Closeable) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Stopwatch(com.google.common.base.Stopwatch) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Location(org.apache.twill.filesystem.Location) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 68 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class StreamDataFileTestBase method testAppendAll.

/**
   * Validates that a batch of events with the same timestamp, written through {@code appendAll},
   * ends up in the same data block: nothing is readable before the batch completes, and afterwards
   * all 1000 events are read back with a fixed distance between consecutive event positions.
   */
@Test
public void testAppendAll() throws Exception {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location eventFile = dir.getTempFile(".dat");
    Location indexFile = dir.getTempFile(".idx");
    // Creates a stream file
    try (StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L)) {
        // writeCompleted: counted down by the iterator once all 1000 events have been handed to appendAll.
        final CountDownLatch writeCompleted = new CountDownLatch(1);
        // readAttempted: counted down by the test thread after it has tried (and failed) to read an event.
        final CountDownLatch readAttempted = new CountDownLatch(1);
        // Write 1000 events using appendAll from a separate thread.
        // The iterator supplies 1000 events with 300-byte bodies and the same timestamp, then waits
        // for a signal from the test thread before flushing the writer and ending the iteration.
        // The intent is that the data block is not yet written while the iterator is waiting
        // (original note: internal buffer size is 256K), hence the reader shouldn't be seeing it.
        Thread t = new Thread() {

            @Override
            public void run() {
                try {
                    writer.appendAll(new AbstractIterator<StreamEvent>() {

                        // Number of events still to be produced; also used as the (zero padded) event body.
                        int count = 1000;

                        // Single timestamp shared by every event of the batch.
                        long timestamp = System.currentTimeMillis();

                        Map<String, String> headers = ImmutableMap.of();

                        @Override
                        protected StreamEvent computeNext() {
                            // count is decremented before formatting, so bodies run from 999 down to 0.
                            if (count-- > 0) {
                                return new StreamEvent(headers, Charsets.UTF_8.encode(String.format("%0300d", count)), timestamp);
                            }
                            // All events produced: tell the test thread, then block until it has attempted a read.
                            writeCompleted.countDown();
                            Uninterruptibles.awaitUninterruptibly(readAttempted);
                            Flushables.flushQuietly(writer);
                            return endOfData();
                        }
                    });
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            }
        };
        t.start();
        // Create a reader
        try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
            List<PositionStreamEvent> events = Lists.newArrayList();
            // Wait until the iterator has produced all events
            Assert.assertTrue(writeCompleted.await(20, TimeUnit.SECONDS));
            // Try to read an event, nothing should be read
            Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS));
            // Now signal writer to flush
            readAttempted.countDown();
            // Wait (up to 10 seconds) for the writer thread to finish; now should be able to read 1000 events
            t.join(10000);
            Assert.assertEquals(1000, reader.read(events, 1000, 0, TimeUnit.SECONDS));
            int size = events.size();
            long lastStart = -1;
            for (int i = 0; i < size; i++) {
                PositionStreamEvent event = events.get(i);
                // Bodies were written in descending order, so the i-th event carries (size - i - 1).
                Assert.assertEquals(String.format("%0300d", size - i - 1), Charsets.UTF_8.decode(event.getBody()).toString());
                if (lastStart > 0) {
                    // The position differences between two consecutive events should be 303
                    // 2 bytes for body length, 300 bytes body, 1 byte header map (value == 0)
                    Assert.assertEquals(303L, event.getStart() - lastStart);
                }
                lastStart = event.getStart();
            }
        }
    }
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 69 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class StreamDataFileTestBase method testEventTemplate.

/**
 * Tests the v2 file format that supports defaulting values in stream events (timestamp and headers).
 *
 * <p>The file is written with the "uni timestamp" property set to the close timestamp and with a
 * default header {@code key=value}. Every event read back is expected to carry the writer's close
 * timestamp and that header, whatever timestamp it was written with.
 *
 * <p>The writer and both readers are released even when an assertion fails.
 */
@Test
public void testEventTemplate() throws IOException, InterruptedException {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location eventFile = dir.getTempFile(".dat");
    Location indexFile = dir.getTempFile(".idx");
    // Creates a stream file with the uni timestamp property and a default header (key=value)
    StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L, ImmutableMap.of(StreamDataFileConstants.Property.Key.UNI_TIMESTAMP, StreamDataFileConstants.Property.Value.CLOSE_TIMESTAMP, StreamDataFileConstants.Property.Key.EVENT_HEADER_PREFIX + "key", "value"));
    try {
        // Write 1000 events with different timestamps
        for (int i = 0; i < 1000; i++) {
            writer.append(StreamFileTestUtils.createEvent(i, "Message " + i));
        }
        // Trying to get the close timestamp should throw an exception before the file gets closed
        try {
            writer.getCloseTimestamp();
            Assert.fail();
        } catch (IllegalStateException e) {
            // Expected
        }
    } finally {
        // Closed exactly once, on both the success and the failure path.
        writer.close();
    }
    // Get the close timestamp from the (now closed) writer for the assertions below
    long timestamp = writer.getCloseTimestamp();
    List<StreamEvent> events = Lists.newArrayList();
    // Create a reader to read all events. All events should have the same timestamp
    try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
        Assert.assertEquals(1000, reader.read(events, 1000, 0, TimeUnit.SECONDS));
        // All events should have the same timestamp and contain the default header
        for (StreamEvent event : events) {
            Assert.assertEquals(timestamp, event.getTimestamp());
            Assert.assertEquals("value", event.getHeaders().get("key"));
        }
        // No more events
        Assert.assertEquals(-1, reader.read(events, 1, 0, TimeUnit.SECONDS));
    }
    // Open another reader that reads with a filter that rejects every timestamp
    try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
        int res = reader.read(events, 1, 0, TimeUnit.SECONDS, new ReadFilter() {

            @Override
            public boolean acceptTimestamp(long timestamp) {
                return false;
            }
        });
        Assert.assertEquals(-1, res);
    }
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ReadFilter(co.cask.cdap.data.file.ReadFilter) TTLReadFilter(co.cask.cdap.data.file.filter.TTLReadFilter) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 70 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class StreamDataFileTestBase method testFilter.

/**
 * Tests reading with a {@link ReadFilter} that rejects every timestamp.
 *
 * <p>The first read is expected to return no events while having consulted the filter.
 * After the filter is reset, a second read is expected to return no events without
 * {@code acceptTimestamp} being called again.
 *
 * <p>The writer and reader are managed by try-with-resources so that they are released even
 * when an assertion fails; the reader is closed before the writer.
 */
@Test
public void testFilter() throws Exception {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    final Location eventFile = dir.getTempFile(".dat");
    final Location indexFile = dir.getTempFile(".idx");
    try (StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L)) {
        // Write a single event and flush it so that the reader can reach it.
        writer.append(StreamFileTestUtils.createEvent(0, "Message 1"));
        writer.flush();
        try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
            List<StreamEvent> events = Lists.newArrayList();
            // Set to true whenever the filter is asked about a timestamp.
            final AtomicBoolean active = new AtomicBoolean(false);
            ReadFilter filter = new ReadFilter() {

                private long nextTimestamp = -1L;

                @Override
                public void reset() {
                    active.set(false);
                    nextTimestamp = -1L;
                }

                @Override
                public boolean acceptTimestamp(long timestamp) {
                    // Record the call, hint at the next timestamp, and reject the current one.
                    active.set(true);
                    nextTimestamp = timestamp + 1;
                    return false;
                }

                @Override
                public long getNextTimestampHint() {
                    return nextTimestamp;
                }
            };
            // First read: the filter is consulted and rejects the event, so nothing is returned.
            Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS, filter));
            Assert.assertTrue(active.get());
            filter.reset();
            // Second read: still nothing returned, and the filter must not have been consulted again.
            Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS, filter));
            Assert.assertFalse(active.get());
        }
    }
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ReadFilter(co.cask.cdap.data.file.ReadFilter) TTLReadFilter(co.cask.cdap.data.file.filter.TTLReadFilter) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Aggregations

Location (org.apache.twill.filesystem.Location)246 Test (org.junit.Test)104 IOException (java.io.IOException)57 File (java.io.File)39 LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory)29 LocationFactory (org.apache.twill.filesystem.LocationFactory)29 FileSet (co.cask.cdap.api.dataset.lib.FileSet)28 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)27 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)23 CConfiguration (co.cask.cdap.common.conf.CConfiguration)19 NamespaceId (co.cask.cdap.proto.id.NamespaceId)19 Manifest (java.util.jar.Manifest)18 HashMap (java.util.HashMap)17 StreamId (co.cask.cdap.proto.id.StreamId)16 OutputStream (java.io.OutputStream)15 DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework)13 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)11 StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig)10 ArrayList (java.util.ArrayList)9 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)8