use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.
the class StreamDataFileTestBase method testLiveStream.
/**
 * Tests the live stream reader with new partitions and/or sequence files being created over time.
 */
@Category(SlowTests.class)
@Test
public void testLiveStream() throws Exception {
  String streamName = "live";
  StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
  final String filePrefix = "prefix";
  // 5 seconds
  long partitionDuration = 5000;
  Location location = getLocationFactory().create(streamName);
  location.mkdirs();
  final StreamConfig config = new StreamConfig(streamId, partitionDuration, 10000, Long.MAX_VALUE, location, null, 1000);
  // Create a thread that will write 10 events per second
  final AtomicInteger eventsWritten = new AtomicInteger();
  final List<Closeable> closeables = Lists.newArrayList();
  Thread writerThread = new Thread() {
    @Override
    public void run() {
      try {
        while (!interrupted()) {
          FileWriter<StreamEvent> writer = createWriter(config, filePrefix);
          closeables.add(writer);
          for (int i = 0; i < 10; i++) {
            long ts = System.currentTimeMillis();
            writer.append(StreamFileTestUtils.createEvent(ts, "Testing"));
            eventsWritten.getAndIncrement();
          }
          writer.flush();
          TimeUnit.SECONDS.sleep(1);
        }
      } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        throw Throwables.propagate(e);
      } catch (InterruptedException e) {
        // No-op
      }
    }
  };
  // Create a live reader starting one partition earlier than the current time.
  long partitionStart = StreamUtils.getPartitionStartTime(System.currentTimeMillis() - config.getPartitionDuration(), config.getPartitionDuration());
  Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), partitionStart, config.getPartitionDuration());
  Location eventLocation = StreamUtils.createStreamLocation(partitionLocation, filePrefix, 0, StreamFileType.EVENT);
  // Creates a live stream reader that checks for new sequence files every 100 millis.
  FileReader<PositionStreamEvent, StreamFileOffset> reader = new LiveStreamFileReader(config, new StreamFileOffset(eventLocation, 0L, 0), 100);
  List<StreamEvent> events = Lists.newArrayList();
  // Try to read; since the writer thread is not started, it should get nothing
  Assert.assertEquals(0, reader.read(events, 1, 2, TimeUnit.SECONDS));
  // Start the writer thread.
  writerThread.start();
  Stopwatch stopwatch = new Stopwatch();
  stopwatch.start();
  while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && reader.read(events, 1, 1, TimeUnit.SECONDS) == 0) {
    // Empty
  }
  stopwatch.stop();
  // Should be able to read an event
  Assert.assertEquals(1, events.size());
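  // Let the writer run for two more partition durations so that new partitions and sequence files are created while the reader is live.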
  TimeUnit.MILLISECONDS.sleep(partitionDuration * 2);
  writerThread.interrupt();
  writerThread.join();
  LOG.info("Writer stopped with {} events written.", eventsWritten.get());
  stopwatch.reset();
  while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != eventsWritten.get()) {
    reader.read(events, eventsWritten.get(), 0, TimeUnit.SECONDS);
  }
  // Should see all events written
  Assert.assertEquals(eventsWritten.get(), events.size());
  // Take a snapshot of the offset.
  StreamFileOffset offset = new StreamFileOffset(reader.getPosition());
  reader.close();
  for (Closeable c : closeables) {
    Closeables.closeQuietly(c);
  }
  // Now create a new writer to write 10 more events across two partitions, skipping a partition in between.
  try (FileWriter<StreamEvent> writer = createWriter(config, filePrefix)) {
    for (int i = 0; i < 5; i++) {
      long ts = System.currentTimeMillis();
      writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
    }
    TimeUnit.MILLISECONDS.sleep(partitionDuration * 3 / 2);
    for (int i = 0; i < 5; i++) {
      long ts = System.currentTimeMillis();
      writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
    }
  }
  // Create a new reader with the previous offset
  reader = new LiveStreamFileReader(config, offset, 100);
  events.clear();
  stopwatch.reset();
  while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != 10) {
    reader.read(events, 10, 0, TimeUnit.SECONDS);
  }
  Assert.assertEquals(10, events.size());
  // Try to read more; should get nothing
  reader.read(events, 10, 2, TimeUnit.SECONDS);
  reader.close();
  for (Closeable c : closeables) {
    c.close();
  }
}
use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.
the class StreamDataFileTestBase method testBasicReadWrite.
/**
 * Test for basic read/write to verify that data is encoded and decoded correctly.
 * @throws Exception
 */
@Test
public void testBasicReadWrite() throws Exception {
  Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
  Location eventFile = dir.getTempFile(".dat");
  Location indexFile = dir.getTempFile(".idx");
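  // Assumption: the third constructor argument (10000L) is the index interval, controlling how often entries are emitted to the index file.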
  StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L);
  // Write 100 events to the stream, spread across 20 distinct even timestamps
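  // (even timestamps 0, 2, ..., 38 give 20 timestamps; 5 events per timestamp = 100 events in total)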
  for (int i = 0; i < 40; i += 2) {
    for (int j = 0; j < 5; j++) {
      writer.append(StreamFileTestUtils.createEvent(i, "Basic test " + i));
    }
  }
  writer.close();
  // Create a reader that starts from the beginning.
  StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
  List<StreamEvent> events = Lists.newArrayList();
  Assert.assertEquals(100, reader.read(events, 100, 1, TimeUnit.SECONDS));
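  // The writer is already closed, so a further read should return -1 to signal EOF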
  Assert.assertEquals(-1, reader.read(events, 100, 1, TimeUnit.SECONDS));
  reader.close();
  // Collect the events in a multimap for verification
  Multimap<Long, String> messages = LinkedListMultimap.create();
  for (StreamEvent event : events) {
    messages.put(event.getTimestamp(), Charsets.UTF_8.decode(event.getBody()).toString());
  }
  // 20 timestamps
  Assert.assertEquals(20, messages.keySet().size());
  for (Map.Entry<Long, Collection<String>> entry : messages.asMap().entrySet()) {
    // Each timestamp has 5 messages
    Assert.assertEquals(5, entry.getValue().size());
    // All 5 messages for a timestamp are the same
    Assert.assertEquals(1, ImmutableSet.copyOf(entry.getValue()).size());
    // Message is "Basic test " + timestamp
    Assert.assertEquals("Basic test " + entry.getKey(), entry.getValue().iterator().next());
  }
}
use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.
the class StreamDataFileTestBase method testTail.
@Test
public void testTail() throws Exception {
  Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
  final Location eventFile = dir.getTempFile(".dat");
  final Location indexFile = dir.getTempFile(".idx");
  final CountDownLatch writerStarted = new CountDownLatch(1);
  // Create a thread for writing 10 events, 1 event per 200 milliseconds.
  // It pauses after writing 5 events.
  final CountDownLatch waitLatch = new CountDownLatch(1);
  Thread writerThread = new Thread() {
    @Override
    public void run() {
      try {
        StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L);
        writerStarted.countDown();
        for (int i = 0; i < 10; i++) {
          writer.append(StreamFileTestUtils.createEvent(i, "Testing " + i));
          writer.flush();
          TimeUnit.MILLISECONDS.sleep(200);
          if (i == 4) {
            waitLatch.await();
          }
        }
        writer.close();
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    }
  };
  StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
  List<StreamEvent> events = Lists.newArrayList();
  writerThread.start();
  writerStarted.await();
  // Expect 10 events in two batches of 5 (the writer pauses after the first 5), followed by EOF.
  Assert.assertEquals(5, reader.read(events, 5, 2000, TimeUnit.MILLISECONDS));
  waitLatch.countDown();
  Assert.assertEquals(5, reader.read(events, 5, 2000, TimeUnit.MILLISECONDS));
  Assert.assertEquals(-1, reader.read(events, 1, 500, TimeUnit.MILLISECONDS));
  Assert.assertEquals(10, events.size());
  // Verify the ordering of events
  int ts = 0;
  for (StreamEvent event : events) {
    Assert.assertEquals(ts, event.getTimestamp());
    Assert.assertEquals("Testing " + ts, Charsets.UTF_8.decode(event.getBody()).toString());
    ts++;
  }
}
use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.
the class StreamDataFileTestBase method testLargeDataBlock.
@Test
public void testLargeDataBlock() throws Exception {
  Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
  Location eventFile = dir.getTempFile(".dat");
  Location indexFile = dir.getTempFile(".idx");
  StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L);
  // Write 1200 events in one data block, with each event having a body of 150 bytes.
  // This makes sure it crosses the 128K read buffer boundary that is observed in HDFS.
  // The StreamDataFileWriter has an internal data block buffer size of 256K,
  // hence writing a ~175K data block shouldn't go over the flush limit in the writer, making sure all
  // events are in one data block.
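  // (1200 events x 150-byte bodies = 180,000 bytes, roughly 176K: larger than the 128K read buffer, smaller than the 256K flush limit)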
  ByteBuffer body = Charsets.UTF_8.encode(Strings.repeat("0", 150));
  for (int i = 0; i < 1200; i++) {
    writer.append(new StreamEvent(ImmutableMap.<String, String>of(), body.duplicate(), 0));
  }
  writer.close();
  // Read the events back one by one
  StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
  List<StreamEvent> events = Lists.newArrayList();
  for (int i = 0; i < 1200; i++) {
    Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
    Assert.assertEquals(body, events.get(0).getBody());
    events.clear();
  }
  Assert.assertEquals(-1, reader.read(events, 1, 0, TimeUnit.SECONDS));
  reader.close();
}
use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.
the class StreamDataFileTestBase method testAppendAll.
/**
 * This test validates that a batch written with appendAll using the same timestamp ends up in the same data block.
 */
@Test
public void testAppendAll() throws Exception {
  Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
  Location eventFile = dir.getTempFile(".dat");
  Location indexFile = dir.getTempFile(".idx");
  // Creates a stream file
  try (StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L)) {
    final CountDownLatch writeCompleted = new CountDownLatch(1);
    final CountDownLatch readAttempted = new CountDownLatch(1);
    // Write 1000 events using appendAll from a separate thread.
    // It writes 1000 events of 300 bytes each, all with the same timestamp, and waits for a signal before ending.
    // This makes sure the data block is not written out before the writer flushes (the internal buffer size is 256K),
    // hence the reader shouldn't see it yet.
    Thread t = new Thread() {
      @Override
      public void run() {
        try {
          writer.appendAll(new AbstractIterator<StreamEvent>() {
            int count = 1000;
            long timestamp = System.currentTimeMillis();
            Map<String, String> headers = ImmutableMap.of();

            @Override
            protected StreamEvent computeNext() {
              if (count-- > 0) {
                return new StreamEvent(headers, Charsets.UTF_8.encode(String.format("%0300d", count)), timestamp);
              }
              writeCompleted.countDown();
              Uninterruptibles.awaitUninterruptibly(readAttempted);
              Flushables.flushQuietly(writer);
              return endOfData();
            }
          });
        } catch (IOException e) {
          throw Throwables.propagate(e);
        }
      }
    };
    t.start();
    // Create a reader
    try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
      List<PositionStreamEvent> events = Lists.newArrayList();
      // Wait for the writer to complete
      Assert.assertTrue(writeCompleted.await(20, TimeUnit.SECONDS));
      // Try to read an event; nothing should be read
      Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS));
      // Now signal the writer to flush
      readAttempted.countDown();
      // Now we should be able to read 1000 events
      t.join(10000);
      Assert.assertEquals(1000, reader.read(events, 1000, 0, TimeUnit.SECONDS));
      int size = events.size();
      long lastStart = -1;
      for (int i = 0; i < size; i++) {
        PositionStreamEvent event = events.get(i);
        Assert.assertEquals(String.format("%0300d", size - i - 1), Charsets.UTF_8.decode(event.getBody()).toString());
        if (lastStart > 0) {
          // The position difference between two consecutive events should be 303:
          // 2 bytes for the body length, 300 bytes for the body, 1 byte for the header map (value == 0)
          Assert.assertEquals(303L, event.getStart() - lastStart);
        }
        lastStart = event.getStart();
      }
    }
  }
}