use of org.apache.twill.filesystem.Location in project cdap by caskdata.
the class MultiLiveStreamFileReaderTestBase method testMultiFileReader.
/**
 * Verifies that a {@link MultiLiveStreamFileReader} reading from five event files returns the events
 * in increasing timestamp order, and that it picks up events appended to the files after the first
 * round of reads.
 */
@Test
public void testMultiFileReader() throws Exception {
  String streamName = "multiReader";
  StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
  Location location = getLocationFactory().create(streamName);
  location.mkdirs();

  // Create a stream with 1 partition.
  StreamConfig config = new StreamConfig(streamId, Long.MAX_VALUE, 10000, Long.MAX_VALUE, location, null, 1000);

  // Write 200 events to each of 5 files (1000 events in total), with timestamps interleaved across the files
  List<FileWriter<StreamEvent>> writers = Lists.newArrayList();
  for (int i = 0; i < 5; i++) {
    FileWriter<StreamEvent> writer = createWriter(config, "bucket" + i);
    writers.add(writer);
    for (int j = 0; j < 200; j++) {
      long timestamp = j * 5 + i;
      writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
    }
  }

  // Flush all writers.
  for (FileWriter<StreamEvent> writer : writers) {
    writer.flush();
  }

  // Create a multi stream file reader
  List<StreamFileOffset> sources = Lists.newArrayList();
  Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), 0, Long.MAX_VALUE);
  for (int i = 0; i < 5; i++) {
    Location eventFile = StreamUtils.createStreamLocation(partitionLocation, "bucket" + i, 0, StreamFileType.EVENT);
    sources.add(new StreamFileOffset(eventFile, 0L, 0));
  }

  MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(config, sources);
  // try/finally so the reader is released even when an assertion below fails
  try {
    // Reads all events written so far: 10 batches of 100 events, timestamps 0..999 in order.
    List<StreamEvent> events = Lists.newArrayList();
    long expectedTimestamp = 0L;
    for (int i = 0; i < 10; i++) {
      Assert.assertEquals(100, reader.read(events, 100, 0, TimeUnit.SECONDS));
      Assert.assertEquals(100, events.size());
      for (StreamEvent event : events) {
        Assert.assertEquals(expectedTimestamp, event.getTimestamp());
        Assert.assertEquals("Testing " + expectedTimestamp, Charsets.UTF_8.decode(event.getBody()).toString());
        expectedTimestamp++;
      }
      events.clear();
    }

    // Everything has been consumed, so another read should yield nothing.
    Assert.assertEquals(0, reader.read(events, 1, 1, TimeUnit.SECONDS));

    // Writes some more events to the first three writers (timestamps 1000..1029 interleaved).
    for (int i = 0; i < 3; i++) {
      FileWriter<StreamEvent> writer = writers.get(i);
      for (int j = 0; j < 10; j++) {
        long timestamp = 1000 + j * 3 + i;
        writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
      }
    }

    // Close all writers
    for (FileWriter<StreamEvent> writer : writers) {
      writer.close();
    }

    // Continue to read; the 30 new events should come back in timestamp order.
    Assert.assertEquals(30, reader.read(events, 30, 2, TimeUnit.SECONDS));
    Assert.assertEquals(30, events.size());
    for (StreamEvent event : events) {
      Assert.assertEquals(expectedTimestamp, event.getTimestamp());
      Assert.assertEquals("Testing " + expectedTimestamp, Charsets.UTF_8.decode(event.getBody()).toString());
      expectedTimestamp++;
    }

    // Should get no more events.
    Assert.assertEquals(0, reader.read(events, 1, 1, TimeUnit.SECONDS));
  } finally {
    reader.close();
  }
}
use of org.apache.twill.filesystem.Location in project cdap by caskdata.
the class StreamDataFileTestBase method testLiveStream.
/**
 * Test live stream reader with new partitions and/or sequence file being created over time.
 *
 * A background thread writes 10 events per second while a {@link LiveStreamFileReader} follows the stream.
 * After the thread is stopped, the reader position is captured and a second reader is created from that
 * offset to read 10 more events that are written across partitions.
 */
@Category(SlowTests.class)
@Test
public void testLiveStream() throws Exception {
  String streamName = "live";
  StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
  final String filePrefix = "prefix";
  // Partition duration of 5 seconds
  long partitionDuration = 5000;
  Location location = getLocationFactory().create(streamName);
  location.mkdirs();
  final StreamConfig config = new StreamConfig(streamId, partitionDuration, 10000, Long.MAX_VALUE,
                                               location, null, 1000);

  // Create a thread that writes 10 events per second until it is interrupted.
  // Each round uses a fresh writer; the writers are collected so that they can be closed later.
  final AtomicInteger eventsWritten = new AtomicInteger();
  final List<Closeable> closeables = Lists.newArrayList();
  Thread writerThread = new Thread() {
    @Override
    public void run() {
      try {
        while (!interrupted()) {
          FileWriter<StreamEvent> writer = createWriter(config, filePrefix);
          closeables.add(writer);
          for (int i = 0; i < 10; i++) {
            long ts = System.currentTimeMillis();
            writer.append(StreamFileTestUtils.createEvent(ts, "Testing"));
            eventsWritten.getAndIncrement();
          }
          writer.flush();
          TimeUnit.SECONDS.sleep(1);
        }
      } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        throw Throwables.propagate(e);
      } catch (InterruptedException e) {
        // No-op: interruption is the signal for this thread to stop
      }
    }
  };

  // Create a live reader that starts one partition earlier than the current time.
  long partitionStart = StreamUtils.getPartitionStartTime(System.currentTimeMillis() - config.getPartitionDuration(),
                                                          config.getPartitionDuration());
  Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), partitionStart,
                                                                   config.getPartitionDuration());
  Location eventLocation = StreamUtils.createStreamLocation(partitionLocation, filePrefix, 0, StreamFileType.EVENT);

  // Creates a live stream reader that checks for a new sequence file every 100 millis.
  FileReader<PositionStreamEvent, StreamFileOffset> reader =
    new LiveStreamFileReader(config, new StreamFileOffset(eventLocation, 0L, 0), 100);
  List<StreamEvent> events = Lists.newArrayList();

  // Try to read; since the writer thread is not started, it should get nothing
  Assert.assertEquals(0, reader.read(events, 1, 2, TimeUnit.SECONDS));

  // Start the writer thread.
  writerThread.start();

  // Wait up to 10 seconds for the first event to become readable.
  Stopwatch stopwatch = new Stopwatch();
  stopwatch.start();
  while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && reader.read(events, 1, 1, TimeUnit.SECONDS) == 0) {
    // Empty
  }
  stopwatch.stop();

  // Should be able to read an event
  Assert.assertEquals(1, events.size());

  // Let the writer run across partition boundaries before stopping it.
  TimeUnit.MILLISECONDS.sleep(partitionDuration * 2);
  writerThread.interrupt();
  writerThread.join();

  LOG.info("Writer stopped with {} events written.", eventsWritten.get());

  // reset() leaves the stopwatch stopped, so it has to be started again;
  // otherwise the elapsed time stays at zero and the 10 second limit below never applies.
  stopwatch.reset();
  stopwatch.start();
  while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != eventsWritten.get()) {
    reader.read(events, eventsWritten.get(), 0, TimeUnit.SECONDS);
  }

  // Should see all events written
  Assert.assertEquals(eventsWritten.get(), events.size());

  // Take a snapshot of the offset.
  StreamFileOffset offset = new StreamFileOffset(reader.getPosition());
  reader.close();
  for (Closeable c : closeables) {
    Closeables.closeQuietly(c);
  }

  // Now create a new writer to write 10 more events: 5 right away and 5 more after sleeping
  // for one and a half partition durations.
  try (FileWriter<StreamEvent> writer = createWriter(config, filePrefix)) {
    for (int i = 0; i < 5; i++) {
      long ts = System.currentTimeMillis();
      writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
    }
    TimeUnit.MILLISECONDS.sleep(partitionDuration * 3 / 2);
    for (int i = 0; i < 5; i++) {
      long ts = System.currentTimeMillis();
      writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
    }
  }

  // Create a new reader with the previous offset
  reader = new LiveStreamFileReader(config, offset, 100);
  events.clear();

  // Restart the stopwatch (see above) so the wait for the 10 new events is bounded to 10 seconds.
  stopwatch.reset();
  stopwatch.start();
  while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != 10) {
    reader.read(events, 10, 0, TimeUnit.SECONDS);
  }
  Assert.assertEquals(10, events.size());

  // Try to read more; nothing further is expected (the return value is not asserted here)
  reader.read(events, 10, 2, TimeUnit.SECONDS);
  reader.close();

  // Close the writer thread's writers once more, this time letting any IOException propagate.
  for (Closeable c : closeables) {
    c.close();
  }
}
use of org.apache.twill.filesystem.Location in project cdap by caskdata.
the class StreamDataFileTestBase method testAppendAll.
/**
 * Validates that a batch of events sharing one timestamp, written through a single {@code appendAll} call,
 * is not readable until the writer flushes, and that afterwards the events are laid out back to back
 * (303 bytes apart) in the file.
 */
@Test
public void testAppendAll() throws Exception {
  Location tempDir = StreamFileTestUtils.createTempDir(getLocationFactory());
  Location eventFile = tempDir.getTempFile(".dat");
  Location indexFile = tempDir.getTempFile(".idx");

  // Creates a stream file
  try (StreamDataFileWriter fileWriter = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
                                                                  Locations.newOutputSupplier(indexFile),
                                                                  10000L)) {
    final CountDownLatch allEventsProduced = new CountDownLatch(1);
    final CountDownLatch earlyReadDone = new CountDownLatch(1);

    // A separate thread feeds 1000 events (300 byte body each, all with the same timestamp) to appendAll.
    // Once the last event has been handed over, the iterator signals the main thread and then blocks
    // until the main thread has attempted a read, before flushing the writer and ending the iteration.
    // At the time of that read attempt the data block is expected not to be written yet,
    // hence the reader shouldn't be seeing any event.
    Thread appender = new Thread(new Runnable() {
      @Override
      public void run() {
        try {
          fileWriter.appendAll(new AbstractIterator<StreamEvent>() {
            final Map<String, String> noHeaders = ImmutableMap.of();
            final long sharedTimestamp = System.currentTimeMillis();
            int remaining = 1000;

            @Override
            protected StreamEvent computeNext() {
              if (remaining-- <= 0) {
                allEventsProduced.countDown();
                Uninterruptibles.awaitUninterruptibly(earlyReadDone);
                Flushables.flushQuietly(fileWriter);
                return endOfData();
              }
              // The body is the already decremented counter, so bodies run from 999 down to 0
              String body = String.format("%0300d", remaining);
              return new StreamEvent(noHeaders, Charsets.UTF_8.encode(body), sharedTimestamp);
            }
          });
        } catch (IOException e) {
          throw Throwables.propagate(e);
        }
      }
    });
    appender.start();

    // Create a reader
    try (StreamDataFileReader fileReader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
      List<PositionStreamEvent> readEvents = Lists.newArrayList();

      // Wait until the writer thread has produced all events
      boolean produced = allEventsProduced.await(20, TimeUnit.SECONDS);
      Assert.assertTrue(produced);

      // Nothing has been flushed yet, so a read attempt should come back empty
      int readBeforeFlush = fileReader.read(readEvents, 1, 0, TimeUnit.SECONDS);
      Assert.assertEquals(0, readBeforeFlush);

      // Let the writer thread flush and finish
      earlyReadDone.countDown();
      appender.join(10000);

      // Now all 1000 events should be readable
      int readAfterFlush = fileReader.read(readEvents, 1000, 0, TimeUnit.SECONDS);
      Assert.assertEquals(1000, readAfterFlush);

      // Bodies were written counting down, so the first event read carries the highest number
      int expectedNumber = readEvents.size() - 1;
      long previousStart = -1;
      for (PositionStreamEvent event : readEvents) {
        String body = Charsets.UTF_8.decode(event.getBody()).toString();
        Assert.assertEquals(String.format("%0300d", expectedNumber), body);
        expectedNumber--;

        long start = event.getStart();
        if (previousStart > 0) {
          // Two consecutive events should be 303 bytes apart:
          // 2 bytes for body length, 300 bytes body, 1 byte header map (value == 0)
          Assert.assertEquals(303L, start - previousStart);
        }
        previousStart = start;
      }
    }
  }
}
use of org.apache.twill.filesystem.Location in project cdap by caskdata.
the class StreamDataFileTestBase method testEventTemplate.
/**
 * This unit test is to test the v2 file format that supports
 * defaulting values in stream event (timestamp and headers).
 *
 * The file is written with the uni-timestamp property set to the close timestamp and with a default
 * header (key=value); every event read back is expected to carry the writer's close timestamp and
 * that header, regardless of the timestamp it was appended with.
 */
@Test
public void testEventTemplate() throws IOException, InterruptedException {
  Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
  Location eventFile = dir.getTempFile(".dat");
  Location indexFile = dir.getTempFile(".idx");

  // Creates a stream file with the uni timestamp property and a default header (key=value)
  StreamDataFileWriter writer = new StreamDataFileWriter(
    Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L,
    ImmutableMap.of(StreamDataFileConstants.Property.Key.UNI_TIMESTAMP,
                    StreamDataFileConstants.Property.Value.CLOSE_TIMESTAMP,
                    StreamDataFileConstants.Property.Key.EVENT_HEADER_PREFIX + "key", "value"));
  // The writer is closed in the finally block so it is released (exactly once) even if a write or
  // an assertion fails. The close timestamp is read afterwards, as it is only available once closed.
  try {
    // Write 1000 events with different timestamp
    for (int i = 0; i < 1000; i++) {
      writer.append(StreamFileTestUtils.createEvent(i, "Message " + i));
    }

    // Trying to get close timestamp should throw exception before the file get closed
    try {
      writer.getCloseTimestamp();
      Assert.fail();
    } catch (IllegalStateException e) {
      // Expected
    }
  } finally {
    writer.close();
  }

  // Get the close timestamp from the file for assertion below
  long closeTimestamp = writer.getCloseTimestamp();

  // Create a reader to read all events. All events should have the same timestamp
  List<StreamEvent> events = Lists.newArrayList();
  try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
    Assert.assertEquals(1000, reader.read(events, 1000, 0, TimeUnit.SECONDS));

    // All events should have the same timestamp and contains a default header
    for (StreamEvent event : events) {
      Assert.assertEquals(closeTimestamp, event.getTimestamp());
      Assert.assertEquals("value", event.getHeaders().get("key"));
    }

    // No more events
    Assert.assertEquals(-1, reader.read(events, 1, 0, TimeUnit.SECONDS));
  }

  // Open another reader that reads with a filter that skips all events by timestamp
  try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
    int res = reader.read(events, 1, 0, TimeUnit.SECONDS, new ReadFilter() {
      @Override
      public boolean acceptTimestamp(long timestamp) {
        return false;
      }
    });
    Assert.assertEquals(-1, res);
  }
}
use of org.apache.twill.filesystem.Location in project cdap by caskdata.
the class StreamDataFileTestBase method testFilter.
/**
 * Exercises reading with a {@link ReadFilter} that rejects every timestamp: the first read returns no
 * events but does consult the filter, and a second read after resetting the filter returns no events
 * without calling {@code acceptTimestamp} again.
 */
@Test
public void testFilter() throws Exception {
  Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
  final Location eventFile = dir.getTempFile(".dat");
  final Location indexFile = dir.getTempFile(".idx");

  // try-with-resources releases the writer and the reader even when an assertion fails.
  // Resources are closed in reverse order: the reader first, then the writer.
  try (StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
                                                              Locations.newOutputSupplier(indexFile), 10000L)) {
    // Write and flush a single event so that there is something for the reader to look at
    writer.append(StreamFileTestUtils.createEvent(0, "Message 1"));
    writer.flush();

    try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
      List<StreamEvent> events = Lists.newArrayList();

      // Records whether acceptTimestamp has been called since the last reset
      final AtomicBoolean active = new AtomicBoolean(false);
      ReadFilter filter = new ReadFilter() {
        private long nextTimestamp = -1L;

        @Override
        public void reset() {
          active.set(false);
          nextTimestamp = -1L;
        }

        @Override
        public boolean acceptTimestamp(long timestamp) {
          // Reject the timestamp and hint at the one right after it
          active.set(true);
          nextTimestamp = timestamp + 1;
          return false;
        }

        @Override
        public long getNextTimestampHint() {
          return nextTimestamp;
        }
      };

      // First read: the filter is consulted and rejects the event, so nothing is returned
      Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS, filter));
      Assert.assertTrue(active.get());

      // Second read after a reset: still nothing, and acceptTimestamp is not called this time
      filter.reset();
      Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS, filter));
      Assert.assertFalse(active.get());
    }
  }
}
Aggregations