Use of co.cask.cdap.data.file.ReadFilter in project cdap by caskdata.
The class StreamDataFileTestBase, method testEventTemplate.
/**
 * This unit test tests the v2 file format, which supports
 * defaulting values in stream events (timestamp and headers).
 */
@Test
public void testEventTemplate() throws IOException, InterruptedException {
  Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
  Location eventFile = dir.getTempFile(".dat");
  Location indexFile = dir.getTempFile(".idx");
  // Create a stream file with the uni-timestamp property (all events take the
  // file's close timestamp) and a default header (key=value)
  StreamDataFileWriter writer = new StreamDataFileWriter(
    Locations.newOutputSupplier(eventFile),
    Locations.newOutputSupplier(indexFile),
    10000L,
    ImmutableMap.of(
      StreamDataFileConstants.Property.Key.UNI_TIMESTAMP,
      StreamDataFileConstants.Property.Value.CLOSE_TIMESTAMP,
      StreamDataFileConstants.Property.Key.EVENT_HEADER_PREFIX + "key", "value"));
  // Write 1000 events with different timestamps
  for (int i = 0; i < 1000; i++) {
    writer.append(StreamFileTestUtils.createEvent(i, "Message " + i));
  }
  // Trying to get the close timestamp before the file is closed should throw an exception
  try {
    writer.getCloseTimestamp();
    Assert.fail();
  } catch (IllegalStateException e) {
    // Expected
  }
  writer.close();
  // Get the close timestamp from the file for the assertions below
  long timestamp = writer.getCloseTimestamp();
  // Create a reader to read all events. All events should have the same timestamp
  StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
  List<StreamEvent> events = Lists.newArrayList();
  Assert.assertEquals(1000, reader.read(events, 1000, 0, TimeUnit.SECONDS));
  // All events should have the same timestamp and contain the default header
  for (StreamEvent event : events) {
    Assert.assertEquals(timestamp, event.getTimestamp());
    Assert.assertEquals("value", event.getHeaders().get("key"));
  }
  // No more events
  Assert.assertEquals(-1, reader.read(events, 1, 0, TimeUnit.SECONDS));
  reader.close();
  // Open another reader with a filter that rejects all events by timestamp
  reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
  int res = reader.read(events, 1, 0, TimeUnit.SECONDS, new ReadFilter() {
    @Override
    public boolean acceptTimestamp(long timestamp) {
      return false;
    }
  });
  Assert.assertEquals(-1, res);
  reader.close();
}
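Since the anonymous filter above overrides only acceptTimestamp, the other ReadFilter checks evidently default to accepting, so a custom filter only needs to override the dimensions it cares about. The following is a minimal sketch built the same way: a hypothetical window filter whose name and bounds are illustrative, not part of the test above.

// Sketch: accept only events whose timestamp falls in [startTime, endTime).
ReadFilter windowFilter = new ReadFilter() {
  private final long startTime = 100L;  // inclusive lower bound (assumed value)
  private final long endTime = 500L;    // exclusive upper bound (assumed value)

  @Override
  public boolean acceptTimestamp(long timestamp) {
    return timestamp >= startTime && timestamp < endTime;
  }
};
// It plugs into the same read call as the skip-all filter:
// reader.read(events, 1000, 0, TimeUnit.SECONDS, windowFilter);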
Use of co.cask.cdap.data.file.ReadFilter in project cdap by caskdata.
The class StreamDataFileTestBase, method testFilter.
@Test
public void testFilter() throws Exception {
  Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
  final Location eventFile = dir.getTempFile(".dat");
  final Location indexFile = dir.getTempFile(".idx");
  StreamDataFileWriter writer = new StreamDataFileWriter(
    Locations.newOutputSupplier(eventFile),
    Locations.newOutputSupplier(indexFile),
    10000L);
  writer.append(StreamFileTestUtils.createEvent(0, "Message 1"));
  writer.flush();
  StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
  List<StreamEvent> events = Lists.newArrayList();
  // A filter that rejects every event, hints the reader to skip past the
  // rejected timestamp, and records whether it was consulted
  final AtomicBoolean active = new AtomicBoolean(false);
  ReadFilter filter = new ReadFilter() {
    private long nextTimestamp = -1L;

    @Override
    public void reset() {
      active.set(false);
      nextTimestamp = -1L;
    }

    @Override
    public boolean acceptTimestamp(long timestamp) {
      active.set(true);
      nextTimestamp = timestamp + 1;
      return false;
    }

    @Override
    public long getNextTimestampHint() {
      return nextTimestamp;
    }
  };
  // First read: the filter is consulted and rejects the only event
  Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS, filter));
  Assert.assertTrue(active.get());
  // Second read: the reader has already skipped past the event, so the filter
  // is never consulted
  filter.reset();
  Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS, filter));
  Assert.assertFalse(active.get());
  reader.close();
  writer.close();
}
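The reset/hint protocol shown above is what makes efficient skipping possible: when acceptTimestamp rejects an event, the reader can use getNextTimestampHint to seek forward rather than testing every event in between. A natural application of that protocol is TTL-based expiry; the sketch below illustrates the pattern by analogy with the test filter above. It is not CDAP's actual TTL filter, and the one-minute ttl is an arbitrary value.

// Sketch of a TTL-style filter: reject events older than `ttl` milliseconds
// and hint the reader to seek straight to the earliest still-valid timestamp.
ReadFilter ttlFilter = new ReadFilter() {
  private final long ttl = 60_000L;  // hypothetical TTL of one minute
  private long nextTimestamp = -1L;

  @Override
  public void reset() {
    nextTimestamp = -1L;
  }

  @Override
  public boolean acceptTimestamp(long timestamp) {
    long minValid = System.currentTimeMillis() - ttl;
    if (timestamp >= minValid) {
      return true;
    }
    nextTimestamp = minValid;  // everything before this point has expired
    return false;
  }

  @Override
  public long getNextTimestampHint() {
    return nextTimestamp;
  }
};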
Use of co.cask.cdap.data.file.ReadFilter in project cdap by caskdata.
The class AbstractStreamFileConsumer, method createBaseReadFilter.
private ReadFilter createBaseReadFilter(final ConsumerConfig consumerConfig) {
  final int groupSize = consumerConfig.getGroupSize();
  final DequeueStrategy strategy = consumerConfig.getDequeueStrategy();
  if (groupSize == 1 || strategy == DequeueStrategy.FIFO) {
    return ReadFilter.ALWAYS_ACCEPT;
  }
  // For RoundRobin and Hash partitioning, an event is claimed by matching a hash code
  // to the consumer instance id.
  // For Hash, to preserve existing behavior, everything routes to instance 0.
  // For RoundRobin, the idea is to scatter the events across consumers evenly. Since
  // there is no way to know the absolute starting point for true round-robin, a
  // good-enough hash function on the file offset is used to spread events across consumers.
  final int instanceId = consumerConfig.getInstanceId();
  return new ReadFilter() {
    @Override
    public boolean acceptOffset(long offset) {
      int hashValue = Math.abs(strategy == DequeueStrategy.HASH ? 0 : ROUND_ROBIN_HASHER.hashLong(offset).hashCode());
      return instanceId == (hashValue % groupSize);
    }
  };
}
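To make the partitioning concrete, the standalone sketch below reproduces the claim check with Guava's murmur3_32; ROUND_ROBIN_HASHER's definition is not shown in this excerpt, so the choice of hash function and the class name are assumptions. Because every offset hashes to exactly one value in [0, groupSize), each event is claimed by exactly one consumer instance.

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

// Standalone illustration of the round-robin claim check above.
public final class RoundRobinClaimDemo {
  // Assumption: a murmur3-style hasher stands in for ROUND_ROBIN_HASHER.
  private static final HashFunction HASHER = Hashing.murmur3_32();

  static boolean claims(long offset, int instanceId, int groupSize) {
    int hashValue = Math.abs(HASHER.hashLong(offset).hashCode());
    return instanceId == (hashValue % groupSize);
  }

  public static void main(String[] args) {
    // With a group of 3 consumers, print which instance claims each offset.
    for (long offset = 0; offset < 5; offset++) {
      for (int instance = 0; instance < 3; instance++) {
        if (claims(offset, instance, 3)) {
          System.out.println("offset " + offset + " -> instance " + instance);
        }
      }
    }
  }
}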