Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.
In the class StreamInputFormatTest, the method testFormatStreamRecordReader:
@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
  File inputDir = tmpFolder.newFolder();
  File partition = new File(inputDir, "1.1000");
  partition.mkdirs();
  File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
  File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

  // Write one event
  StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
                                                         Files.newOutputStreamSupplier(indexFile), 100L);
  StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"),
                                            Charsets.UTF_8.encode("hello world"), 1000);
  writer.append(streamEvent);
  writer.close();

  FormatSpecification formatSpec = new FormatSpecification(
    TextRecordFormat.class.getName(),
    Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))),
    Collections.<String, String>emptyMap());
  Configuration conf = new Configuration();
  AbstractStreamInputFormat.setStreamId(conf, DUMMY_ID);
  AbstractStreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
  AbstractStreamInputFormat.setStreamPath(conf, inputDir.toURI());
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  AbstractStreamInputFormat format = new AbstractStreamInputFormat() {
    @Override
    public AuthorizationEnforcer getAuthorizationEnforcer(TaskAttemptContext context) {
      return new NoOpAuthorizer();
    }

    @Override
    public AuthenticationContext getAuthenticationContext(TaskAttemptContext context) {
      return new AuthenticationTestContext();
    }
  };

  // Read all splits and store the results in the list
  List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
  List<InputSplit> inputSplits = format.getSplits(context);
  for (InputSplit split : inputSplits) {
    RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader =
      format.createRecordReader(split, context);
    recordReader.initialize(split, context);
    while (recordReader.nextKeyValue()) {
      recordsRead.add(recordReader.getCurrentValue());
    }
  }

  // Should only have read one record
  Assert.assertEquals(1, recordsRead.size());
  GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
  Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
  Assert.assertEquals("hello world", eventData.getBody().get("body"));
}
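The final assertion ties back to the FormatSpecification above: TextRecordFormat turns the UTF-8 event body into a record with the single "body" string field declared in the schema, which is why eventData.getBody().get("body") yields the original text. A minimal sketch of that mapping, for illustration only (not CDAP's actual TextRecordFormat implementation):

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

// Illustrative only: how a text format could map a UTF-8 event body to the
// single "body" string field declared by the schema above.
final class TextBodySketch {
  static String decodeTextBody(ByteBuffer body) {
    // Decode a duplicate so the caller's buffer position is left untouched.
    return StandardCharsets.UTF_8.decode(body.duplicate()).toString();
  }
}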
Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.
In the class StreamInputFormatTest, the method testIdentityStreamEventDecoder:
@Test
public void testIdentityStreamEventDecoder() {
  ImmutableMap.Builder<String, String> headers = ImmutableMap.builder();
  headers.put("key1", "value1");
  headers.put("key2", "value2");
  ByteBuffer buffer = Charsets.UTF_8.encode("testdata");
  StreamEvent event = new StreamEvent(headers.build(), buffer, System.currentTimeMillis());

  StreamEventDecoder<LongWritable, StreamEvent> decoder = new IdentityStreamEventDecoder();
  StreamEventDecoder.DecodeResult<LongWritable, StreamEvent> result = new StreamEventDecoder.DecodeResult<>();
  result = decoder.decode(event, result);

  Assert.assertEquals(new LongWritable(event.getTimestamp()), result.getKey());
  Assert.assertEquals(event, result.getValue());
}
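From the assertions, the identity decoder keys each event by its timestamp and passes the event through unchanged as the value. A minimal sketch of such a decoder, inferred from the test above; the fluent setKey/setValue calls are assumptions suggested by the getKey/getValue accessors, and the real IdentityStreamEventDecoder may differ in detail:

// Sketch of an identity decoder matching the contract exercised by the test.
public class IdentityDecoderSketch implements StreamEventDecoder<LongWritable, StreamEvent> {
  @Override
  public DecodeResult<LongWritable, StreamEvent> decode(StreamEvent event,
                                                        DecodeResult<LongWritable, StreamEvent> result) {
    // Key by event timestamp; pass the event through unchanged as the value.
    return result.setKey(new LongWritable(event.getTimestamp())).setValue(event);
  }
}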
Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.
In the class ConcurrentStreamWriterTestBase, the method testConcurrentWrite:
@Test
public void testConcurrentWrite() throws Exception {
  final String streamName = "testConcurrentWrite";
  NamespaceId namespace = new NamespaceId("namespace");
  StreamId streamId = namespace.stream(streamName);
  StreamAdmin streamAdmin = new TestStreamAdmin(getNamespacedLocationFactory(), Long.MAX_VALUE, 1000);
  int threads = Runtime.getRuntime().availableProcessors() * 4;
  StreamFileWriterFactory fileWriterFactory = createStreamFileWriterFactory();
  final ConcurrentStreamWriter streamWriter = createStreamWriter(streamId, streamAdmin, threads, fileWriterFactory);

  // Start n threads to write events through the stream writer; each thread writes 1000 events
  final int msgPerThread = 1000;
  final CountDownLatch startLatch = new CountDownLatch(1);
  final CountDownLatch completion = new CountDownLatch(threads);
  ExecutorService executor = Executors.newFixedThreadPool(threads);

  // Half of the threads write events one by one; the other half writes in batches of size 10
  for (int i = 0; i < threads / 2; i++) {
    executor.execute(createWriterTask(streamId, streamWriter, i, msgPerThread, 1, startLatch, completion));
  }
  for (int i = threads / 2; i < threads; i++) {
    executor.execute(createWriterTask(streamId, streamWriter, i, msgPerThread, 10, startLatch, completion));
  }
  startLatch.countDown();
  Assert.assertTrue(completion.await(120, TimeUnit.SECONDS));

  // Verify all events are written.
  // There should be only one partition, with one event file inside.
  Location partitionLocation = streamAdmin.getConfig(streamId).getLocation().list().get(0);
  Location streamLocation = StreamUtils.createStreamLocation(partitionLocation,
                                                             fileWriterFactory.getFileNamePrefix(), 0,
                                                             StreamFileType.EVENT);
  StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(streamLocation));
  List<StreamEvent> events = Lists.newArrayListWithCapacity(threads * msgPerThread);

  // Should read all messages
  Assert.assertEquals(threads * msgPerThread, reader.read(events, Integer.MAX_VALUE, 0, TimeUnit.SECONDS));

  // Verify all messages are as expected
  Assert.assertTrue(verifyEvents(threads, msgPerThread, events));

  reader.close();
  streamWriter.close();
}
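The createWriterTask helper is not shown in this snippet. A hypothetical sketch of what such a task might look like, inferred purely from its call sites (thread id, message count, batch size, and the two latches); the enqueue signature and message format here are assumptions, not the actual ConcurrentStreamWriterTestBase code:

// Hypothetical reconstruction of createWriterTask, inferred from its call sites.
private Runnable createWriterTask(final StreamId streamId, final ConcurrentStreamWriter writer,
                                  final int threadId, final int msgCount, final int batchSize,
                                  final CountDownLatch startLatch, final CountDownLatch completion) {
  return new Runnable() {
    @Override
    public void run() {
      try {
        // Block until all writer threads are ready, so writes race as intended.
        startLatch.await();
        for (int i = 0; i < msgCount; i += batchSize) {
          for (int j = 0; j < batchSize; j++) {
            // Assumed per-event enqueue; a real implementation would likely use
            // a batch enqueue when batchSize > 1.
            writer.enqueue(streamId, ImmutableMap.<String, String>of(),
                           Charsets.UTF_8.encode("Message " + (i + j) + " from " + threadId));
          }
        }
      } catch (Exception e) {
        throw Throwables.propagate(e);
      } finally {
        completion.countDown();
      }
    }
  };
}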
Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.
In the class StreamDataFileTestBase, the method testAppendAllMultiBlocks:
/**
 * Tests that a batch write containing events with different timestamps
 * writes to different data blocks correctly.
 */
@Test
public void testAppendAllMultiBlocks() throws IOException, InterruptedException {
  Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
  Location eventFile = dir.getTempFile(".dat");
  Location indexFile = dir.getTempFile(".idx");

  try (StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
                                                              Locations.newOutputSupplier(indexFile), 10000L)) {
    // Write with appendAll, using events that carry two different timestamps
    Map<String, String> headers = ImmutableMap.of();
    writer.appendAll(ImmutableList.of(
      new StreamEvent(headers, Charsets.UTF_8.encode("0"), 1000),
      new StreamEvent(headers, Charsets.UTF_8.encode("0"), 1000),
      new StreamEvent(headers, Charsets.UTF_8.encode("1"), 1001),
      new StreamEvent(headers, Charsets.UTF_8.encode("1"), 1001)).iterator());
  }

  // Read all events and assert the event positions to see that they land in two different blocks
  try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
    List<PositionStreamEvent> events = Lists.newArrayList();
    Assert.assertEquals(4, reader.read(events, 4, 0, TimeUnit.SECONDS));

    // An event is encoded as <var_int_body_length><body_bytes><var_int_map_size>.
    // Since we are writing single-byte bodies, body_length is 1 byte, body_bytes is
    // 1 byte, and map_size is 1 byte (with value == 0).
    // The position difference between the first two events should be 3, since they
    // belong to the same data block.
    Assert.assertEquals(3L, events.get(1).getStart() - events.get(0).getStart());
    // The position difference between the second and third events should be
    // 3 (second event size) + 8 (timestamp) + 1 (block length) == 12.
    Assert.assertEquals(12L, events.get(2).getStart() - events.get(1).getStart());
    // The position difference between the third and fourth events should be 3 again,
    // since they are in the same block.
    Assert.assertEquals(3L, events.get(3).getStart() - events.get(2).getStart());
  }
}
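The offset arithmetic in those assertions can be checked by hand. A small worked sketch, using only the layout facts stated in the comments (each event is varint body length + body bytes + varint header count, and each data block is preceded by an 8-byte timestamp plus a 1-byte varint block length):

// Worked offsets for the four single-byte events above.
int eventSize = 1 /* varint body length */ + 1 /* body byte */ + 1 /* varint header count == 0 */;
int blockHeader = 8 /* timestamp */ + 1 /* varint block length */;
long sameBlockDelta = eventSize;                // events 0->1 and 2->3: 3 bytes
long crossBlockDelta = eventSize + blockHeader; // events 1->2: 3 + 9 == 12 bytes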
Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.
In the class StreamDataFileTestBase, the method testTailNotExists:
@Test
public void testTailNotExists() throws IOException, InterruptedException {
  Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
  Location eventFile = dir.getTempFile(".dat");
  Location indexFile = dir.getTempFile(".idx");

  // Create a reader on a non-existent file and try reading; it should succeed with 0 events read.
  List<StreamEvent> events = Lists.newArrayList();
  StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
  Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS));

  // Write an event
  StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
                                                         Locations.newOutputSupplier(indexFile), 100L);
  writer.append(StreamFileTestUtils.createEvent(100, "Testing"));
  writer.flush();

  // Read the event just written
  Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
  Assert.assertEquals(100, events.get(0).getTimestamp());
  Assert.assertEquals("Testing", Charsets.UTF_8.decode(events.get(0).getBody()).toString());

  // Close the writer.
  writer.close();

  // The reader should return EOF (after some time, as closing a file takes time on HDFS).
  Assert.assertEquals(-1, reader.read(events, 1, 2, TimeUnit.SECONDS));
}
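The read contract this test exercises (0 when no data is available yet, a positive count once data is flushed, and -1 at EOF after the writer closes) lends itself to a simple tail loop. A minimal sketch under that assumption; handle(...) is a hypothetical per-event callback, not part of the CDAP API:

// Tail a stream file until the writer closes it; read(...) returns the number
// of events read, or -1 once end-of-file is reached.
List<StreamEvent> buffer = Lists.newArrayList();
int count;
while ((count = reader.read(buffer, 100, 1, TimeUnit.SECONDS)) >= 0) {
  for (StreamEvent event : buffer) {
    handle(event); // hypothetical per-event callback
  }
  buffer.clear();
}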