
Example 71 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamInputFormatTest method testFormatStreamRecordReader.

@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());
    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile), Files.newOutputStreamSupplier(indexFile), 100L);
    StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"), Charsets.UTF_8.encode("hello world"), 1000);
    writer.append(streamEvent);
    writer.close();
    FormatSpecification formatSpec = new FormatSpecification(TextRecordFormat.class.getName(), Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))), Collections.<String, String>emptyMap());
    Configuration conf = new Configuration();
    AbstractStreamInputFormat.setStreamId(conf, DUMMY_ID);
    AbstractStreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
    AbstractStreamInputFormat.setStreamPath(conf, inputDir.toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    AbstractStreamInputFormat format = new AbstractStreamInputFormat() {

        @Override
        public AuthorizationEnforcer getAuthorizationEnforcer(TaskAttemptContext context) {
            return new NoOpAuthorizer();
        }

        @Override
        public AuthenticationContext getAuthenticationContext(TaskAttemptContext context) {
            return new AuthenticationTestContext();
        }
    };
    // read all splits and store the results in the list
    List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
    List<InputSplit> inputSplits = format.getSplits(context);
    for (InputSplit split : inputSplits) {
        RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format.createRecordReader(split, context);
        recordReader.initialize(split, context);
        while (recordReader.nextKeyValue()) {
            recordsRead.add(recordReader.getCurrentValue());
        }
    }
    // should only have read 1 record
    Assert.assertEquals(1, recordsRead.size());
    GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
    Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
    Assert.assertEquals("hello world", eventData.getBody().get("body"));
}
Also used : TextRecordFormat(co.cask.cdap.format.TextRecordFormat) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) AuthenticationTestContext(co.cask.cdap.security.auth.context.AuthenticationTestContext) NoOpAuthorizer(co.cask.cdap.security.spi.authorization.NoOpAuthorizer) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) GenericStreamEventData(co.cask.cdap.api.stream.GenericStreamEventData) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) LongWritable(org.apache.hadoop.io.LongWritable) File(java.io.File) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)
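
For reference, a minimal sketch of decoding a StreamEvent body with the same FormatSpecification outside of MapReduce. It assumes TextRecordFormat follows CDAP's RecordFormat contract (initialize(FormatSpecification) followed by read(StreamEvent)); exact signatures may differ between CDAP versions.

private StructuredRecord decodeBody(FormatSpecification formatSpec, StreamEvent event) throws Exception {
    // Assumption: TextRecordFormat can be constructed and initialized directly
    // with the same spec that the test passes to the input format.
    TextRecordFormat format = new TextRecordFormat();
    format.initialize(formatSpec);
    // For the schema above, the returned record exposes the event body under the "body" field.
    return format.read(event);
}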

Example 72 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamInputFormatTest method testIdentityStreamEventDecoder.

@Test
public void testIdentityStreamEventDecoder() {
    ImmutableMap.Builder<String, String> headers = ImmutableMap.builder();
    headers.put("key1", "value1");
    headers.put("key2", "value2");
    ByteBuffer buffer = Charsets.UTF_8.encode("testdata");
    StreamEvent event = new StreamEvent(headers.build(), buffer, System.currentTimeMillis());
    StreamEventDecoder<LongWritable, StreamEvent> decoder = new IdentityStreamEventDecoder();
    StreamEventDecoder.DecodeResult<LongWritable, StreamEvent> result = new StreamEventDecoder.DecodeResult<>();
    result = decoder.decode(event, result);
    Assert.assertEquals(new LongWritable(event.getTimestamp()), result.getKey());
    Assert.assertEquals(event, result.getValue());
}
Also used : IdentityStreamEventDecoder(co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder) StringStreamEventDecoder(co.cask.cdap.data.stream.decoder.StringStreamEventDecoder) StreamEventDecoder(co.cask.cdap.api.stream.StreamEventDecoder) BytesStreamEventDecoder(co.cask.cdap.data.stream.decoder.BytesStreamEventDecoder) TextStreamEventDecoder(co.cask.cdap.data.stream.decoder.TextStreamEventDecoder) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) IdentityStreamEventDecoder(co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder) LongWritable(org.apache.hadoop.io.LongWritable) ByteBuffer(java.nio.ByteBuffer) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)
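
The DecodeResult passed to decode(...) is a reusable holder: the decoder may mutate and return the instance it was given, which is why the test reassigns result. A short sketch of decoding a batch of events with one reused result, using only the API shown above:

private void decodeAll(Iterable<StreamEvent> events) {
    StreamEventDecoder<LongWritable, StreamEvent> decoder = new IdentityStreamEventDecoder();
    StreamEventDecoder.DecodeResult<LongWritable, StreamEvent> result = new StreamEventDecoder.DecodeResult<>();
    for (StreamEvent event : events) {
        // Reassign in case the decoder returns a different (or the same, reused) instance.
        result = decoder.decode(event, result);
        // For the identity decoder, the key is the event timestamp and the value is the event itself.
        System.out.println(result.getKey() + " -> " + result.getValue());
    }
}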

Example 73 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class ConcurrentStreamWriterTestBase method testConcurrentWrite.

@Test
public void testConcurrentWrite() throws Exception {
    final String streamName = "testConcurrentWrite";
    NamespaceId namespace = new NamespaceId("namespace");
    StreamId streamId = namespace.stream(streamName);
    StreamAdmin streamAdmin = new TestStreamAdmin(getNamespacedLocationFactory(), Long.MAX_VALUE, 1000);
    int threads = Runtime.getRuntime().availableProcessors() * 4;
    StreamFileWriterFactory fileWriterFactory = createStreamFileWriterFactory();
    final ConcurrentStreamWriter streamWriter = createStreamWriter(streamId, streamAdmin, threads, fileWriterFactory);
    // Start n threads that write events through the stream writer; each thread writes 1000 events
    final int msgPerThread = 1000;
    final CountDownLatch startLatch = new CountDownLatch(1);
    final CountDownLatch completion = new CountDownLatch(threads);
    ExecutorService executor = Executors.newFixedThreadPool(threads);
    // Half of the threads write events one by one; the other half writes in batches of size 10
    for (int i = 0; i < threads / 2; i++) {
        executor.execute(createWriterTask(streamId, streamWriter, i, msgPerThread, 1, startLatch, completion));
    }
    for (int i = threads / 2; i < threads; i++) {
        executor.execute(createWriterTask(streamId, streamWriter, i, msgPerThread, 10, startLatch, completion));
    }
    startLatch.countDown();
    Assert.assertTrue(completion.await(120, TimeUnit.SECONDS));
    // Verify all events are written.
    // There should be only one partition and one file inside
    Location partitionLocation = streamAdmin.getConfig(streamId).getLocation().list().get(0);
    Location streamLocation = StreamUtils.createStreamLocation(partitionLocation, fileWriterFactory.getFileNamePrefix(), 0, StreamFileType.EVENT);
    StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(streamLocation));
    List<StreamEvent> events = Lists.newArrayListWithCapacity(threads * msgPerThread);
    // Should read all messages
    Assert.assertEquals(threads * msgPerThread, reader.read(events, Integer.MAX_VALUE, 0, TimeUnit.SECONDS));
    // Verify all messages as expected
    Assert.assertTrue(verifyEvents(threads, msgPerThread, events));
    reader.close();
    streamWriter.close();
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) CountDownLatch(java.util.concurrent.CountDownLatch) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) NoopStreamAdmin(co.cask.cdap.data.stream.NoopStreamAdmin) StreamFileWriterFactory(co.cask.cdap.data.stream.StreamFileWriterFactory) LocationStreamFileWriterFactory(co.cask.cdap.data.runtime.LocationStreamFileWriterFactory) ExecutorService(java.util.concurrent.ExecutorService) NamespaceId(co.cask.cdap.proto.id.NamespaceId) StreamDataFileReader(co.cask.cdap.data.stream.StreamDataFileReader) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
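
The createWriterTask(...) helper is defined elsewhere in the test base and is not shown here. As a rough illustration of the coordination pattern it must implement, here is a hedged sketch in which writeBatch(...) is a hypothetical placeholder for the actual ConcurrentStreamWriter call:

private Runnable writerTaskSketch(final int threadId, final int msgCount, final int batchSize,
                                  final CountDownLatch startLatch, final CountDownLatch completion) {
    return new Runnable() {
        @Override
        public void run() {
            try {
                // Every task blocks here until the test counts down startLatch,
                // so all threads hit the shared writer at (roughly) the same time.
                startLatch.await();
                for (int i = 0; i < msgCount; i += batchSize) {
                    writeBatch(threadId, i, batchSize);  // hypothetical: the real task writes via ConcurrentStreamWriter
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            } finally {
                // Count down even on failure so completion.await(...) in the test cannot hang.
                completion.countDown();
            }
        }
    };
}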

Example 74 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamDataFileTestBase method testAppendAllMultiBlocks.

/**
 * Tests that a batch write containing events with different timestamps writes them to distinct data blocks correctly.
 */
@Test
public void testAppendAllMultiBlocks() throws IOException, InterruptedException {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location eventFile = dir.getTempFile(".dat");
    Location indexFile = dir.getTempFile(".idx");
    try (StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L)) {
        // Write with appendAll, using events that carry two different timestamps
        Map<String, String> headers = ImmutableMap.of();
        writer.appendAll(ImmutableList.of(new StreamEvent(headers, Charsets.UTF_8.encode("0"), 1000), new StreamEvent(headers, Charsets.UTF_8.encode("0"), 1000), new StreamEvent(headers, Charsets.UTF_8.encode("1"), 1001), new StreamEvent(headers, Charsets.UTF_8.encode("1"), 1001)).iterator());
    }
    // Read all events and assert their start positions to verify they landed in two different blocks
    try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
        List<PositionStreamEvent> events = Lists.newArrayList();
        Assert.assertEquals(4, reader.read(events, 4, 0, TimeUnit.SECONDS));
        // Event is encoded as <var_int_body_length><body_bytes><var_int_map_size>
        // Since we are writing single byte data,
        // body_length is 1 byte, body_bytes is 1 byte and map_size is 1 byte (with value == 0)
        // The position difference between the first two events should be 3 since they belong to the same data block.
        Assert.assertEquals(3L, events.get(1).getStart() - events.get(0).getStart());
        // The position difference between the second and third events
        // should be 3 (second event size) + 8 (timestamp) + 1 (block length) == 12
        Assert.assertEquals(12L, events.get(2).getStart() - events.get(1).getStart());
        // The position difference between the third and fourth events should be 3 again since they are in the same block
        Assert.assertEquals(3L, events.get(3).getStart() - events.get(2).getStart());
    }
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
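
The three position assertions follow directly from the encoding described in the comments above; a worked sketch of the arithmetic:

private static void explainOffsets() {
    // Per-event size inside a data block, for a one-byte body and no headers:
    int varIntBodyLen = 1;   // var-int encoding of body length 1 fits in one byte
    int bodyBytes = 1;       // "0" or "1" encoded as UTF-8 is one byte
    int varIntMapSize = 1;   // var-int encoding of header-map size 0 fits in one byte
    int eventSize = varIntBodyLen + bodyBytes + varIntMapSize;  // == 3
    // Events in the same block are packed back to back, so the delta is just eventSize.
    int sameBlockDelta = eventSize;                             // == 3 (events 0->1 and 2->3)
    // Crossing into a new block also skips the block's 8-byte timestamp and 1-byte length var-int.
    int newBlockDelta = eventSize + 8 + 1;                      // == 12 (events 1->2)
    System.out.println(sameBlockDelta + " / " + newBlockDelta);
}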

Example 75 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamDataFileTestBase method testTailNotExists.

@Test
public void testTailNotExists() throws IOException, InterruptedException {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location eventFile = dir.getTempFile(".dat");
    Location indexFile = dir.getTempFile(".idx");
    // Create a reader on a non-existent file and try reading; it should succeed with 0 events read.
    List<StreamEvent> events = Lists.newArrayList();
    StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
    Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS));
    // Write an event
    StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(100, "Testing"));
    writer.flush();
    // Reads the event just written
    Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
    Assert.assertEquals(100, events.get(0).getTimestamp());
    Assert.assertEquals("Testing", Charsets.UTF_8.decode(events.get(0).getBody()).toString());
    // Close the writer.
    writer.close();
    // Reader should return EOF (after some time, as closing a file takes time on HDFS).
    Assert.assertEquals(-1, reader.read(events, 1, 2, TimeUnit.SECONDS));
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
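
The read(...) contract this test exercises (0 when no events are available within the timeout, a positive count on success, -1 for EOF once the writer has closed the file) supports a simple tailing loop. A minimal sketch, where process(...) is a hypothetical consumer:

private void tail(StreamDataFileReader reader) throws IOException, InterruptedException {
    List<StreamEvent> batch = Lists.newArrayList();
    int count;
    // Poll for up to 100 events at a time, waiting at most 1 second per call;
    // a return of -1 means the file was closed and no more events will arrive.
    while ((count = reader.read(batch, 100, 1, TimeUnit.SECONDS)) >= 0) {
        for (StreamEvent event : batch) {
            process(event);  // hypothetical application logic
        }
        batch.clear();
    }
}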

Aggregations

StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)84 Test (org.junit.Test)65 Location (org.apache.twill.filesystem.Location)27 StreamId (co.cask.cdap.proto.id.StreamId)24 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)19 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)17 Schema (co.cask.cdap.api.data.schema.Schema)10 IOException (java.io.IOException)9 StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig)8 ByteBuffer (java.nio.ByteBuffer)8 ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig)7 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)6 TransactionContext (org.apache.tephra.TransactionContext)6 BinaryDecoder (co.cask.cdap.common.io.BinaryDecoder)5 TypeToken (com.google.common.reflect.TypeToken)5 StreamEventCodec (co.cask.cdap.common.stream.StreamEventCodec)4 IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder)4 File (java.io.File)4 SchemaHash (co.cask.cdap.api.data.schema.SchemaHash)3 QueueName (co.cask.cdap.common.queue.QueueName)3