Search in sources :

Example 11 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class GrokRecordFormatTest method testSimple.

@Test
public void testSimple() throws Exception {
    Schema schema = Schema.recordOf("streamEvent", Schema.Field.of("user", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("body", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    FormatSpecification spec = new FormatSpecification(Formats.GROK, schema, GrokRecordFormat.settings("%{USER:user}:%{GREEDYDATA:body}"));
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(spec);
    String message = "nitin:falkfjaksjf fkafjalkf fa fasfsalfsaf af afaslkfjasf asf af asf";
    StructuredRecord record = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(message))));
    Assert.assertEquals("nitin", record.get("user"));
    Assert.assertEquals("falkfjaksjf fkafjalkf fa fasfsalfsaf af afaslkfjasf asf af asf", record.get("body"));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 12 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class MultiLiveStreamFileReaderTestBase method testMultiFileReader.

@Test
public void testMultiFileReader() throws Exception {
    String streamName = "multiReader";
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    Location location = getLocationFactory().create(streamName);
    location.mkdirs();
    // Create a stream with 1 partition.
    StreamConfig config = new StreamConfig(streamId, Long.MAX_VALUE, 10000, Long.MAX_VALUE, location, null, 1000);
    // Write out 200 events in 5 files, with interleaving timestamps
    List<FileWriter<StreamEvent>> writers = Lists.newArrayList();
    for (int i = 0; i < 5; i++) {
        FileWriter<StreamEvent> writer = createWriter(config, "bucket" + i);
        writers.add(writer);
        for (int j = 0; j < 200; j++) {
            long timestamp = j * 5 + i;
            writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
        }
    }
    // Flush all writers.
    for (FileWriter<StreamEvent> writer : writers) {
        writer.flush();
    }
    // Create a multi stream file reader
    List<StreamFileOffset> sources = Lists.newArrayList();
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), 0, Long.MAX_VALUE);
    for (int i = 0; i < 5; i++) {
        Location eventFile = StreamUtils.createStreamLocation(partitionLocation, "bucket" + i, 0, StreamFileType.EVENT);
        sources.add(new StreamFileOffset(eventFile, 0L, 0));
    }
    // Reads all events written so far.
    MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(config, sources);
    List<StreamEvent> events = Lists.newArrayList();
    long expectedTimestamp = 0L;
    for (int i = 0; i < 10; i++) {
        Assert.assertEquals(100, reader.read(events, 100, 0, TimeUnit.SECONDS));
        Assert.assertEquals(100, events.size());
        for (StreamEvent event : events) {
            Assert.assertEquals(expectedTimestamp, event.getTimestamp());
            Assert.assertEquals("Testing " + expectedTimestamp, Charsets.UTF_8.decode(event.getBody()).toString());
            expectedTimestamp++;
        }
        events.clear();
    }
    Assert.assertEquals(0, reader.read(events, 1, 1, TimeUnit.SECONDS));
    // Writes some more events to the first three writers.
    for (int i = 0; i < 3; i++) {
        FileWriter<StreamEvent> writer = writers.get(i);
        for (int j = 0; j < 10; j++) {
            long timestamp = 1000 + j * 3 + i;
            writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
        }
    }
    // Close all writers
    for (FileWriter<StreamEvent> writer : writers) {
        writer.close();
    }
    // Continue to read
    Assert.assertEquals(30, reader.read(events, 30, 2, TimeUnit.SECONDS));
    Assert.assertEquals(30, events.size());
    for (StreamEvent event : events) {
        Assert.assertEquals(expectedTimestamp, event.getTimestamp());
        Assert.assertEquals("Testing " + expectedTimestamp, Charsets.UTF_8.decode(event.getBody()).toString());
        expectedTimestamp++;
    }
    // Should get no more events.
    Assert.assertEquals(0, reader.read(events, 1, 1, TimeUnit.SECONDS));
    reader.close();
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) FileWriter(co.cask.cdap.data.file.FileWriter) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 13 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamFileJanitorTestBase method testCleanupGeneration.

/**
 * Test the clean up of the janitor, also checks that user without privilege on the stream is also able to clean up
 * the stream.
 */
@Test
public void testCleanupGeneration() throws Exception {
    // Create a stream and performs couple truncate
    String streamName = "testCleanupGeneration";
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    authorizer.grant(Authorizable.fromEntityId(streamId), ALICE, EnumSet.of(Action.ADMIN));
    StreamAdmin streamAdmin = getStreamAdmin();
    streamAdmin.create(streamId);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    for (int i = 0; i < 5; i++) {
        FileWriter<StreamEvent> writer = createWriter(streamId);
        writer.append(StreamFileTestUtils.createEvent(System.currentTimeMillis(), "Testing"));
        writer.close();
        // Call cleanup before truncate. The current generation should stand.
        janitor.clean(streamConfig.getLocation(), streamConfig.getTTL(), System.currentTimeMillis());
        verifyGeneration(streamConfig, i);
        streamAdmin.truncate(streamId);
    }
    SecurityRequestContext.setUserId(BOB.getName());
    int generation = StreamUtils.getGeneration(streamConfig);
    Assert.assertEquals(5, generation);
    janitor.clean(streamConfig.getLocation(), streamConfig.getTTL(), System.currentTimeMillis());
    // Verify the stream directory should only contains the generation directory
    for (Location location : streamConfig.getLocation().list()) {
        if (location.isDirectory()) {
            Assert.assertEquals(generation, Integer.parseInt(location.getName()));
        }
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 14 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamInputFormatTest method testStreamRecordReader.

@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());
    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile), Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();
    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    AbstractStreamInputFormat.setStreamId(conf, DUMMY_ID);
    AbstractStreamInputFormat.setStreamPath(conf, inputDir.toURI());
    AbstractStreamInputFormat format = new AbstractStreamInputFormat() {

        @Override
        public AuthorizationEnforcer getAuthorizationEnforcer(TaskAttemptContext context) {
            return new NoOpAuthorizer();
        }

        @Override
        public AuthenticationContext getAuthenticationContext(TaskAttemptContext context) {
            return new AuthenticationTestContext();
        }
    };
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());
    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();
    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(new IdentityStreamEventDecoder(), new NoOpAuthorizer(), new AuthenticationTestContext(), DUMMY_ID);
    recordReader.initialize(splits.get(1), context);
    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));
    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}
Also used : JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) AuthenticationTestContext(co.cask.cdap.security.auth.context.AuthenticationTestContext) NoOpAuthorizer(co.cask.cdap.security.spi.authorization.NoOpAuthorizer) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) IdentityStreamEventDecoder(co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder) LongWritable(org.apache.hadoop.io.LongWritable) File(java.io.File) InputSplit(org.apache.hadoop.mapreduce.InputSplit) JobConf(org.apache.hadoop.mapred.JobConf) JobID(org.apache.hadoop.mapred.JobID) Test(org.junit.Test)

Example 15 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class StreamInputFormatTest method testStringStreamEventDecoder.

@Test
public void testStringStreamEventDecoder() {
    String body = "Testing";
    StreamEvent event = new StreamEvent(ImmutableMap.<String, String>of(), Charsets.UTF_8.encode(body));
    StreamEventDecoder<LongWritable, String> decoder = new StringStreamEventDecoder();
    StreamEventDecoder.DecodeResult<LongWritable, String> result = new StreamEventDecoder.DecodeResult<>();
    result = decoder.decode(event, result);
    Assert.assertEquals(event.getTimestamp(), result.getKey().get());
    Assert.assertEquals(body, result.getValue());
}
Also used : IdentityStreamEventDecoder(co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder) StringStreamEventDecoder(co.cask.cdap.data.stream.decoder.StringStreamEventDecoder) StreamEventDecoder(co.cask.cdap.api.stream.StreamEventDecoder) BytesStreamEventDecoder(co.cask.cdap.data.stream.decoder.BytesStreamEventDecoder) TextStreamEventDecoder(co.cask.cdap.data.stream.decoder.TextStreamEventDecoder) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) LongWritable(org.apache.hadoop.io.LongWritable) StringStreamEventDecoder(co.cask.cdap.data.stream.decoder.StringStreamEventDecoder) Test(org.junit.Test)

Aggregations

StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)84 Test (org.junit.Test)65 Location (org.apache.twill.filesystem.Location)27 StreamId (co.cask.cdap.proto.id.StreamId)24 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)19 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)17 Schema (co.cask.cdap.api.data.schema.Schema)10 IOException (java.io.IOException)9 StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig)8 ByteBuffer (java.nio.ByteBuffer)8 ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig)7 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)6 TransactionContext (org.apache.tephra.TransactionContext)6 BinaryDecoder (co.cask.cdap.common.io.BinaryDecoder)5 TypeToken (com.google.common.reflect.TypeToken)5 StreamEventCodec (co.cask.cdap.common.stream.StreamEventCodec)4 IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder)4 File (java.io.File)4 SchemaHash (co.cask.cdap.api.data.schema.SchemaHash)3 QueueName (co.cask.cdap.common.queue.QueueName)3