
Example 51 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class GrokRecordFormatTest, method testSyslog:

@Test
public void testSyslog() throws Exception {
    FormatSpecification spec = new FormatSpecification(Formats.SYSLOG, null, Collections.<String, String>emptyMap());
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(spec);
    String message = "Oct 17 08:59:00 suod newsyslog[6215]: logfile turned over";
    StructuredRecord record = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(message))));
    Assert.assertEquals("Oct 17 08:59:00", record.get("timestamp"));
    Assert.assertEquals("suod", record.get("logsource"));
    Assert.assertEquals("newsyslog", record.get("program"));
    Assert.assertEquals("6215", record.get("pid"));
    Assert.assertEquals("logfile turned over", record.get("message"));
    message = "Oct 17 08:59:04 cdr.cs.colorado.edu amd[29648]: " + "noconn option exists, and was turned on! (May cause NFS hangs on some systems...)";
    record = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(message))));
    Assert.assertEquals("Oct 17 08:59:04", record.get("timestamp"));
    Assert.assertEquals("cdr.cs.colorado.edu", record.get("logsource"));
    Assert.assertEquals("amd", record.get("program"));
    Assert.assertEquals("29648", record.get("pid"));
    Assert.assertEquals("noconn option exists, and was turned on! (May cause NFS hangs on some systems...)", record.get("message"));
}
Also used: StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification), StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord), Test (org.junit.Test)

Example 52 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class GrokRecordFormatTest, method testDefault:

@Test
public void testDefault() throws Exception {
    FormatSpecification spec = new FormatSpecification(Formats.GROK, null, GrokRecordFormat.settings("%{GREEDYDATA:body}"));
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(spec);
    String message = "Oct 17 08:59:00 suod newsyslog[6215]: logfile turned over";
    StructuredRecord record = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(message))));
    Assert.assertEquals("Oct 17 08:59:00 suod newsyslog[6215]: logfile turned over", record.get("body"));
}
Also used: StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification), StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord), Test (org.junit.Test)
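
Beyond the original tests, the same API can pull several named captures out of one event. The following sketch is not from the cdap repository: the pattern, field names, and sample input are hypothetical, and it assumes the stock Grok patterns IP, WORD, and URIPATH are available to GrokRecordFormat and that each named capture surfaces as a field when no schema is supplied.

@Test
public void testCustomPattern() throws Exception {
    // Hypothetical pattern: extracts three named fields from an access-log style line.
    // Assumes the built-in Grok patterns IP, WORD, and URIPATH are on the default pattern path.
    FormatSpecification spec = new FormatSpecification(Formats.GROK, null, GrokRecordFormat.settings("%{IP:ip} %{WORD:method} %{URIPATH:path}"));
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(spec);
    String message = "10.0.0.1 GET /index.html";
    StructuredRecord record = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(message))));
    Assert.assertEquals("10.0.0.1", record.get("ip"));
    Assert.assertEquals("GET", record.get("method"));
    Assert.assertEquals("/index.html", record.get("path"));
}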

Example 53 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class AvroRecordFormatTest, method testNestedRecord:

@Test
public void testNestedRecord() throws Exception {
    Schema innerSchema = Schema.recordOf("inner",
        Schema.Field.of("int", Schema.of(Schema.Type.INT)),
        Schema.Field.of("double", Schema.of(Schema.Type.DOUBLE)),
        Schema.Field.of("array", Schema.arrayOf(Schema.of(Schema.Type.FLOAT))),
        Schema.Field.of("map", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.STRING))));
    Schema schema = Schema.recordOf("record",
        Schema.Field.of("int", Schema.of(Schema.Type.INT)),
        Schema.Field.of("record", innerSchema));
    org.apache.avro.Schema avroInnerSchema = convertSchema(innerSchema);
    org.apache.avro.Schema avroSchema = convertSchema(schema);
    GenericRecord record = new GenericRecordBuilder(avroSchema)
        .set("int", Integer.MAX_VALUE)
        .set("record", new GenericRecordBuilder(avroInnerSchema)
            .set("int", 5)
            .set("double", 3.14159)
            .set("array", ImmutableList.of(1.0f, 2.0f))
            .set("map", ImmutableMap.of("key", "value"))
            .build())
        .build();
    FormatSpecification formatSpecification = new FormatSpecification(Formats.AVRO, schema, Collections.emptyMap());
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(formatSpecification);
    StructuredRecord actual = format.read(toStreamEvent(record));
    Assert.assertEquals(Integer.MAX_VALUE, (int) actual.get("int"));
    StructuredRecord actualInner = actual.get("record");
    Assert.assertEquals(5, (int) actualInner.get("int"));
    Assert.assertEquals(3.14159d, actualInner.get("double"), 0.0001d);
    List<Float> array = actualInner.get("array");
    Assert.assertEquals(ImmutableList.of(1.0f, 2.0f), array);
    Map<String, String> map = actualInner.get("map");
    Assert.assertEquals(ImmutableMap.of("key", "value"), map);
}
Also used: Schema (co.cask.cdap.api.data.schema.Schema), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification), StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord), GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder), GenericRecord (org.apache.avro.generic.GenericRecord), Test (org.junit.Test)
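
The helpers convertSchema(...) and toStreamEvent(...) used by the Avro examples are defined elsewhere in AvroRecordFormatTest and are not shown in these snippets. Below is a minimal sketch of what they plausibly look like, assuming convertSchema parses the CDAP schema's JSON form with the Avro parser and toStreamEvent writes the record with Avro's binary encoder into the event body; it additionally relies on org.apache.avro.generic.GenericDatumWriter, org.apache.avro.io.BinaryEncoder, org.apache.avro.io.EncoderFactory, and java.io.ByteArrayOutputStream. The two-argument toStreamEvent(record, true) variant, which attaches the writer schema to the event, is omitted here because its header keys are not visible in these examples.

// A plausible shape for the helpers used above (an assumption, not the original source).
private org.apache.avro.Schema convertSchema(Schema cdapSchema) {
    // Assumes the CDAP schema serializes to Avro-compatible JSON that the Avro parser can read directly.
    return new org.apache.avro.Schema.Parser().parse(cdapSchema.toString());
}

private StreamEvent toStreamEvent(GenericRecord record) throws IOException {
    // Serialize the record with Avro's binary encoding and wrap the bytes in a StreamEvent body.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(record.getSchema()).write(record, encoder);
    encoder.flush();
    return new StreamEvent(ByteBuffer.wrap(out.toByteArray()));
}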

Example 54 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class AvroRecordFormatTest, method testSchemaProjection:

@Test
public void testSchemaProjection() throws Exception {
    Schema sourceSchema = Schema.recordOf("source", Schema.Field.of("id", Schema.of(Schema.Type.INT)), Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    Schema readSchema = Schema.recordOf("read", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    GenericRecord record = new GenericRecordBuilder(convertSchema(sourceSchema)).set("id", 1).set("name", "value").build();
    FormatSpecification formatSpecification = new FormatSpecification(Formats.AVRO, readSchema, ImmutableMap.of());
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(formatSpecification);
    // Convert an event that has a schema associated with it.
    StructuredRecord projectedRecord = format.read(toStreamEvent(record, true));
    Assert.assertEquals(record.get("name").toString(), projectedRecord.get("name").toString());
    // Convert an event that has no schema associated with it; such a record must have been written with the read schema.
    record = new GenericRecordBuilder(convertSchema(readSchema)).set("name", "value2").build();
    projectedRecord = format.read(toStreamEvent(record));
    Assert.assertEquals(record.get("name").toString(), projectedRecord.get("name").toString());
}
Also used: Schema (co.cask.cdap.api.data.schema.Schema), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification), GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder), GenericRecord (org.apache.avro.generic.GenericRecord), StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord), Test (org.junit.Test)

Example 55 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class AvroRecordFormatTest, method testMultipleReads:

@Test
public void testMultipleReads() throws Exception {
    Schema schema = Schema.recordOf("record", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
    FormatSpecification formatSpecification = new FormatSpecification(Formats.AVRO, schema, Collections.emptyMap());
    org.apache.avro.Schema avroSchema = convertSchema(schema);
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(formatSpecification);
    GenericRecord record = new GenericRecordBuilder(avroSchema).set("x", 5).build();
    StructuredRecord actual = format.read(toStreamEvent(record));
    Assert.assertEquals(5, (int) actual.get("x"));
    record = new GenericRecordBuilder(avroSchema).set("x", 10).build();
    actual = format.read(toStreamEvent(record));
    Assert.assertEquals(10, (int) actual.get("x"));
}
Also used: Schema (co.cask.cdap.api.data.schema.Schema), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification), GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder), GenericRecord (org.apache.avro.generic.GenericRecord), StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord), Test (org.junit.Test)

Aggregations

StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 84
Test (org.junit.Test): 65
Location (org.apache.twill.filesystem.Location): 27
StreamId (co.cask.cdap.proto.id.StreamId): 24
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 19
FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 17
Schema (co.cask.cdap.api.data.schema.Schema): 10
IOException (java.io.IOException): 9
StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig): 8
ByteBuffer (java.nio.ByteBuffer): 8
ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig): 7
StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin): 6
TransactionContext (org.apache.tephra.TransactionContext): 6
BinaryDecoder (co.cask.cdap.common.io.BinaryDecoder): 5
TypeToken (com.google.common.reflect.TypeToken): 5
StreamEventCodec (co.cask.cdap.common.stream.StreamEventCodec): 4
IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder): 4
File (java.io.File): 4
SchemaHash (co.cask.cdap.api.data.schema.SchemaHash): 3
QueueName (co.cask.cdap.common.queue.QueueName): 3