Search in sources:

Example 6 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class DelimitedStringsRecordFormatTest method testStringArrayFormat.

/**
 * Reads a comma-separated stream event body through a {@link DelimitedStringsRecordFormat}
 * that was initialized with a {@code null} specification, and checks that the "body" field of
 * the resulting record is the string array obtained by splitting the input on commas.
 *
 * @throws UnsupportedTypeException declared by the format's initialization
 * @throws UnexpectedFormatException declared by the format's read path
 */
@Test
public void testStringArrayFormat() throws UnsupportedTypeException, UnexpectedFormatException {
    DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
    // No specification is supplied; the assertion below expects splitting on "," in that case.
    format.initialize(null);
    String body = "userX,actionY,itemZ";
    StructuredRecord output = format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(body))));
    String[] actual = output.get("body");
    String[] expected = body.split(",");
    // assertArrayEquals passes/fails in the same cases as assertTrue(Arrays.equals(...)) but
    // reports the mismatching index and values instead of a bare AssertionError.
    Assert.assertArrayEquals(expected, actual);
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 7 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class AvroRecordFormatTest method toStreamEvent.

/**
 * Binary-encodes the given Avro record using the record's own schema and wraps the resulting
 * bytes as the body of a {@link StreamEvent}.
 *
 * @param record the Avro record to serialize
 * @param writeSchema when {@code true}, the schema string and its MD5 hash are added as event
 *                    headers; when {@code false}, the event is created with an empty header map
 * @return a stream event carrying the encoded record as its body
 * @throws IOException if encoding or flushing the record fails
 */
private StreamEvent toStreamEvent(GenericRecord record, boolean writeSchema) throws IOException {
    // Serialize the record into an in-memory buffer.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(buffer, null);
    new GenericDatumWriter<GenericRecord>(record.getSchema()).write(record, binaryEncoder);
    binaryEncoder.flush();
    buffer.close();
    ByteBuffer payload = ByteBuffer.wrap(buffer.toByteArray());

    // Optionally describe the schema in the headers.
    String schemaText = record.getSchema().toString();
    Map<String, String> eventHeaders = Maps.newHashMap();
    if (writeSchema) {
        String schemaHash = Hashing.md5().hashString(schemaText, Charsets.UTF_8).toString();
        eventHeaders.put(AvroRecordFormat.SCHEMA, schemaText);
        eventHeaders.put(AvroRecordFormat.SCHEMA_HASH, schemaHash);
    }
    return new StreamEvent(eventHeaders, payload);
}
Also used : BinaryEncoder(org.apache.avro.io.BinaryEncoder) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 8 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class AvroRecordFormatTest method testFlatRecord.

/**
 * Round-trips a flat Avro record (primitives, bytes, string, array, map and two nullable
 * string fields) through the Avro record format and checks every field of the decoded
 * {@link StructuredRecord} against the value that was written.
 */
@Test
public void testFlatRecord() throws Exception {
    // Schema with one field per supported flat type.
    Schema recordSchema = Schema.recordOf(
        "record",
        Schema.Field.of("int", Schema.of(Schema.Type.INT)),
        Schema.Field.of("long", Schema.of(Schema.Type.LONG)),
        Schema.Field.of("boolean", Schema.of(Schema.Type.BOOLEAN)),
        Schema.Field.of("bytes", Schema.of(Schema.Type.BYTES)),
        Schema.Field.of("double", Schema.of(Schema.Type.DOUBLE)),
        Schema.Field.of("float", Schema.of(Schema.Type.FLOAT)),
        Schema.Field.of("string", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("array", Schema.arrayOf(Schema.of(Schema.Type.INT))),
        Schema.Field.of("map", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.INT))),
        Schema.Field.of("nullable", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))),
        Schema.Field.of("nullable2", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))));
    FormatSpecification avroSpec = new FormatSpecification(Formats.AVRO, recordSchema, Collections.emptyMap());

    // Build the Avro record to be written; "nullable" is left null, "nullable2" is populated.
    org.apache.avro.Schema avroSchema = convertSchema(recordSchema);
    GenericRecord written = new GenericRecordBuilder(avroSchema)
        .set("int", Integer.MAX_VALUE)
        .set("long", Long.MAX_VALUE)
        .set("boolean", false)
        .set("bytes", Charsets.UTF_8.encode("hello world"))
        .set("double", Double.MAX_VALUE)
        .set("float", Float.MAX_VALUE)
        .set("string", "foo bar")
        .set("array", Lists.newArrayList(1, 2, 3))
        .set("map", ImmutableMap.of("k1", 1, "k2", 2))
        .set("nullable", null)
        .set("nullable2", "Hello")
        .build();

    // Decode the event through the format under test.
    RecordFormat<StreamEvent, StructuredRecord> avroFormat =
        RecordFormats.createInitializedFormat(avroSpec);
    StructuredRecord decoded = avroFormat.read(toStreamEvent(written));

    Assert.assertEquals(Integer.MAX_VALUE, (int) decoded.get("int"));
    Assert.assertEquals(Long.MAX_VALUE, (long) decoded.get("long"));
    Assert.assertFalse(decoded.get("boolean"));
    Assert.assertArrayEquals(Bytes.toBytes("hello world"), Bytes.toBytes((ByteBuffer) decoded.get("bytes")));
    Assert.assertEquals(Double.MAX_VALUE, decoded.get("double"), 0.0001d);
    Assert.assertEquals(Float.MAX_VALUE, decoded.get("float"), 0.0001f);
    Assert.assertEquals("foo bar", decoded.get("string"));
    Assert.assertEquals(Lists.newArrayList(1, 2, 3), decoded.get("array"));
    assertMapEquals(ImmutableMap.of("k1", 1, "k2", 2), decoded.get("map"));
    Assert.assertNull(decoded.get("nullable"));
    Assert.assertEquals("Hello", decoded.get("nullable2"));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) ByteBuffer(java.nio.ByteBuffer) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 9 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class CombinedLogRecordFormatTest method testCLFLogWithEscapedDoubleQuotes.

/**
 * Parses a combined-log-format line whose user-agent field contains backslash-escaped double
 * quotes and checks each extracted field; the escaped quotes are expected to be kept verbatim
 * in the "user_agent" value.
 */
@Test
public void testCLFLogWithEscapedDoubleQuotes() throws UnsupportedTypeException, UnexpectedFormatException {
    FormatSpecification clfSpec =
        new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.of());
    CombinedLogRecordFormat clfFormat = new CombinedLogRecordFormat();
    clfFormat.initialize(clfSpec);

    String logLine = "10.10.10.10 - - [01/Feb/2015:06:38:58 +0000] \"GET /plugins/servlet/buildStatusImage/CDAP-DUT "
        + "HTTP/1.1\" 301 257 \"http://cdap.io/\" \"\\\"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, "
        + "like Gecko) Chrome/31.0.1650.57 Safari/537.36 OPR/18.0.1284.49\\\"\"";
    StreamEvent event = new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(logLine)));
    StructuredRecord parsed = clfFormat.read(event);

    Assert.assertEquals("10.10.10.10", parsed.get("remote_host"));
    // The two "-" placeholders in the line are expected to come back as null.
    Assert.assertNull(parsed.get("remote_login"));
    Assert.assertNull(parsed.get("auth_user"));
    Assert.assertEquals("01/Feb/2015:06:38:58 +0000", parsed.get("request_time"));
    Assert.assertEquals("GET /plugins/servlet/buildStatusImage/CDAP-DUT HTTP/1.1", parsed.get("request"));
    Assert.assertEquals(301, (int) parsed.get("status"));
    Assert.assertEquals(257, (int) parsed.get("content_length"));
    Assert.assertEquals("http://cdap.io/", parsed.get("referrer"));
    Assert.assertEquals(
        "\\\"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
            + "Chrome/31.0.1650.57 Safari/537.36 OPR/18.0.1284.49\\\"",
        parsed.get("user_agent"));
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 10 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

the class CombinedLogRecordFormatTest method testCLFLogWithNull.

/**
 * Parses a combined-log-format line in which the request, content length, referrer and
 * user agent are all the "-" placeholder, and checks that those fields are read as null
 * while the host, timestamp and status are still extracted.
 */
@Test
public void testCLFLogWithNull() throws UnsupportedTypeException, UnexpectedFormatException {
    FormatSpecification clfSpec =
        new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.of());
    CombinedLogRecordFormat clfFormat = new CombinedLogRecordFormat();
    clfFormat.initialize(clfSpec);

    String logLine = "10.10.10.10 - - [01/Feb/2015:09:58:24 +0000] \"-\" 408 - \"-\" \"-\"";
    StreamEvent event = new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(logLine)));
    StructuredRecord parsed = clfFormat.read(event);

    // Fields that carry real values in the line.
    Assert.assertEquals("10.10.10.10", parsed.get("remote_host"));
    Assert.assertEquals("01/Feb/2015:09:58:24 +0000", parsed.get("request_time"));
    Assert.assertEquals(408, (int) parsed.get("status"));

    // Fields given as "-" in the line are expected to be null.
    Assert.assertNull(parsed.get("remote_login"));
    Assert.assertNull(parsed.get("auth_user"));
    Assert.assertNull(parsed.get("request"));
    Assert.assertNull(parsed.get("content_length"));
    Assert.assertNull(parsed.get("referrer"));
    Assert.assertNull(parsed.get("user_agent"));
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Aggregations

StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 84, Test (org.junit.Test): 65, Location (org.apache.twill.filesystem.Location): 27, StreamId (co.cask.cdap.proto.id.StreamId): 24, StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 19, FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 17, Schema (co.cask.cdap.api.data.schema.Schema): 10, IOException (java.io.IOException): 9, StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig): 8, ByteBuffer (java.nio.ByteBuffer): 8, ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig): 7, StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin): 6, TransactionContext (org.apache.tephra.TransactionContext): 6, BinaryDecoder (co.cask.cdap.common.io.BinaryDecoder): 5, TypeToken (com.google.common.reflect.TypeToken): 5, StreamEventCodec (co.cask.cdap.common.stream.StreamEventCodec): 4, IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder): 4, File (java.io.File): 4, SchemaHash (co.cask.cdap.api.data.schema.SchemaHash): 3, QueueName (co.cask.cdap.common.queue.QueueName): 3