Search in sources :

Example 56 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class GrokRecordFormatTest method testSyslog.

/**
 * Reads two raw syslog lines through the record format created for {@code Formats.SYSLOG}
 * and checks the "timestamp", "logsource", "program", "pid" and "message" fields of each
 * resulting record.
 */
@Test
public void testSyslog() throws Exception {
    RecordFormat<StreamEvent, StructuredRecord> syslogFormat = RecordFormats.createInitializedFormat(
        new FormatSpecification(Formats.SYSLOG, null, Collections.<String, String>emptyMap()));

    // First line: short log source name.
    String firstLine = "Oct 17 08:59:00 suod newsyslog[6215]: logfile turned over";
    StreamEvent firstEvent = new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(firstLine)));
    StructuredRecord first = syslogFormat.read(firstEvent);
    Assert.assertEquals("Oct 17 08:59:00", first.get("timestamp"));
    Assert.assertEquals("suod", first.get("logsource"));
    Assert.assertEquals("newsyslog", first.get("program"));
    Assert.assertEquals("6215", first.get("pid"));
    Assert.assertEquals("logfile turned over", first.get("message"));

    // Second line: dotted log source name and a message body containing punctuation.
    String secondBody = "noconn option exists, and was turned on! (May cause NFS hangs on some systems...)";
    String secondLine = "Oct 17 08:59:04 cdr.cs.colorado.edu amd[29648]: " + secondBody;
    StreamEvent secondEvent = new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(secondLine)));
    StructuredRecord second = syslogFormat.read(secondEvent);
    Assert.assertEquals("Oct 17 08:59:04", second.get("timestamp"));
    Assert.assertEquals("cdr.cs.colorado.edu", second.get("logsource"));
    Assert.assertEquals("amd", second.get("program"));
    Assert.assertEquals("29648", second.get("pid"));
    Assert.assertEquals(secondBody, second.get("message"));
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 57 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class RecordPutTransformerTest method testNullRowkeyThrowsException.

/**
 * Builds a record in which the nullable field "key" is never set, then hands it to a
 * {@code RecordPutTransformer} that uses "key" as its row key. The test passes only if an
 * {@link IllegalArgumentException} is thrown.
 */
@Test(expected = IllegalArgumentException.class)
public void testNullRowkeyThrowsException() throws Exception {
    Schema nullableString = Schema.nullableOf(Schema.of(Schema.Type.STRING));
    Schema recordSchema = Schema.recordOf("record", Schema.Field.of("key", nullableString));
    RecordPutTransformer putTransformer = new RecordPutTransformer("key", recordSchema);
    // No value is set for "key" before building.
    StructuredRecord keylessRecord = StructuredRecord.builder(recordSchema).build();
    putTransformer.toPut(keylessRecord);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 58 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class RecordPutTransformerTest method testNullableFields.

/**
 * Transforms a record that sets the int row key and the "non_nullable" field but leaves
 * the "nullable" field unset, then checks the row key and the values of the resulting
 * {@code Put}.
 */
@Test
public void testNullableFields() throws Exception {
    Schema.Field keyField = Schema.Field.of("key", Schema.of(Schema.Type.INT));
    Schema.Field optionalField = Schema.Field.of("nullable", Schema.nullableOf(Schema.of(Schema.Type.STRING)));
    Schema.Field requiredField = Schema.Field.of("non_nullable", Schema.of(Schema.Type.STRING));
    Schema recordSchema = Schema.recordOf("record", keyField, optionalField, requiredField);
    RecordPutTransformer putTransformer = new RecordPutTransformer("key", recordSchema);

    // A valid record: only the nullable field is left unset.
    StructuredRecord input = StructuredRecord.builder(recordSchema)
        .set("key", 1)
        .set("non_nullable", "foo")
        .build();
    Put result = putTransformer.toPut(input);

    Assert.assertEquals(1, Bytes.toInt(result.getRow()));
    // Two values are expected in the Put, and the nullable column must read back as null.
    Assert.assertEquals(2, result.getValues().size());
    byte[] requiredColumn = Bytes.toBytes("non_nullable");
    Assert.assertEquals("foo", Bytes.toString(result.getValues().get(requiredColumn)));
    byte[] optionalColumn = Bytes.toBytes("nullable");
    Assert.assertNull(result.getValues().get(optionalColumn));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Put(co.cask.cdap.api.dataset.table.Put) Test(org.junit.Test)

Example 59 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class RecordPutTransformerTest method testTransform.

/**
 * Transforms a record holding one field of each supported primitive kind (boolean, int,
 * long, float, double, bytes) plus a string row key, and checks that every value can be
 * decoded from the resulting {@code Put} with the matching {@code Bytes} helper.
 */
@Test
public void testTransform() throws Exception {
    Schema schema = Schema.recordOf("record",
        Schema.Field.of("boolField", Schema.nullableOf(Schema.of(Schema.Type.BOOLEAN))),
        Schema.Field.of("intField", Schema.nullableOf(Schema.of(Schema.Type.INT))),
        Schema.Field.of("longField", Schema.nullableOf(Schema.of(Schema.Type.LONG))),
        Schema.Field.of("floatField", Schema.nullableOf(Schema.of(Schema.Type.FLOAT))),
        Schema.Field.of("doubleField", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
        Schema.Field.of("bytesField", Schema.nullableOf(Schema.of(Schema.Type.BYTES))),
        Schema.Field.of("stringField", Schema.of(Schema.Type.STRING)));
    RecordPutTransformer transformer = new RecordPutTransformer("stringField", schema);
    StructuredRecord record = StructuredRecord.builder(schema)
        .set("boolField", true)
        .set("intField", 5)
        .set("longField", 10L)
        .set("floatField", 3.14f)
        .set("doubleField", 3.14)
        .set("bytesField", Bytes.toBytes("foo"))
        .set("stringField", "key")
        .build();
    Put transformed = transformer.toPut(record);

    // The string field is configured as the row key.
    Assert.assertEquals("key", Bytes.toString(transformed.getRow()));

    Map<byte[], byte[]> values = transformed.getValues();
    Assert.assertTrue(Bytes.toBoolean(values.get(Bytes.toBytes("boolField"))));
    Assert.assertEquals(5, Bytes.toInt(values.get(Bytes.toBytes("intField"))));
    Assert.assertEquals(10L, Bytes.toLong(values.get(Bytes.toBytes("longField"))));
    // Delta-based assertEquals reports expected and actual values on failure, unlike
    // assertTrue over a hand-computed absolute difference.
    Assert.assertEquals(3.14f, Bytes.toFloat(values.get(Bytes.toBytes("floatField"))), 0.000001);
    Assert.assertEquals(3.14, Bytes.toDouble(values.get(Bytes.toBytes("doubleField"))), 0.000001);
    Assert.assertArrayEquals(Bytes.toBytes("foo"), values.get(Bytes.toBytes("bytesField")));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Put(co.cask.cdap.api.dataset.table.Put) Test(org.junit.Test)

Example 60 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class StructuredRecordBuilderTest method testDateConversion.

/**
 * Feeds the epoch {@link Date} to {@code convertAndSet} for a long field, a string field
 * without an explicit format, and a string field with a UTC "yyyy-MM-dd" format, and
 * expects the record to equal one built from the literal values
 * {@code 0L}, "1970-01-01T00:00:00 UTC" and "1970-01-01".
 */
@Test
public void testDateConversion() {
    Schema schema = Schema.recordOf("x1",
        Schema.Field.of("ts", Schema.of(Schema.Type.LONG)),
        Schema.Field.of("date1", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("date2", Schema.of(Schema.Type.STRING)));

    long epochMillis = 0L;
    Date epoch = new Date(epochMillis);
    // Pin the formatter to UTC so the expected day string does not depend on the host zone.
    SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
    dayFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

    StructuredRecord expected = StructuredRecord.builder(schema)
        .set("ts", epochMillis)
        .set("date1", "1970-01-01T00:00:00 UTC")
        .set("date2", "1970-01-01")
        .build();
    StructuredRecord actual = StructuredRecord.builder(schema)
        .convertAndSet("ts", epoch)
        .convertAndSet("date1", epoch)
        .convertAndSet("date2", epoch, dayFormat)
        .build();

    Assert.assertEquals(expected, actual);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) SimpleDateFormat(java.text.SimpleDateFormat) Date(java.util.Date) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Aggregations

StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord) 97, Schema (co.cask.cdap.api.data.schema.Schema) 71, Test (org.junit.Test) 51, Table (co.cask.cdap.api.dataset.table.Table) 36, ETLStage (co.cask.cdap.etl.proto.v2.ETLStage) 36, ApplicationId (co.cask.cdap.proto.id.ApplicationId) 36, ApplicationManager (co.cask.cdap.test.ApplicationManager) 33, AppRequest (co.cask.cdap.proto.artifact.AppRequest) 31, KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable) 25, ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig) 25, WorkflowManager (co.cask.cdap.test.WorkflowManager) 23, ArrayList (java.util.ArrayList) 20, StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent) 19, FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification) 18, HashSet (java.util.HashSet) 10, DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig) 8, File (java.io.File) 8, TimeoutException (java.util.concurrent.TimeoutException) 8, Put (co.cask.cdap.api.dataset.table.Put) 7, ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin) 7