Search in sources :

Example 76 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class AvroRecordFormatTest method testSchemaProjection.

/**
 * Reads Avro stream events through a format whose read schema declares only the {@code name}
 * field and checks that the {@code name} value of the written record is what comes back.
 *
 * <p>Two events are exercised: one written with a wider schema ({@code id} + nullable
 * {@code name}) and one written directly with the read schema.
 */
@Test
public void testSchemaProjection() throws Exception {
    Schema.Field idField = Schema.Field.of("id", Schema.of(Schema.Type.INT));
    Schema.Field nullableNameField = Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING)));
    Schema writerSchema = Schema.recordOf("source", idField, nullableNameField);
    Schema projectionSchema = Schema.recordOf("read", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));

    GenericRecord wideRecord = new GenericRecordBuilder(convertSchema(writerSchema))
        .set("id", 1)
        .set("name", "value")
        .build();

    FormatSpecification avroSpec =
        new FormatSpecification(Formats.AVRO, projectionSchema, ImmutableMap.<String, String>of());
    RecordFormat<StreamEvent, StructuredRecord> avroFormat = RecordFormats.createInitializedFormat(avroSpec);

    // Case 1: the event has a schema associated with it.
    StructuredRecord fromWide = avroFormat.read(toStreamEvent(wideRecord, true));
    Assert.assertEquals(wideRecord.get("name").toString(), fromWide.get("name").toString());

    // Case 2: the event has no schema associated, so the record must be written with the read schema.
    GenericRecord narrowRecord = new GenericRecordBuilder(convertSchema(projectionSchema))
        .set("name", "value2")
        .build();
    StructuredRecord fromNarrow = avroFormat.read(toStreamEvent(narrowRecord));
    Assert.assertEquals(narrowRecord.get("name").toString(), fromNarrow.get("name").toString());
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 77 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class CombinedLogRecordFormatTest method testCLFLog.

/**
 * Feeds one access-log line to {@link CombinedLogRecordFormat} and verifies each extracted
 * field. The fields written as {@code -} in the line (remote login, auth user, referrer)
 * are expected to come back as {@code null}.
 */
@Test
public void testCLFLog() throws UnsupportedTypeException, UnexpectedFormatException {
    CombinedLogRecordFormat clfFormat = new CombinedLogRecordFormat();
    clfFormat.initialize(
        new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null,
                                ImmutableMap.<String, String>of()));

    // Pieces of the log line, reused below as the expected field values.
    String host = "10.10.10.10";
    String timestamp = "01/Feb/2015:06:47:10 +0000";
    String requestLine = "GET /browse/COOP-DBT-JOB1-238/artifact HTTP/1.1";
    String agent = "Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)";
    String logLine = host + " - - [" + timestamp + "] \"" + requestLine + "\""
        + " 301 256 \"-\" \"" + agent + "\"";

    byte[] payload = Bytes.toBytes(logLine);
    StreamEvent event = new StreamEvent(ByteBuffer.wrap(payload));
    StructuredRecord parsed = clfFormat.read(event);

    Assert.assertEquals(host, parsed.get("remote_host"));
    Assert.assertNull(parsed.get("remote_login"));
    Assert.assertNull(parsed.get("auth_user"));
    Assert.assertEquals(timestamp, parsed.get("request_time"));
    Assert.assertEquals(requestLine, parsed.get("request"));
    Assert.assertEquals(301, parsed.get("status"));
    Assert.assertEquals(256, parsed.get("content_length"));
    Assert.assertNull(parsed.get("referrer"));
    Assert.assertEquals(agent, parsed.get("user_agent"));
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 78 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class CombinedLogRecordFormatTest method testInvalid.

/**
 * Verifies that {@link CombinedLogRecordFormat} rejects a line that is not a complete log
 * entry: reading it is expected to throw {@link UnexpectedFormatException}.
 */
@Test(expected = UnexpectedFormatException.class)
public void testInvalid() throws UnsupportedTypeException, UnexpectedFormatException {
    CombinedLogRecordFormat format = new CombinedLogRecordFormat();
    FormatSpecification spec = new FormatSpecification(CombinedLogRecordFormat.class.getCanonicalName(), null, ImmutableMap.<String, String>of());
    format.initialize(spec);
    // Truncated input: host immediately followed by an unterminated timestamp bracket.
    String data = "10.10.10.10[01/Feb/2015:06:47:10 +0000";
    // The result is deliberately discarded; the test passes only if read() throws.
    format.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(data))));
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 79 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class DelimitedStringsRecordFormatTest method testFormatRecordWithSchema.

/**
 * Reads comma-delimited stream events with {@link DelimitedStringsRecordFormat} using an
 * explicit schema: six fields ({@code f1}..{@code f6}) that are each a union of a simple type
 * and null, followed by a string array field {@code f7}.
 *
 * <p>The first event supplies a value for every field and three trailing values that are
 * expected in {@code f7}. The second event leaves some positions empty and expects those
 * fields to be {@code null}.
 */
@Test
public void testFormatRecordWithSchema() throws UnsupportedTypeException, UnexpectedFormatException {
    Schema.Field boolField =
        Schema.Field.of("f1", Schema.unionOf(Schema.of(Schema.Type.BOOLEAN), Schema.of(Schema.Type.NULL)));
    Schema.Field intField =
        Schema.Field.of("f2", Schema.unionOf(Schema.of(Schema.Type.INT), Schema.of(Schema.Type.NULL)));
    Schema.Field floatField =
        Schema.Field.of("f3", Schema.unionOf(Schema.of(Schema.Type.FLOAT), Schema.of(Schema.Type.NULL)));
    Schema.Field doubleField =
        Schema.Field.of("f4", Schema.unionOf(Schema.of(Schema.Type.DOUBLE), Schema.of(Schema.Type.NULL)));
    Schema.Field bytesField =
        Schema.Field.of("f5", Schema.unionOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.NULL)));
    Schema.Field stringField =
        Schema.Field.of("f6", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL)));
    Schema.Field arrayField = Schema.Field.of("f7", Schema.arrayOf(Schema.of(Schema.Type.STRING)));
    Schema eventSchema = Schema.recordOf(
        "event", boolField, intField, floatField, doubleField, bytesField, stringField, arrayField);

    DelimitedStringsRecordFormat delimited = new DelimitedStringsRecordFormat();
    delimited.initialize(
        new FormatSpecification(DelimitedStringsRecordFormat.class.getCanonicalName(), eventSchema,
                                ImmutableMap.of(DelimitedStringsRecordFormat.DELIMITER, ",")));

    boolean expectedBool = false;
    int expectedInt = Integer.MAX_VALUE;
    float expectedFloat = Float.MAX_VALUE;
    double expectedDouble = Double.MAX_VALUE;
    byte[] expectedBytes = new byte[] { 0, 1, 2 };
    String expectedString = "foo bar";
    String[] expectedArray = new String[] { "extra1", "extra2", "extra3" };

    // One comma-separated line: the six scalar values followed by the three array entries.
    String line = expectedBool + "," + expectedInt + "," + expectedFloat + "," + expectedDouble + ","
        + Bytes.toStringBinary(expectedBytes) + "," + expectedString + ","
        + expectedArray[0] + "," + expectedArray[1] + "," + expectedArray[2];

    StructuredRecord parsed = delimited.read(new StreamEvent(ByteBuffer.wrap(Bytes.toBytes(line))));
    Assert.assertEquals(expectedBool, parsed.get("f1"));
    Assert.assertEquals(expectedInt, parsed.get("f2"));
    Assert.assertEquals(expectedFloat, parsed.get("f3"));
    Assert.assertEquals(expectedDouble, parsed.get("f4"));
    Assert.assertArrayEquals(expectedBytes, (byte[]) parsed.get("f5"));
    Assert.assertEquals(expectedString, parsed.get("f6"));
    Assert.assertArrayEquals(expectedArray, (String[]) parsed.get("f7"));

    // Second event: empty positions for f2, f4 and f5 are expected to be read as null.
    byte[] sparseLine = Bytes.toBytes("true,,3.14159,,,hello world,extra1");
    parsed = delimited.read(new StreamEvent(ByteBuffer.wrap(sparseLine)));
    Assert.assertTrue((Boolean) parsed.get("f1"));
    Assert.assertNull(parsed.get("f2"));
    // f3 is stored as a float, so compare against the double literal with a tolerance.
    float parsedFloat = (Float) parsed.get("f3");
    double deviation = Math.abs(3.14159 - parsedFloat);
    Assert.assertTrue(deviation < 0.000001);
    Assert.assertNull(parsed.get("f4"));
    Assert.assertNull(parsed.get("f5"));
    Assert.assertEquals("hello world", parsed.get("f6"));
    Assert.assertArrayEquals(new String[] { "extra1" }, (String[]) parsed.get("f7"));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 80 with StructuredRecord

use of co.cask.cdap.api.data.format.StructuredRecord in project cdap by caskdata.

the class DelimitedStringsRecordFormatTest method testCSV.

/**
 * Reads a comma-separated line through the {@code Formats.CSV} format created with no schema
 * and no settings, and expects the {@code body} field to hold the line split on commas.
 */
@Test
public void testCSV() throws Exception {
    String line = "userX,actionY,itemZ";
    String[] expectedTokens = line.split(",");

    FormatSpecification csvSpec =
        new FormatSpecification(Formats.CSV, null, Collections.<String, String>emptyMap());
    RecordFormat<StreamEvent, StructuredRecord> csvFormat = RecordFormats.createInitializedFormat(csvSpec);

    ByteBuffer payload = ByteBuffer.wrap(Bytes.toBytes(line));
    StructuredRecord parsed = csvFormat.read(new StreamEvent(payload));

    String[] actualTokens = parsed.get("body");
    Assert.assertArrayEquals(expectedTokens, actualTokens);
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Aggregations

StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)97 Schema (co.cask.cdap.api.data.schema.Schema)71 Test (org.junit.Test)51 Table (co.cask.cdap.api.dataset.table.Table)36 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)36 ApplicationId (co.cask.cdap.proto.id.ApplicationId)36 ApplicationManager (co.cask.cdap.test.ApplicationManager)33 AppRequest (co.cask.cdap.proto.artifact.AppRequest)31 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)25 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)25 WorkflowManager (co.cask.cdap.test.WorkflowManager)23 ArrayList (java.util.ArrayList)20 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)19 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)18 HashSet (java.util.HashSet)10 DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig)8 File (java.io.File)8 TimeoutException (java.util.concurrent.TimeoutException)8 Put (co.cask.cdap.api.dataset.table.Put)7 ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin)7