Search in sources :

Example 21 with FormatSpecification

use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class DelimitedStringsRecordFormatTest method testFormatRecordWithSchema.

/**
 * Reads comma-delimited bodies through a {@link DelimitedStringsRecordFormat} initialized with a
 * schema of six nullable scalar fields (f1-f6) followed by a string array field (f7).
 *
 * <p>First a fully populated line is read and every field is compared with the value used to build
 * the line; the three trailing tokens are expected in the array field. Then a line with empty
 * tokens is read and the corresponding fields are expected to be {@code null}.
 */
@Test
public void testFormatRecordWithSchema() throws UnsupportedTypeException, UnexpectedFormatException {
    Schema schema = Schema.recordOf(
        "event",
        Schema.Field.of("f1", Schema.unionOf(Schema.of(Schema.Type.BOOLEAN), Schema.of(Schema.Type.NULL))),
        Schema.Field.of("f2", Schema.unionOf(Schema.of(Schema.Type.INT), Schema.of(Schema.Type.NULL))),
        Schema.Field.of("f3", Schema.unionOf(Schema.of(Schema.Type.FLOAT), Schema.of(Schema.Type.NULL))),
        Schema.Field.of("f4", Schema.unionOf(Schema.of(Schema.Type.DOUBLE), Schema.of(Schema.Type.NULL))),
        Schema.Field.of("f5", Schema.unionOf(Schema.of(Schema.Type.BYTES), Schema.of(Schema.Type.NULL))),
        Schema.Field.of("f6", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))),
        Schema.Field.of("f7", Schema.arrayOf(Schema.of(Schema.Type.STRING))));

    DelimitedStringsRecordFormat format = new DelimitedStringsRecordFormat();
    FormatSpecification spec = new FormatSpecification(
        DelimitedStringsRecordFormat.class.getCanonicalName(),
        schema,
        ImmutableMap.of(DelimitedStringsRecordFormat.DELIMITER, ","));
    format.initialize(spec);

    // Values used both to build the input line and to check the parsed record.
    boolean expectedBoolean = false;
    int expectedInt = Integer.MAX_VALUE;
    float expectedFloat = Float.MAX_VALUE;
    double expectedDouble = Double.MAX_VALUE;
    byte[] expectedBytes = new byte[] { 0, 1, 2 };
    String expectedString = "foo bar";
    String[] expectedArray = new String[] { "extra1", "extra2", "extra3" };

    // One token per scalar field, then one token per array element.
    String body = String.join(
        ",",
        String.valueOf(expectedBoolean),
        String.valueOf(expectedInt),
        String.valueOf(expectedFloat),
        String.valueOf(expectedDouble),
        Bytes.toStringBinary(expectedBytes),
        expectedString,
        expectedArray[0],
        expectedArray[1],
        expectedArray[2]);

    StructuredRecord record = format.read(ByteBuffer.wrap(Bytes.toBytes(body)));
    Assert.assertEquals(expectedBoolean, record.get("f1"));
    Assert.assertEquals(expectedInt, (int) record.get("f2"));
    Assert.assertEquals(expectedFloat, record.get("f3"), 0.0001f);
    Assert.assertEquals(expectedDouble, record.get("f4"), 0.0001d);
    Assert.assertArrayEquals(expectedBytes, (byte[]) record.get("f5"));
    Assert.assertEquals(expectedString, record.get("f6"));
    Assert.assertArrayEquals(expectedArray, (String[]) record.get("f7"));

    // Same format, but with empty tokens for f2, f4 and f5.
    record = format.read(ByteBuffer.wrap(Bytes.toBytes("true,,3.14159,,,hello world,extra1")));
    Assert.assertTrue((Boolean) record.get("f1"));
    Assert.assertNull(record.get("f2"));
    Assert.assertEquals(3.14159f, record.get("f3"), 0.0001f);
    Assert.assertNull(record.get("f4"));
    Assert.assertNull(record.get("f5"));
    Assert.assertEquals("hello world", record.get("f6"));
    Assert.assertArrayEquals(new String[] { "extra1" }, (String[]) record.get("f7"));
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) FormatSpecification(io.cdap.cdap.api.data.format.FormatSpecification) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 22 with FormatSpecification

use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class DelimitedStringsRecordFormatTest method testRecordMappingTooManyMappings.

/**
 * Expects an {@link IllegalArgumentException} when the format is initialized with a mapping
 * setting ({@code "0:f1,1:f2"}) that names two fields while the schema declares only {@code f1}.
 */
@Test(expected = IllegalArgumentException.class)
public void testRecordMappingTooManyMappings() throws UnsupportedTypeException {
    // Schema with a single field: an array of strings named "f1".
    Schema singleFieldSchema = Schema.recordOf(
        "event",
        Schema.Field.of("f1", Schema.arrayOf(Schema.of(Schema.Type.STRING))));

    FormatSpecification spec = new FormatSpecification(
        DelimitedStringsRecordFormat.class.getCanonicalName(),
        singleFieldSchema,
        ImmutableMap.of(DelimitedStringsRecordFormat.MAPPING, "0:f1,1:f2"));

    new DelimitedStringsRecordFormat().initialize(spec);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) FormatSpecification(io.cdap.cdap.api.data.format.FormatSpecification) Test(org.junit.Test)

Example 23 with FormatSpecification

use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class GrokRecordFormatTest method testSimple.

/**
 * Reads a {@code user:body} message through a grok format created from the pattern
 * {@code %{USER:user}:%{GREEDYDATA:body}} and checks the two extracted fields.
 */
@Test
public void testSimple() throws Exception {
    String expectedUser = "nitin";
    String expectedBody = "falkfjaksjf fkafjalkf fa fasfsalfsaf af afaslkfjasf asf af asf";

    // Both fields are declared as nullable strings.
    Schema schema = Schema.recordOf(
        "streamEvent",
        Schema.Field.of("user", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("body", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    FormatSpecification spec = new FormatSpecification(
        Formats.GROK,
        schema,
        GrokRecordFormat.settings("%{USER:user}:%{GREEDYDATA:body}"));
    RecordFormat<ByteBuffer, StructuredRecord> grokFormat = RecordFormats.createInitializedFormat(spec);

    String message = "nitin:falkfjaksjf fkafjalkf fa fasfsalfsaf af afaslkfjasf asf af asf";
    StructuredRecord parsed = grokFormat.read(ByteBuffer.wrap(Bytes.toBytes(message)));

    Assert.assertEquals(expectedUser, parsed.get("user"));
    Assert.assertEquals(expectedBody, parsed.get("body"));
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) FormatSpecification(io.cdap.cdap.api.data.format.FormatSpecification) ByteBuffer(java.nio.ByteBuffer) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 24 with FormatSpecification

use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class DelimitedStringsRecordFormatTest method testArrayOfNullableStringsSchema.

/**
 * Initializes the format with a record whose only field is an array of nullable strings and no
 * settings. The test has no assertions; it passes when {@code initialize} does not throw.
 */
@Test
public void testArrayOfNullableStringsSchema() throws UnsupportedTypeException {
    Schema nullableString = Schema.nullableOf(Schema.of(Schema.Type.STRING));
    Schema recordSchema = Schema.recordOf("event", Schema.Field.of("arr", Schema.arrayOf(nullableString)));

    DelimitedStringsRecordFormat delimitedFormat = new DelimitedStringsRecordFormat();
    delimitedFormat.initialize(
        new FormatSpecification(
            DelimitedStringsRecordFormat.class.getCanonicalName(),
            recordSchema,
            Collections.<String, String>emptyMap()));
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) FormatSpecification(io.cdap.cdap.api.data.format.FormatSpecification) Test(org.junit.Test)

Example 25 with FormatSpecification

use of io.cdap.cdap.api.data.format.FormatSpecification in project cdap by caskdata.

the class DelimitedStringsRecordFormatTest method testSimpleArraySchemaValidation.

/**
 * Initializes the format with a record of six non-nullable scalar fields followed by a string
 * array field, using no settings. The test has no assertions; it passes when {@code initialize}
 * does not throw.
 */
@Test
public void testSimpleArraySchemaValidation() throws UnsupportedTypeException {
    Schema recordSchema = Schema.recordOf(
        "event",
        Schema.Field.of("f1", Schema.of(Schema.Type.BOOLEAN)),
        Schema.Field.of("f2", Schema.of(Schema.Type.INT)),
        Schema.Field.of("f3", Schema.of(Schema.Type.FLOAT)),
        Schema.Field.of("f4", Schema.of(Schema.Type.DOUBLE)),
        Schema.Field.of("f5", Schema.of(Schema.Type.BYTES)),
        Schema.Field.of("f6", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("f7", Schema.arrayOf(Schema.of(Schema.Type.STRING))));

    FormatSpecification spec = new FormatSpecification(
        DelimitedStringsRecordFormat.class.getCanonicalName(),
        recordSchema,
        Collections.<String, String>emptyMap());

    new DelimitedStringsRecordFormat().initialize(spec);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) FormatSpecification(io.cdap.cdap.api.data.format.FormatSpecification) Test(org.junit.Test)

Aggregations

FormatSpecification (io.cdap.cdap.api.data.format.FormatSpecification): 27, Test (org.junit.Test): 26, Schema (io.cdap.cdap.api.data.schema.Schema): 18, StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 15, ByteBuffer (java.nio.ByteBuffer): 8, GenericRecord (org.apache.avro.generic.GenericRecord): 3, GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 3, RecordFormat (io.cdap.cdap.api.data.format.RecordFormat): 1