Search in sources :

Example 16 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project cdap by caskdata.

In class ErrorOutputWriter, method getGenericRecordForInvalidEntry:

/**
 * Builds the Avro error record for a single invalid entry.
 *
 * <p>The entry's error code and error message are copied into the record, and a textual rendering of
 * the invalid record is stored under {@code Constants.ErrorDataset.INVALIDENTRY}.
 *
 * @param invalidEntry the invalid entry to convert
 * @return a record built against {@code AVRO_ERROR_SCHEMA}
 */
private GenericRecord getGenericRecordForInvalidEntry(InvalidEntry invalidEntry) {
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(AVRO_ERROR_SCHEMA);
    recordBuilder.set(Constants.ErrorDataset.ERRCODE, invalidEntry.getErrorCode());
    recordBuilder.set(Constants.ErrorDataset.ERRMSG, invalidEntry.getErrorMsg());
    recordBuilder.set(Constants.ErrorDataset.INVALIDENTRY, describeInvalidRecord(invalidEntry.getInvalidRecord()));
    return recordBuilder.build();
}

/**
 * Renders an invalid record as text: the JSON form of a StructuredRecord (given directly or as the
 * value of a KeyValue), or a diagnostic message when the payload is missing or of another type.
 *
 * @param invalidRecord the raw invalid record taken from the entry; may be {@code null}
 * @return the JSON string, or a message describing why no JSON could be produced
 */
private static String describeInvalidRecord(Object invalidRecord) {
    // As each record is emitted along with stageName for all the stages, the invalid record can be a
    // KeyValue pair; unwrap it without an unchecked cast so an unexpected value type cannot throw.
    Object payload = invalidRecord instanceof KeyValue
        ? ((KeyValue<?, ?>) invalidRecord).getValue()
        : invalidRecord;

    if (payload instanceof StructuredRecord) {
        try {
            return StructuredRecordStringConverter.toJsonString((StructuredRecord) payload);
        } catch (IOException e) {
            // getCause() is null when the exception was thrown without a cause; fall back to the
            // exception itself so the message never ends in a bare "null".
            Throwable reason = e.getCause() != null ? e.getCause() : e;
            return "Exception while converting StructuredRecord to String, " + reason;
        }
    }

    // A null payload has no class to report; name it explicitly instead of failing with an NPE.
    String actualType = payload == null ? "null" : payload.getClass().getName();
    return String.format("Error Entry is of type %s, only a record of type %s is supported currently", actualType, StructuredRecord.class.getName());
}
Also used : KeyValue(co.cask.cdap.api.dataset.lib.KeyValue) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) IOException(java.io.IOException) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)

Example 17 with GenericRecordBuilder

use of org.apache.avro.generic.GenericRecordBuilder in project cdap by caskdata.

In class AvroRecordFormatTest, method testNestedRecord:

/**
 * Checks that an Avro record containing a nested record (with int, double, array and map fields)
 * is read back through the AVRO record format with every field value intact.
 */
@Test
public void testNestedRecord() throws Exception {
    // CDAP schemas: an inner record type and an outer record that embeds it.
    Schema nestedSchema = Schema.recordOf(
        "inner",
        Schema.Field.of("int", Schema.of(Schema.Type.INT)),
        Schema.Field.of("double", Schema.of(Schema.Type.DOUBLE)),
        Schema.Field.of("array", Schema.arrayOf(Schema.of(Schema.Type.FLOAT))),
        Schema.Field.of("map", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.STRING))));
    Schema outerSchema = Schema.recordOf(
        "record",
        Schema.Field.of("int", Schema.of(Schema.Type.INT)),
        Schema.Field.of("record", nestedSchema));

    // Equivalent Avro schemas, used to build the input record.
    org.apache.avro.Schema avroNestedSchema = convertSchema(nestedSchema);
    org.apache.avro.Schema avroOuterSchema = convertSchema(outerSchema);

    // Build the nested record first, then place it inside the outer one.
    GenericRecordBuilder nestedBuilder = new GenericRecordBuilder(avroNestedSchema);
    nestedBuilder.set("int", 5);
    nestedBuilder.set("double", 3.14159);
    nestedBuilder.set("array", ImmutableList.of(1.0f, 2.0f));
    nestedBuilder.set("map", ImmutableMap.of("key", "value"));

    GenericRecordBuilder outerBuilder = new GenericRecordBuilder(avroOuterSchema);
    outerBuilder.set("int", Integer.MAX_VALUE);
    outerBuilder.set("record", nestedBuilder.build());
    GenericRecord input = outerBuilder.build();

    // Read the record back through an initialized AVRO format.
    FormatSpecification spec = new FormatSpecification(Formats.AVRO, outerSchema, Collections.<String, String>emptyMap());
    RecordFormat<StreamEvent, StructuredRecord> avroFormat = RecordFormats.createInitializedFormat(spec);
    StructuredRecord decoded = avroFormat.read(toStreamEvent(input));

    // Outer fields.
    Assert.assertEquals(Integer.MAX_VALUE, decoded.get("int"));

    // Nested fields.
    StructuredRecord decodedNested = decoded.get("record");
    Assert.assertEquals(5, decodedNested.get("int"));
    Assert.assertEquals(3.14159, decodedNested.get("double"));
    List<Float> decodedArray = decodedNested.get("array");
    Assert.assertEquals(ImmutableList.of(1.0f, 2.0f), decodedArray);
    Map<String, String> decodedMap = decodedNested.get("map");
    Assert.assertEquals(ImmutableMap.of("key", "value"), decodedMap);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Aggregations

GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 17, GenericRecord (org.apache.avro.generic.GenericRecord): 14, DatasetRepository (com.cloudera.cdk.data.DatasetRepository): 6, StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 5, Schema (co.cask.cdap.api.data.schema.Schema): 5, Random (java.util.Random): 5, FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 4, StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 4, DatasetDescriptor (com.cloudera.cdk.data.DatasetDescriptor): 4, Test (org.junit.Test): 4, GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 3, PartitionStrategy (com.cloudera.cdk.data.PartitionStrategy): 2, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 2, IOException (java.io.IOException): 2, Schema (org.apache.avro.Schema): 2, GenericData (org.apache.avro.generic.GenericData): 2, BinaryEncoder (org.apache.avro.io.BinaryEncoder): 2, KeyValue (co.cask.cdap.api.dataset.lib.KeyValue): 1, File (java.io.File): 1, PrintWriter (java.io.PrintWriter): 1