Search in sources :

Example 41 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class JsonStructuredRecordDatumReader method decodeUnion.

/**
 * Decodes a union value by peeking at the next JSON token and delegating to the first
 * member schema whose type is mapped to that token in {@code SCHEMA_TO_JSON_TYPE}.
 *
 * @param decoder the decoder from which the JSON reader is obtained
 * @param unionSchema the union schema whose member schemas are tried in declaration order
 * @return the value produced by decoding with the first matching member schema
 * @throws IOException if no member schema maps to the upcoming token
 */
@Override
protected Object decodeUnion(Decoder decoder, Schema unionSchema) throws IOException {
    JsonReader reader = getJsonReader(decoder);
    // Only peek: the token itself is consumed by the delegated decode call.
    JsonToken upcoming = reader.peek();
    for (Schema candidate : unionSchema.getUnionSchemas()) {
        if (SCHEMA_TO_JSON_TYPE.get(candidate.getType()) != upcoming) {
            continue;
        }
        // First member whose mapped JSON token matches wins.
        return decode(decoder, candidate);
    }
    throw new IOException(String.format("No matching schema found for union type: %s for token: %s", unionSchema, upcoming));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) JsonReader(com.google.gson.stream.JsonReader) JsonToken(com.google.gson.stream.JsonToken) IOException(java.io.IOException)

Example 42 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class JsonStructuredRecordDatumReader method decodeRecord.

/**
 * Decodes a JSON object into a {@link StructuredRecord} of the given schema.
 * JSON members whose names are not fields of the schema are skipped.
 *
 * @param decoder the decoder from which the JSON reader is obtained
 * @param schema the record schema used to look up fields by name
 * @return the record built from all recognized members of the JSON object
 * @throws IOException if reading from the underlying JSON reader fails
 */
@Override
protected StructuredRecord decodeRecord(Decoder decoder, Schema schema) throws IOException {
    StructuredRecord.Builder recordBuilder = StructuredRecord.builder(schema);
    JsonReader reader = getJsonReader(decoder);
    reader.beginObject();
    for (JsonToken next = reader.peek(); next != JsonToken.END_OBJECT; next = reader.peek()) {
        String memberName = reader.nextName();
        Schema.Field field = schema.getField(memberName);
        if (field != null) {
            recordBuilder.set(field.getName(), decode(decoder, field.getSchema()));
        } else {
            // Ignore unrecognized fields
            reader.skipValue();
        }
    }
    reader.endObject();
    return recordBuilder.build();
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) JsonReader(com.google.gson.stream.JsonReader) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)

Example 43 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class AvroRecordFormatTest method testFlatRecord.

/**
 * Builds an Avro record covering int, long, boolean, bytes, double, float, string,
 * array, map and two nullable-string fields (one null, one set), reads it back through
 * the AVRO record format, and checks every field of the resulting StructuredRecord.
 */
@Test
public void testFlatRecord() throws Exception {
    Schema schema = Schema.recordOf(
        "record",
        Schema.Field.of("int", Schema.of(Schema.Type.INT)),
        Schema.Field.of("long", Schema.of(Schema.Type.LONG)),
        Schema.Field.of("boolean", Schema.of(Schema.Type.BOOLEAN)),
        Schema.Field.of("bytes", Schema.of(Schema.Type.BYTES)),
        Schema.Field.of("double", Schema.of(Schema.Type.DOUBLE)),
        Schema.Field.of("float", Schema.of(Schema.Type.FLOAT)),
        Schema.Field.of("string", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("array", Schema.arrayOf(Schema.of(Schema.Type.INT))),
        Schema.Field.of("map", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.INT))),
        Schema.Field.of("nullable", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))),
        Schema.Field.of("nullable2", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))));
    FormatSpecification formatSpec =
        new FormatSpecification(Formats.AVRO, schema, Collections.<String, String>emptyMap());

    // Source record written with the Avro equivalent of the schema above.
    org.apache.avro.Schema avroSchema = convertSchema(schema);
    GenericRecord avroRecord = new GenericRecordBuilder(avroSchema)
        .set("int", Integer.MAX_VALUE)
        .set("long", Long.MAX_VALUE)
        .set("boolean", false)
        .set("bytes", Charsets.UTF_8.encode("hello world"))
        .set("double", Double.MAX_VALUE)
        .set("float", Float.MAX_VALUE)
        .set("string", "foo bar")
        .set("array", Lists.newArrayList(1, 2, 3))
        .set("map", ImmutableMap.of("k1", 1, "k2", 2))
        .set("nullable", null)
        .set("nullable2", "Hello")
        .build();

    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(formatSpec);
    StructuredRecord decoded = format.read(toStreamEvent(avroRecord));

    // Every field must come back with the value that was written.
    Assert.assertEquals(Integer.MAX_VALUE, decoded.get("int"));
    Assert.assertEquals(Long.MAX_VALUE, decoded.get("long"));
    Assert.assertFalse((Boolean) decoded.get("boolean"));
    Assert.assertArrayEquals(Bytes.toBytes("hello world"), Bytes.toBytes((ByteBuffer) decoded.get("bytes")));
    Assert.assertEquals(Double.MAX_VALUE, decoded.get("double"));
    Assert.assertEquals(Float.MAX_VALUE, decoded.get("float"));
    Assert.assertEquals("foo bar", decoded.get("string"));
    Assert.assertEquals(Lists.newArrayList(1, 2, 3), decoded.get("array"));
    assertMapEquals(ImmutableMap.<String, Object>of("k1", 1, "k2", 2), (Map<Object, Object>) decoded.get("map"));
    Assert.assertNull(decoded.get("nullable"));
    Assert.assertEquals("Hello", decoded.get("nullable2"));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) ByteBuffer(java.nio.ByteBuffer) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 44 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class CombinedLogRecordFormat method validateSchema.

/**
 * Validates that the desired schema is a record whose fields are all either simple
 * types or nullable simple types.
 *
 * @param desiredSchema the schema requested for this format
 * @throws UnsupportedTypeException if the schema is not a record, or if any field is
 *         neither a simple type nor a nullable simple type
 */
@Override
protected void validateSchema(Schema desiredSchema) throws UnsupportedTypeException {
    // a valid schema is a record of simple types.
    // Reject non-record schemas explicitly so callers get the declared exception rather
    // than a failure while iterating the field list.
    // NOTE(review): presumably getFields() yields null for non-record schemas - confirm.
    if (desiredSchema.getType() != Schema.Type.RECORD) {
        throw new UnsupportedTypeException("Schema must be a record of simple types, but is of type "
            + desiredSchema.getType() + ".");
    }
    for (Schema.Field field : desiredSchema.getFields()) {
        Schema fieldSchema = field.getSchema();
        if (!fieldSchema.getType().isSimpleType() && !fieldSchema.isNullableSimple()) {
            throw new UnsupportedTypeException("Field " + field.getName() + " is of invalid type.");
        }
    }
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) UnsupportedTypeException(co.cask.cdap.api.data.schema.UnsupportedTypeException)

Example 45 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class GrokRecordFormat method read.

/**
 * Parses the UTF-8 body of a stream event with the configured grok pattern and copies
 * each captured value whose key equals a schema field name into a record.
 * Fields with no captured value are left unset on the builder.
 *
 * @param event the stream event whose body is matched
 * @return the record built from the captured values
 * @throws UnexpectedFormatException declared by the format contract
 */
@Override
public StructuredRecord read(StreamEvent event) throws UnexpectedFormatException {
    String body = Bytes.toString(event.getBody(), Charsets.UTF_8);
    StructuredRecord.Builder recordBuilder = StructuredRecord.builder(schema);
    Match match = grok.match(body);
    // captures() is invoked before toMap(), as in the original call order.
    match.captures();
    Map<String, Object> captured = match.toMap();
    for (Schema.Field field : schema.getFields()) {
        String name = field.getName();
        Object raw = captured.get(name);
        if (raw == null) {
            continue;
        }
        // The builder converts the string form to the field's declared type.
        recordBuilder.convertAndSet(name, raw.toString());
    }
    return recordBuilder.build();
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) Match(oi.thekraken.grok.api.Match)

Aggregations

Schema (co.cask.cdap.api.data.schema.Schema): 210; Test (org.junit.Test): 92; StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 69; Table (co.cask.cdap.api.dataset.table.Table): 38; ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 35; ApplicationId (co.cask.cdap.proto.id.ApplicationId): 34; FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 32; ApplicationManager (co.cask.cdap.test.ApplicationManager): 30; AppRequest (co.cask.cdap.proto.artifact.AppRequest): 29; KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 24; IOException (java.io.IOException): 23; ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig): 22; ReflectionSchemaGenerator (co.cask.cdap.internal.io.ReflectionSchemaGenerator): 22; ArrayList (java.util.ArrayList): 22; WorkflowManager (co.cask.cdap.test.WorkflowManager): 20; Map (java.util.Map): 18; Set (java.util.Set): 14; UnsupportedTypeException (co.cask.cdap.api.data.schema.UnsupportedTypeException): 12; HashMap (java.util.HashMap): 12; HashSet (java.util.HashSet): 11