Search in sources :

Example 36 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class ReflectionDatumReader method readUnion.

/**
 * Reads a union-typed value from the decoder and resolves it against the target schema.
 *
 * <p>An int is read from the decoder first and used as the index into the source union's
 * schema list. If the target is not a union, the value is read directly against it.
 * Otherwise the target branch at the same index is tried first (when its type matches the
 * source branch type), and then every target branch is tried in declaration order.
 *
 * @param decoder the decoder to read the value from
 * @param sourceSchema the union schema the value was written with
 * @param targetSchema the schema to resolve the value to; may or may not be a union
 * @param targetTypeToken the Java type the value should be materialized as
 * @return the value produced by the first successful {@code read} call
 * @throws IOException if the target is a union and no branch could be read successfully,
 *     or if reading fails for a non-union target
 */
@Override
protected Object readUnion(Decoder decoder, Schema sourceSchema, Schema targetSchema, TypeToken<?> targetTypeToken) throws IOException {
    int branchIndex = decoder.readInt();
    Schema writtenSchema = sourceSchema.getUnionSchemas().get(branchIndex);

    // Non-union target: there is nothing to resolve, read straight against it.
    if (targetSchema.getType() != Schema.Type.UNION) {
        return read(decoder, writtenSchema, targetSchema, targetTypeToken);
    }

    // Fast path: try the target branch at the same index before falling back to a linear scan.
    try {
        Schema sameIndexSchema = targetSchema.getUnionSchema(branchIndex);
        boolean sameType = sameIndexSchema != null && sameIndexSchema.getType() == writtenSchema.getType();
        if (sameType) {
            return read(decoder, writtenSchema, sameIndexSchema, targetTypeToken);
        }
    } catch (IOException ignored) {
        // Safe to ignore: the linear resolution below is still attempted.
    }

    // Slow path: try each target branch in order and return the first one that reads cleanly.
    for (Schema candidate : targetSchema.getUnionSchemas()) {
        try {
            return read(decoder, writtenSchema, candidate, targetTypeToken);
        } catch (IOException ignored) {
            // Expected for non-matching branches; keep going until the target union is exhausted.
        }
    }
    throw new IOException(String.format("Fail to resolve %s to %s", sourceSchema, targetSchema));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) IOException(java.io.IOException)

Example 37 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class ReflectionDatumWriter method writeMap.

/**
 * Writes a map to the encoder: the entry count, then each key/value pair, then a trailing
 * zero when the map was not empty.
 *
 * <p>For an empty map only the count (zero) is written.
 *
 * @param encoder the encoder to write to
 * @param map the map whose entries are written
 * @param mapSchema entry whose key is the schema for map keys and whose value is the schema
 *     for map values
 * @throws IOException if writing to the encoder fails
 */
@Override
protected void writeMap(Encoder encoder, Map<?, ?> map, Map.Entry<Schema, Schema> mapSchema) throws IOException {
    final int entryCount = map.size();
    encoder.writeInt(entryCount);

    final Schema schemaForKeys = mapSchema.getKey();
    final Schema schemaForValues = mapSchema.getValue();
    for (Map.Entry<?, ?> pair : map.entrySet()) {
        write(encoder, pair.getKey(), schemaForKeys);
        write(encoder, pair.getValue(), schemaForValues);
    }

    // An empty map has already emitted a zero count above, so no extra marker is written.
    if (entryCount <= 0) {
        return;
    }
    // Non-empty map: close it with a zero marker.
    encoder.writeInt(0);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) Map(java.util.Map)

Example 38 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class ReflectionRowReader method readUnion.

/**
 * Reads a union-typed field from a row and resolves it against the target schema.
 *
 * <p>Only unions for which {@code sourceSchema.isNullable()} is true are supported. The
 * source value schema is {@code NULL_SCHEMA} when the row holds no value for the current
 * field, and the non-nullable part of the source union otherwise.
 *
 * @param row the row to read the current field from
 * @param sourceSchema the union schema of the field; must be nullable
 * @param targetSchema the schema to resolve the value to; may or may not be a union
 * @param targetTypeToken the Java type the value should be materialized as
 * @return the value produced by the first successful {@code read} call
 * @throws IOException if the target is a union and no branch could be read successfully,
 *     or if reading fails for a non-union target
 * @throws UnsupportedOperationException if the source union is not a nullable
 */
@Override
protected Object readUnion(Row row, Schema sourceSchema, Schema targetSchema, TypeToken<?> targetTypeToken) throws IOException {
    // Unions are assumed to exist only as the representation of a nullable type.
    if (!sourceSchema.isNullable()) {
        throw new UnsupportedOperationException("Unions that do not represent nullables are not supported.");
    }

    String fieldName = getCurrentField();
    Schema actualSchema;
    if (row.get(fieldName) == null) {
        actualSchema = NULL_SCHEMA;
    } else {
        actualSchema = sourceSchema.getNonNullable();
    }

    // Non-union target: read directly against it.
    if (targetSchema.getType() != Schema.Type.UNION) {
        return read(row, actualSchema, targetSchema, targetTypeToken);
    }

    // Union target: try each branch in order and return the first one that reads cleanly.
    for (Schema candidate : targetSchema.getUnionSchemas()) {
        try {
            return read(row, actualSchema, candidate, targetTypeToken);
        } catch (IOException ignored) {
            // Expected for non-matching branches; keep going until the target union is exhausted.
        }
    }
    throw new IOException(String.format("Fail to resolve %s to %s", sourceSchema, targetSchema));
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) IOException(java.io.IOException)

Example 39 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class ReflectionWriter method writeRecord.

/**
 * Writes every field of the record schema by pulling the matching value out of the record
 * object via reflection.
 *
 * <p>For each schema field, a Java field with the same name is preferred; when none is
 * found, the method registered under that name is invoked instead.
 *
 * @param writer the writer the field values are passed to
 * @param record the object to read field values from
 * @param recordSchema the record schema listing the fields to write
 * @throws IOException if a schema field has neither a matching Java field nor a method, if
 *     writing fails, or wrapping any other exception raised while reading the record
 */
protected void writeRecord(WRITER writer, Object record, Schema recordSchema) throws IOException {
    try {
        TypeToken<?> recordType = TypeToken.of(record.getClass());
        Map<String, Method> gettersByName = collectByMethod(recordType, Maps.<String, Method>newHashMap());
        Map<String, Field> fieldsByName = collectByFields(recordType, Maps.<String, Field>newHashMap());

        for (Schema.Field schemaField : recordSchema.getFields()) {
            String name = schemaField.getName();
            Field javaField = fieldsByName.get(name);

            Object fieldValue;
            if (javaField == null) {
                // No Java field with this name: fall back to the method registered for it.
                Method getter = gettersByName.get(name);
                if (getter == null) {
                    throw new IOException("Unable to read field value through getter. Class=" + recordType + ", field=" + name);
                }
                fieldValue = getter.invoke(record);
            } else {
                // Allow reading fields regardless of their declared access level.
                javaField.setAccessible(true);
                fieldValue = javaField.get(record);
            }

            write(writer, fieldValue, schemaField.getSchema());
        }
    } catch (IOException e) {
        // IOExceptions pass through untouched.
        throw e;
    } catch (Exception e) {
        // Anything else (reflection failures, runtime errors) is wrapped so callers see only IOException.
        throw new IOException(e);
    }
}
Also used : Field(java.lang.reflect.Field) Schema(co.cask.cdap.api.data.schema.Schema) Method(java.lang.reflect.Method) IOException(java.io.IOException)

Example 40 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class UserProfiles method configure.

/**
 * Configures the UserProfiles application: its name and description, the "events" stream,
 * the activity flow, the user profile service, and the "counters" and "profiles" datasets.
 */
@Override
public void configure() {
    setName("UserProfiles");
    setDescription("Demonstrates the use of column-level conflict detection");
    addStream(new Stream("events"));
    addFlow(new ActivityFlow());
    addService(new UserProfileService());
    createDataset(
        "counters",
        KeyValueTable.class,
        DatasetProperties.builder()
            .setDescription("Counters key-value table")
            .build());

    // The profiles table gets a schema so that it can be explored via Hive.
    // id, name, and email are never null and are set when a user profile is created.
    Schema.Field idField = Schema.Field.of("id", Schema.of(Schema.Type.STRING));
    Schema.Field nameField = Schema.Field.of("name", Schema.of(Schema.Type.STRING));
    Schema.Field emailField = Schema.Field.of("email", Schema.of(Schema.Type.STRING));
    // login and active are not set at creation time but are set later, so they are nullable.
    Schema.Field loginField = Schema.Field.of("login", Schema.nullableOf(Schema.of(Schema.Type.LONG)));
    Schema.Field activeField = Schema.Field.of("active", Schema.nullableOf(Schema.of(Schema.Type.LONG)));
    Schema profileSchema = Schema.recordOf("profile", idField, nameField, emailField, loginField, activeField);

    createDataset(
        "profiles",
        Table.class.getName(),
        TableProperties.builder()
            .setConflictDetection(ConflictDetection.COLUMN)
            .setSchema(profileSchema)
            .setRowFieldName("id")
            .setDescription("Profiles table with column-level conflict detection")
            .build());
}
Also used : Table(co.cask.cdap.api.dataset.table.Table) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Schema(co.cask.cdap.api.data.schema.Schema) Stream(co.cask.cdap.api.data.stream.Stream)

Aggregations

Schema (co.cask.cdap.api.data.schema.Schema)210 Test (org.junit.Test)92 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)69 Table (co.cask.cdap.api.dataset.table.Table)38 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)35 ApplicationId (co.cask.cdap.proto.id.ApplicationId)34 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)32 ApplicationManager (co.cask.cdap.test.ApplicationManager)30 AppRequest (co.cask.cdap.proto.artifact.AppRequest)29 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)24 IOException (java.io.IOException)23 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)22 ReflectionSchemaGenerator (co.cask.cdap.internal.io.ReflectionSchemaGenerator)22 ArrayList (java.util.ArrayList)22 WorkflowManager (co.cask.cdap.test.WorkflowManager)20 Map (java.util.Map)18 Set (java.util.Set)14 UnsupportedTypeException (co.cask.cdap.api.data.schema.UnsupportedTypeException)12 HashMap (java.util.HashMap)12 HashSet (java.util.HashSet)11