Search in sources :

Example 31 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class DatumWriterGenerator method encodeUnion.

/**
   * Generates method body for encoding union schema. Union schema is used for representing object references that
   * could be {@code null}.
   * @param mg
   * @param outputType
   * @param schema
   * @param value
   * @param encoder
   * @param schemaLocal
   * @param seenRefs
   */
private void encodeUnion(GeneratorAdapter mg, TypeToken<?> outputType, Schema schema, int value, int encoder, int schemaLocal, int seenRefs) {
    Label nullLabel = mg.newLabel();
    Label endLabel = mg.newLabel();
    mg.loadArg(value);
    mg.ifNull(nullLabel);
    // Not null, write out 0 and then encode the value
    encodeInt(mg, 0, encoder);
    mg.loadThis();
    mg.loadArg(value);
    doCast(mg, outputType, schema.getUnionSchema(0));
    mg.loadArg(encoder);
    mg.loadArg(schemaLocal);
    mg.push(0);
    mg.invokeVirtual(Type.getType(Schema.class), getMethod(Schema.class, "getUnionSchema", int.class));
    mg.loadArg(seenRefs);
    mg.invokeVirtual(classType, getEncodeMethod(outputType, schema.getUnionSchema(0)));
    mg.goTo(endLabel);
    mg.mark(nullLabel);
    // Null, write out 1
    encodeInt(mg, 1, encoder);
    mg.mark(endLabel);
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) Label(org.objectweb.asm.Label)

Example 32 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class DatumWriterGenerator method encodeCollection.

/**
   * Generates method body for encoding Collection value. The logic is like this:
   *
   * <pre>
   * {@code
   *
   * encoder.writeInt(collection.size());
   * for (T element : collection) {
   *   encodeElement(element, encoder, elementSchema, seenRefs);
   * }
   * if (collection.size() > 0) {
   *   encoder.writeInt(0);
   * }
   * }
   * </pre>
   * @param mg
   * @param componentType
   * @param componentSchema
   * @param value
   * @param encoder
   */
private void encodeCollection(GeneratorAdapter mg, TypeToken<?> componentType, Schema componentSchema, int value, int encoder, int schemaLocal, int seenRefs) {
    // Encode and store the collection length locally
    mg.loadArg(value);
    mg.invokeInterface(Type.getType(Collection.class), getMethod(int.class, "size"));
    int length = mg.newLocal(Type.INT_TYPE);
    mg.storeLocal(length);
    mg.loadArg(encoder);
    mg.loadLocal(length);
    mg.invokeInterface(Type.getType(Encoder.class), getMethod(Encoder.class, "writeInt", int.class));
    mg.pop();
    // Store the component schema
    mg.loadArg(schemaLocal);
    mg.invokeVirtual(Type.getType(Schema.class), getMethod(Schema.class, "getComponentSchema"));
    int componentSchemaLocal = mg.newLocal(Type.getType(Schema.class));
    mg.storeLocal(componentSchemaLocal);
    // Store the iterator
    int iterator = mg.newLocal(Type.getType(Iterator.class));
    mg.loadArg(value);
    mg.invokeInterface(Type.getType(Collection.class), getMethod(Iterator.class, "iterator"));
    mg.storeLocal(iterator);
    // For loop with iterator. Encode each component
    Label beginFor = mg.mark();
    Label endFor = mg.newLabel();
    mg.loadLocal(iterator);
    mg.invokeInterface(Type.getType(Iterator.class), getMethod(boolean.class, "hasNext"));
    mg.ifZCmp(GeneratorAdapter.EQ, endFor);
    // Call the encode method for encoding the element.
    mg.loadThis();
    mg.loadLocal(iterator);
    mg.invokeInterface(Type.getType(Iterator.class), getMethod(Object.class, "next"));
    doCast(mg, componentType, componentSchema);
    mg.loadArg(encoder);
    mg.loadLocal(componentSchemaLocal);
    mg.loadArg(seenRefs);
    mg.invokeVirtual(classType, getEncodeMethod(componentType, componentSchema));
    mg.goTo(beginFor);
    mg.mark(endFor);
    // if length > 0, write out 0 at the end of array.
    Label zeroLength = mg.newLabel();
    mg.loadLocal(length);
    mg.ifZCmp(GeneratorAdapter.LE, zeroLength);
    encodeInt(mg, 0, encoder);
    mg.mark(zeroLength);
}
Also used : Encoder(co.cask.cdap.common.io.Encoder) Schema(co.cask.cdap.api.data.schema.Schema) Iterator(java.util.Iterator) Label(org.objectweb.asm.Label) Collection(java.util.Collection)

Example 33 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class DatumWriterGenerator method generateConstructor.

/**
   * Generates the constructor. The constructor generated has signature {@code (Schema, FieldAccessorFactory)}.
   */
private void generateConstructor() {
    Method constructor = getMethod(void.class, "<init>", Schema.class, FieldAccessorFactory.class);
    // Constructor(Schema schema, FieldAccessorFactory accessorFactory)
    GeneratorAdapter mg = new GeneratorAdapter(Opcodes.ACC_PUBLIC, constructor, null, null, classWriter);
    // super(); // Calling Object constructor
    mg.loadThis();
    mg.invokeConstructor(Type.getType(Object.class), getMethod(void.class, "<init>"));
    // if (!SCHEMA_HASH.equals(schema.getSchemaHash().toString())) { throw IllegalArgumentException }
    mg.getStatic(classType, "SCHEMA_HASH", Type.getType(String.class));
    mg.loadArg(0);
    mg.invokeVirtual(Type.getType(Schema.class), getMethod(SchemaHash.class, "getSchemaHash"));
    mg.invokeVirtual(Type.getType(SchemaHash.class), getMethod(String.class, "toString"));
    mg.invokeVirtual(Type.getType(String.class), getMethod(boolean.class, "equals", Object.class));
    Label hashEquals = mg.newLabel();
    mg.ifZCmp(GeneratorAdapter.NE, hashEquals);
    mg.throwException(Type.getType(IllegalArgumentException.class), "Schema not match.");
    mg.mark(hashEquals);
    // this.schema = schema;
    mg.loadThis();
    mg.loadArg(0);
    mg.putField(classType, "schema", Type.getType(Schema.class));
    // For each record field that needs an accessor, get the accessor and store it in field.
    for (Map.Entry<TypeToken<?>, String> entry : fieldAccessorRequests.entries()) {
        String fieldAccessorName = getFieldAccessorName(entry.getKey(), entry.getValue());
        classWriter.visitField(Opcodes.ACC_PRIVATE + Opcodes.ACC_FINAL, fieldAccessorName, Type.getDescriptor(FieldAccessor.class), null, null);
        // this.fieldAccessorName
        //  = accessorFactory.getFieldAccessor(TypeToken.of(Class.forName("className")), "fieldName");
        mg.loadThis();
        mg.loadArg(1);
        mg.push(entry.getKey().getRawType().getName());
        mg.invokeStatic(Type.getType(Class.class), getMethod(Class.class, "forName", String.class));
        mg.invokeStatic(Type.getType(TypeToken.class), getMethod(TypeToken.class, "of", Class.class));
        mg.push(entry.getValue());
        mg.invokeInterface(Type.getType(FieldAccessorFactory.class), getMethod(FieldAccessor.class, "getFieldAccessor", TypeToken.class, String.class));
        mg.putField(classType, fieldAccessorName, Type.getType(FieldAccessor.class));
    }
    mg.returnValue();
    mg.endMethod();
}
Also used : SchemaHash(co.cask.cdap.api.data.schema.SchemaHash) Schema(co.cask.cdap.api.data.schema.Schema) Label(org.objectweb.asm.Label) Method(org.objectweb.asm.commons.Method) TypeToken(com.google.common.reflect.TypeToken) GeneratorAdapter(org.objectweb.asm.commons.GeneratorAdapter) Map(java.util.Map)

Example 34 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class DatumWriterGenerator method getEncodeMethod.

/**
   * Returns the encode method for the given type and schema. The same method will be returned if the same
   * type and schema has been passed to the method before.
   *
   * @param outputType Type information of the data type for output
   * @param schema Schema to use for output.
   * @return A method for encoding the given output type and schema.
   */
private Method getEncodeMethod(TypeToken<?> outputType, Schema schema) {
    String key = String.format("%s%s", normalizeTypeName(outputType), schema.getSchemaHash());
    Method method = encodeMethods.get(key);
    if (method != null) {
        return method;
    }
    // Generate the encode method (value, encoder, schema, set)
    TypeToken<?> callOutputType = getCallTypeToken(outputType, schema);
    String methodName = String.format("encode%s", key);
    method = getMethod(void.class, methodName, callOutputType.getRawType(), Encoder.class, Schema.class, Set.class);
    // Put the method into map first before generating the body in order to support recursive data type.
    encodeMethods.put(key, method);
    String methodSignature = Signatures.getMethodSignature(method, TypeToken.of(void.class), callOutputType, null, null, new TypeToken<Set<Object>>() {
    });
    GeneratorAdapter mg = new GeneratorAdapter(Opcodes.ACC_PRIVATE, method, methodSignature, new Type[] { Type.getType(IOException.class) }, classWriter);
    generateEncodeBody(mg, schema, outputType, 0, 1, 2, 3);
    mg.returnValue();
    mg.endMethod();
    return method;
}
Also used : Set(java.util.Set) Encoder(co.cask.cdap.common.io.Encoder) Schema(co.cask.cdap.api.data.schema.Schema) GeneratorAdapter(org.objectweb.asm.commons.GeneratorAdapter) Method(org.objectweb.asm.commons.Method) IOException(java.io.IOException)

Example 35 with Schema

use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.

the class ReflectionDatumReader method readMap.

@Override
protected Object readMap(Decoder decoder, Schema sourceSchema, Schema targetSchema, TypeToken<?> targetTypeToken) throws IOException {
    check(Map.class.isAssignableFrom(targetTypeToken.getRawType()), "Only map type is supported for map data.");
    Type type = targetTypeToken.getType();
    Preconditions.checkArgument(type instanceof ParameterizedType, "Only parameterized map is supported.");
    // suppressing warning because we know type is not null
    @SuppressWarnings("ConstantConditions") Type[] typeArgs = ((ParameterizedType) type).getActualTypeArguments();
    int len = decoder.readInt();
    // unchecked cast is ok, we're assuming the type token is correct
    @SuppressWarnings("unchecked") Map<Object, Object> map = (Map<Object, Object>) create(targetTypeToken);
    while (len != 0) {
        for (int i = 0; i < len; i++) {
            Map.Entry<Schema, Schema> sourceEntry = sourceSchema.getMapSchema();
            Map.Entry<Schema, Schema> targetEntry = targetSchema.getMapSchema();
            map.put(read(decoder, sourceEntry.getKey(), targetEntry.getKey(), TypeToken.of(typeArgs[0])), read(decoder, sourceEntry.getValue(), targetEntry.getValue(), TypeToken.of(typeArgs[1])));
        }
        len = decoder.readInt();
    }
    return map;
}
Also used : ParameterizedType(java.lang.reflect.ParameterizedType) ParameterizedType(java.lang.reflect.ParameterizedType) Type(java.lang.reflect.Type) Schema(co.cask.cdap.api.data.schema.Schema) Map(java.util.Map)

Aggregations

Schema (co.cask.cdap.api.data.schema.Schema)210 Test (org.junit.Test)92 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)69 Table (co.cask.cdap.api.dataset.table.Table)38 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)35 ApplicationId (co.cask.cdap.proto.id.ApplicationId)34 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)32 ApplicationManager (co.cask.cdap.test.ApplicationManager)30 AppRequest (co.cask.cdap.proto.artifact.AppRequest)29 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)24 IOException (java.io.IOException)23 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)22 ReflectionSchemaGenerator (co.cask.cdap.internal.io.ReflectionSchemaGenerator)22 ArrayList (java.util.ArrayList)22 WorkflowManager (co.cask.cdap.test.WorkflowManager)20 Map (java.util.Map)18 Set (java.util.Set)14 UnsupportedTypeException (co.cask.cdap.api.data.schema.UnsupportedTypeException)12 HashMap (java.util.HashMap)12 HashSet (java.util.HashSet)11