Search in sources :

Example 21 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class HBaseSource method initialize.

/**
 * Prepares this source for a run: delegates to the parent initializer, then builds the
 * row-to-record transformer from the configured schema JSON and row field.
 *
 * @param context runtime context handed through to the parent initializer
 * @throws Exception propagated from the parent initializer or from any call made here
 */
@Override
public void initialize(BatchRuntimeContext context) throws Exception {
    super.initialize(context);
    // Parse the configured schema and hand it straight to the transformer.
    rowRecordTransformer = new RowRecordTransformer(Schema.parseJson(config.schema), config.rowField);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema)

Example 22 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class RowRecordTransformer method setField.

/**
 * Decodes the raw bytes of a single field according to the field's schema type and stores the
 * decoded value on the record builder. A null byte array is accepted only for nullable fields,
 * in which case nothing is set on the builder.
 *
 * @param builder record builder that receives the decoded value
 * @param field schema field supplying the name and type to decode as
 * @param fieldBytes raw bytes for the field, may be null
 * @throws IllegalArgumentException if the bytes are null for a non-nullable field, or if the
 *     field's type is not one of the types handled below
 */
private void setField(StructuredRecord.Builder builder, Schema.Field field, byte[] fieldBytes) {
    final String name = field.getName();
    final Schema declared = field.getSchema();
    final boolean nullable = declared.isNullable();

    // Missing value: allowed only when the schema says the field may be null.
    if (fieldBytes == null) {
        if (nullable) {
            return;
        }
        throw new IllegalArgumentException("null value found for non-nullable field " + name);
    }

    // Look through the nullable wrapper so the switch sees the underlying type.
    final Schema.Type type = (nullable ? declared.getNonNullable() : declared).getType();

    final Object decoded;
    switch (type) {
        case BOOLEAN:
            decoded = Bytes.toBoolean(fieldBytes);
            break;
        case INT:
            decoded = Bytes.toInt(fieldBytes);
            break;
        case LONG:
            decoded = Bytes.toLong(fieldBytes);
            break;
        case FLOAT:
            decoded = Bytes.toFloat(fieldBytes);
            break;
        case DOUBLE:
            decoded = Bytes.toDouble(fieldBytes);
            break;
        case BYTES:
            // Raw bytes are stored as-is.
            decoded = fieldBytes;
            break;
        case STRING:
            decoded = Bytes.toString(fieldBytes);
            break;
        default:
            // shouldn't ever happen
            throw new IllegalArgumentException("Unsupported type " + type + " for field " + name);
    }
    builder.set(name, decoded);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema)

Example 23 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class SparkUtils method validateLabelFieldForTrainer.

/**
 * Validates the label field used by a trainer: the field must exist in the input schema and be of
 * type double (optionally nullable).
 *
 * @param inputSchema schema of the received record.
 * @param labelField name of the field that holds the label.
 * @throws IllegalArgumentException if the label field is missing from the schema or is not a double.
 */
public static void validateLabelFieldForTrainer(Schema inputSchema, String labelField) {
    Schema.Field label = inputSchema.getField(labelField);
    if (label == null) {
        throw new IllegalArgumentException(String.format("Label field %s does not exist in the input schema.", labelField));
    }
    Schema labelSchema = label.getSchema();
    // Unwrap any nullable schema (not only nullable simple types) so that the error below reports
    // the underlying type rather than the union wrapper.
    Schema.Type labelType = labelSchema.isNullable() ? labelSchema.getNonNullable().getType() : labelSchema.getType();
    if (labelType != Schema.Type.DOUBLE) {
        throw new IllegalArgumentException(String.format("Label field must be of type Double, but was %s.", labelType));
    }
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema)

Example 24 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class SparkUtils method validateTextField.

/**
 * Validate the input field to be used for text based feature generation. The field must be a
 * string, a nullable string, or a (possibly nullable) array whose components are strings or
 * nullable strings.
 *
 * @param inputSchema input schema coming in from the previous stage
 * @param key text field on which to perform text based feature generation
 * @throws IllegalArgumentException if the field is missing from the schema or is not of a supported type
 */
public static void validateTextField(Schema inputSchema, String key) {
    // Look the field up once and reuse it.
    Schema.Field field = inputSchema.getField(key);
    if (field == null) {
        throw new IllegalArgumentException(String.format("Input field %s does not exist in the input schema %s.", key, inputSchema.toString()));
    }
    Schema schema = field.getSchema();
    // Unwrap the nullable wrapper first: for a nullable array the component schema lives on the
    // non-nullable schema, not on the union that wraps it.
    if (schema.isNullable()) {
        schema = schema.getNonNullable();
    }
    Schema.Type type = schema.getType();
    if (type == Schema.Type.ARRAY) {
        Schema componentSchema = schema.getComponentSchema();
        type = componentSchema.isNullable() ? componentSchema.getNonNullable().getType() : componentSchema.getType();
    }
    if (type != Schema.Type.STRING) {
        throw new IllegalArgumentException(String.format("Field to be transformed should be of type String or " + "Nullable String or Array of type String or Nullable " + "String . But was %s for field %s.", type, key));
    }
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema)

Example 25 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class SparkUtils method getInputFieldValue.

/**
 * Gets the input field for feature generation. If the field is an array, returns the field. Otherwise, splits the
 * field based on the given splitter.
 * Returns an empty list if value is null, for both array and string fields.
 * The method assumes the input field is an array of strings or a string.
 * Validation for the field type should be performed at configure time. The method will throw an exception if the
 * input field is not of required type.
 *
 * @param input input Structured Record
 * @param inputField field to use for feature generation
 * @param splitter Splitter object to be used for splitting the input string
 * @return text to be used for feature generation
 * @throws IllegalArgumentException if the field value does not match the expected string / array of string type
 */
public static List<String> getInputFieldValue(StructuredRecord input, String inputField, Splitter splitter) {
    List<String> text = new ArrayList<>();
    Schema schema = input.getSchema().getField(inputField).getSchema();
    Schema.Type type = schema.isNullable() ? schema.getNonNullable().getType() : schema.getType();
    try {
        if (type == Schema.Type.ARRAY) {
            Object value = input.get(inputField);
            if (value == null) {
                // Nullable array with no value: honor the documented contract and return the
                // empty list instead of failing inside Lists.newArrayList.
                return text;
            }
            if (value instanceof List) {
                text = input.get(inputField);
            } else {
                text = Lists.newArrayList((String[]) value);
            }
        } else {
            String value = input.get(inputField);
            if (value != null) {
                text = Lists.newArrayList(splitter.split(value));
            }
        }
    } catch (ClassCastException e) {
        // The stored value did not match the type declared by the schema.
        throw new IllegalArgumentException(String.format("Schema type mismatch for field %s. Please make sure the " + "value to be used for feature generation is an array of " + "string or a string.", inputField), e);
    }
    return text;
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

Schema (io.cdap.cdap.api.data.schema.Schema)1135 Test (org.junit.Test)664 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)432 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)177 Table (io.cdap.cdap.api.dataset.table.Table)169 ApplicationManager (io.cdap.cdap.test.ApplicationManager)148 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)141 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)133 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)130 ArrayList (java.util.ArrayList)114 HashSet (java.util.HashSet)113 HashMap (java.util.HashMap)101 WorkflowManager (io.cdap.cdap.test.WorkflowManager)96 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)81 IOException (java.io.IOException)69 FailureCollector (io.cdap.cdap.etl.api.FailureCollector)67 MockPipelineConfigurer (io.cdap.cdap.etl.mock.common.MockPipelineConfigurer)56 Map (java.util.Map)56 ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin)47 ReflectionSchemaGenerator (io.cdap.cdap.internal.io.ReflectionSchemaGenerator)46