use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.
the class HBaseSource method initialize.
/**
 * Runs the parent initialization and then builds the row-to-record transformer
 * from the JSON schema and row field held in the plugin config.
 *
 * @param context runtime context passed through to the parent initialization
 * @throws Exception if the parent initialization or schema parsing fails
 */
@Override
public void initialize(BatchRuntimeContext context) throws Exception {
    super.initialize(context);
    rowRecordTransformer = new RowRecordTransformer(Schema.parseJson(config.schema), config.rowField);
}
use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.
the class RowRecordTransformer method setField.
/**
 * Decodes the raw bytes of a single field according to the field's schema type and
 * stores the decoded value on the record builder.
 *
 * <p>A {@code null} byte array is accepted only for nullable fields, in which case the
 * builder is left untouched.
 *
 * @param builder    record builder receiving the decoded value
 * @param field      schema field describing the name and type of the value
 * @param fieldBytes raw bytes of the value, may be {@code null}
 * @throws IllegalArgumentException if the bytes are {@code null} for a non-nullable field,
 *                                  or if the field type is not one of the handled types
 */
private void setField(StructuredRecord.Builder builder, Schema.Field field, byte[] fieldBytes) {
    final String name = field.getName();
    final Schema declared = field.getSchema();
    final boolean nullable = declared.isNullable();

    if (fieldBytes == null) {
        if (nullable) {
            // Nothing to set: a missing value is legal for a nullable field.
            return;
        }
        throw new IllegalArgumentException("null value found for non-nullable field " + name);
    }

    // Look through the nullable wrapper to find the actual value type.
    final Schema valueSchema = nullable ? declared.getNonNullable() : declared;
    final Schema.Type fieldType = valueSchema.getType();

    final Object decoded;
    switch (fieldType) {
        case BOOLEAN:
            decoded = Bytes.toBoolean(fieldBytes);
            break;
        case INT:
            decoded = Bytes.toInt(fieldBytes);
            break;
        case LONG:
            decoded = Bytes.toLong(fieldBytes);
            break;
        case FLOAT:
            decoded = Bytes.toFloat(fieldBytes);
            break;
        case DOUBLE:
            decoded = Bytes.toDouble(fieldBytes);
            break;
        case BYTES:
            // Raw bytes are stored as-is.
            decoded = fieldBytes;
            break;
        case STRING:
            decoded = Bytes.toString(fieldBytes);
            break;
        default:
            // Not expected to be reached for schemas that were validated up front.
            throw new IllegalArgumentException("Unsupported type " + fieldType + " for field " + name);
    }
    builder.set(name, decoded);
}
use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.
the class SparkUtils method validateLabelFieldForTrainer.
/**
 * Validates the label field for a trainer: the field must exist in the input schema and be
 * of type double (or nullable double).
 *
 * @param inputSchema schema of the received record.
 * @param labelField field from which to get the prediction.
 * @throws IllegalArgumentException if the label field is missing from the schema or is not
 *                                  a double / nullable double
 */
public static void validateLabelFieldForTrainer(Schema inputSchema, String labelField) {
    Schema.Field prediction = inputSchema.getField(labelField);
    if (prediction == null) {
        throw new IllegalArgumentException(String.format("Label field %s does not exists in the input schema.", labelField));
    }
    Schema predictionSchema = prediction.getSchema();
    // Unwrap any nullable schema (not only nullable simple types) so the error message reports
    // the underlying type rather than UNION. The set of accepted schemas is unchanged, because
    // only the simple type DOUBLE passes the check below.
    Schema.Type predictionFieldType = predictionSchema.isNullable()
        ? predictionSchema.getNonNullable().getType()
        : predictionSchema.getType();
    if (predictionFieldType != Schema.Type.DOUBLE) {
        throw new IllegalArgumentException(String.format("Label field must be of type Double, but was %s.", predictionFieldType));
    }
}
use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.
the class SparkUtils method validateTextField.
/**
 * Validate the input field to be used for text based feature generation.
 *
 * <p>The field must be a string, an array of strings, or a nullable variant of either
 * (the array itself and/or its component type may be nullable).
 *
 * @param inputSchema input schema coming in from the previous stage
 * @param key text field on which to perform text based feature generation
 * @throws IllegalArgumentException if the field does not exist in the input schema or is not
 *                                  of a supported type
 */
public static void validateTextField(Schema inputSchema, String key) {
    Schema.Field field = inputSchema.getField(key);
    if (field == null) {
        throw new IllegalArgumentException(String.format("Input field %s does not exist in the input schema %s.", key, inputSchema.toString()));
    }
    Schema schema = field.getSchema();
    // Unwrap the nullable union once and keep the unwrapped schema: the component schema must be
    // read from the array schema itself, not from the union that wraps a nullable array
    // (a union has no component schema, which previously led to a NullPointerException).
    Schema nonNullableSchema = schema.isNullable() ? schema.getNonNullable() : schema;
    Schema.Type type = nonNullableSchema.getType();
    if (type == Schema.Type.ARRAY) {
        Schema componentSchema = nonNullableSchema.getComponentSchema();
        type = componentSchema.isNullable() ? componentSchema.getNonNullable().getType() : componentSchema.getType();
    }
    if (type != Schema.Type.STRING) {
        throw new IllegalArgumentException(String.format("Field to be transformed should be of type String or " + "Nullable String or Array of type String or Nullable " + "String . But was %s for field %s.", type, key));
    }
}
use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.
the class SparkUtils method getInputFieldValue.
/**
 * Gets the input field for feature generation. If the field is an array, returns the field. Otherwise, splits the
 * field based on the given pattern.
 * Returns an empty list if value is null, for both string and array typed fields.
 * The method assumes the input field is an array of strings or a string.
 * Validation for the field type should be performed at configure time. The method will throw an exception if the
 * input field is not of required type.
 *
 * @param input input Structured Record
 * @param inputField field to use for feature generation
 * @param splitter Splitter object to be used for splitting the input string
 * @return text to be used for feature generation
 * @throws IllegalArgumentException if the value stored in the record does not match the expected
 *                                  string / array of string type
 */
public static List<String> getInputFieldValue(StructuredRecord input, String inputField, Splitter splitter) {
    Schema schema = input.getSchema().getField(inputField).getSchema();
    Schema.Type type = schema.isNullable() ? schema.getNonNullable().getType() : schema.getType();

    Object value = input.get(inputField);
    if (value == null) {
        // Covers nullable arrays as well as nullable strings; previously a null array value
        // was cast to String[] and handed to Lists.newArrayList, which throws on null.
        return new ArrayList<>();
    }

    try {
        if (type == Schema.Type.ARRAY) {
            if (value instanceof List) {
                // Element types are not checked here, matching the unchecked assignment this replaces.
                @SuppressWarnings("unchecked")
                List<String> listValue = (List<String>) value;
                return listValue;
            }
            return Lists.newArrayList((String[]) value);
        }
        return Lists.newArrayList(splitter.split((String) value));
    } catch (ClassCastException e) {
        throw new IllegalArgumentException(String.format("Schema type mismatch for field %s. Please make sure the " + "value to be used for feature generation is an array of " + "string or a string.", inputField), e);
    }
}
Aggregations