Search in sources:

Example 6 with ArrayType

Use of org.apache.spark.sql.types.ArrayType in the cdap project by caskdata.

From the class DataFrames, method toRowValue:

/**
 * Converts an object value to a value type acceptable by {@link Row}
 *
 * @param value the value to convert
 * @param dataType the target {@link DataType} of the value
 * @param path the current field path from the top. It is just for error message purpose.
 * @return an object that is compatible with Spark {@link Row}.
 */
private static Object toRowValue(@Nullable Object value, DataType dataType, String path) {
    // A null value, or any value targeted at NullType, maps to null.
    if (value == null || dataType.equals(DataTypes.NullType)) {
        return null;
    }
    // Scalar types whose Java representation is accepted by Row unchanged.
    if (dataType.equals(DataTypes.BooleanType)
        || dataType.equals(DataTypes.ByteType)
        || dataType.equals(DataTypes.ShortType)
        || dataType.equals(DataTypes.IntegerType)
        || dataType.equals(DataTypes.LongType)
        || dataType.equals(DataTypes.FloatType)
        || dataType.equals(DataTypes.DoubleType)
        || dataType.equals(DataTypes.StringType)) {
        return value;
    }
    if (dataType.equals(DataTypes.BinaryType)) {
        // A ByteBuffer is flattened into a byte[]; anything else is passed through as-is.
        return value instanceof ByteBuffer ? Bytes.toBytes((ByteBuffer) value) : value;
    }
    if (dataType instanceof ArrayType) {
        ArrayType arrayType = (ArrayType) dataType;
        // The incoming value must be either a Collection or an Object array.
        Collection<Object> elements;
        if (value instanceof Collection) {
            @SuppressWarnings("unchecked")
            Collection<Object> asCollection = (Collection<Object>) value;
            elements = asCollection;
        } else if (value.getClass().isArray()) {
            elements = Arrays.asList((Object[]) value);
        } else {
            throw new IllegalArgumentException("Value type " + value.getClass() + " is not supported as array type value. It must either be a Collection or an array");
        }
        String elementPath = path + "[]";
        List<Object> converted = new ArrayList<>(elements.size());
        for (Object element : elements) {
            // Recursively convert each element against the declared element type.
            Object rowElement = toRowValue(element, arrayType.elementType(), elementPath);
            if (rowElement == null && !arrayType.containsNull()) {
                throw new IllegalArgumentException("Null value is not allowed for array element at " + elementPath);
            }
            converted.add(rowElement);
        }
        // Row expects a Scala Seq for array columns.
        return JavaConversions.asScalaBuffer(converted).toSeq();
    }
    if (dataType instanceof MapType) {
        MapType mapType = (MapType) dataType;
        @SuppressWarnings("unchecked")
        Map<Object, Object> javaMap = (Map<Object, Object>) value;
        String mapPath = path + "<>";
        // LinkedHashMap keeps the original entry order in the converted map.
        Map<Object, Object> converted = new LinkedHashMap<>(javaMap.size());
        for (Map.Entry<?, ?> entry : javaMap.entrySet()) {
            Object rowKey = toRowValue(entry.getKey(), mapType.keyType(), mapPath);
            if (rowKey == null) {
                // Map keys may never be null, regardless of the value-null setting.
                throw new IllegalArgumentException("Null key is not allowed for map at " + mapPath);
            }
            Object rowVal = toRowValue(entry.getValue(), mapType.valueType(), mapPath);
            if (rowVal == null && !mapType.valueContainsNull()) {
                throw new IllegalArgumentException("Null value is not allowed for map at " + mapPath);
            }
            converted.put(rowKey, rowVal);
        }
        // Row expects a Scala Map for map columns.
        return JavaConversions.mapAsScalaMap(converted);
    }
    if (dataType instanceof StructType) {
        // A struct value is carried as a CDAP StructuredRecord; convert field by field.
        StructuredRecord record = (StructuredRecord) value;
        StructField[] fields = ((StructType) dataType).fields();
        Object[] rowFields = new Object[fields.length];
        int index = 0;
        for (StructField field : fields) {
            String fieldPath = path + "/" + field.name();
            Object rowField = toRowValue(record.get(field.name()), field.dataType(), fieldPath);
            if (rowField == null && !field.nullable()) {
                throw new IllegalArgumentException("Null value is not allowed for row field at " + fieldPath);
            }
            rowFields[index++] = rowField;
        }
        return RowFactory.create(rowFields);
    }
    // Some special types in Spark SQL; the value is expected to be an epoch long.
    if (dataType.equals(DataTypes.TimestampType)) {
        return new Timestamp((long) value);
    }
    if (dataType.equals(DataTypes.DateType)) {
        return new Date((long) value);
    }
    // Not support the CalendarInterval type for now, as there is no equivalent in Schema
    throw new IllegalArgumentException("Unsupported data type: " + dataType.typeName());
}
Also used: StructType (org.apache.spark.sql.types.StructType), ArrayList (java.util.ArrayList), ByteBuffer (java.nio.ByteBuffer), Timestamp (java.sql.Timestamp), MapType (org.apache.spark.sql.types.MapType), StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord), Date (java.sql.Date), LinkedHashMap (java.util.LinkedHashMap), ArrayType (org.apache.spark.sql.types.ArrayType), StructField (org.apache.spark.sql.types.StructField), Collection (java.util.Collection), Map (java.util.Map)

Aggregations

ArrayType (org.apache.spark.sql.types.ArrayType)6 StructType (org.apache.spark.sql.types.StructType)5 ArrayList (java.util.ArrayList)3 StructField (org.apache.spark.sql.types.StructField)3 Schema (co.cask.cdap.api.data.schema.Schema)2 DataType (org.apache.spark.sql.types.DataType)2 MapType (org.apache.spark.sql.types.MapType)2 StringType (org.apache.spark.sql.types.StringType)2 Test (org.junit.Test)2 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)1 ByteBuffer (java.nio.ByteBuffer)1 Date (java.sql.Date)1 Timestamp (java.sql.Timestamp)1 Collection (java.util.Collection)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 DataType (org.apache.carbondata.core.metadata.datatype.DataType)1 StructField (org.apache.carbondata.core.metadata.datatype.StructField)1 BooleanType (org.apache.spark.sql.types.BooleanType)1 DateType (org.apache.spark.sql.types.DateType)1