Use of org.apache.spark.sql.types.ArrayType in project cdap by caskdata.
From the class DataFrames, method toRowValue.
/**
 * Converts an object value to a value type acceptable by {@link Row}.
 *
 * @param value the value to convert
 * @param dataType the target {@link DataType} of the value
 * @param path the current field path from the top; used only for error messages
 * @return an object that is compatible with Spark {@link Row}
 */
private static Object toRowValue(@Nullable Object value, DataType dataType, String path) {
  if (value == null) {
    return null;
  }
  if (dataType.equals(DataTypes.NullType)) {
    return null;
  }
  // Primitive and simple types are already in a form that Row accepts.
  if (dataType.equals(DataTypes.BooleanType)) {
    return value;
  }
  if (dataType.equals(DataTypes.ByteType)) {
    return value;
  }
  if (dataType.equals(DataTypes.ShortType)) {
    return value;
  }
  if (dataType.equals(DataTypes.IntegerType)) {
    return value;
  }
  if (dataType.equals(DataTypes.LongType)) {
    return value;
  }
  if (dataType.equals(DataTypes.FloatType)) {
    return value;
  }
  if (dataType.equals(DataTypes.DoubleType)) {
    return value;
  }
  if (dataType.equals(DataTypes.BinaryType)) {
    // Binary data may arrive as a ByteBuffer; Row expects a byte[].
    if (value instanceof ByteBuffer) {
      return Bytes.toBytes((ByteBuffer) value);
    }
    return value;
  }
  if (dataType.equals(DataTypes.StringType)) {
    return value;
  }
  if (dataType instanceof ArrayType) {
    @SuppressWarnings("unchecked")
    Collection<Object> collection;
    if (value instanceof Collection) {
      collection = (Collection<Object>) value;
    } else if (value.getClass().isArray()) {
      collection = Arrays.asList((Object[]) value);
    } else {
      throw new IllegalArgumentException(
        "Value type " + value.getClass() + " is not supported as array type value. "
          + "It must either be a Collection or an array");
    }
    List<Object> result = new ArrayList<>(collection.size());
    String elementPath = path + "[]";
    ArrayType arrayType = (ArrayType) dataType;
    // Convert each element recursively, then hand Row a Scala Seq.
    for (Object obj : collection) {
      Object elementValue = toRowValue(obj, arrayType.elementType(), elementPath);
      if (elementValue == null && !arrayType.containsNull()) {
        throw new IllegalArgumentException("Null value is not allowed for array element at " + elementPath);
      }
      result.add(elementValue);
    }
    return JavaConversions.asScalaBuffer(result).toSeq();
  }
  if (dataType instanceof MapType) {
    @SuppressWarnings("unchecked")
    Map<Object, Object> map = (Map<Object, Object>) value;
    Map<Object, Object> result = new LinkedHashMap<>(map.size());
    String mapPath = path + "<>";
    MapType mapType = (MapType) dataType;
    // Convert keys and values recursively, then hand Row a Scala map.
    for (Map.Entry<?, ?> entry : map.entrySet()) {
      Object mapKey = toRowValue(entry.getKey(), mapType.keyType(), mapPath);
      if (mapKey == null) {
        throw new IllegalArgumentException("Null key is not allowed for map at " + mapPath);
      }
      Object mapValue = toRowValue(entry.getValue(), mapType.valueType(), mapPath);
      if (mapValue == null && !mapType.valueContainsNull()) {
        throw new IllegalArgumentException("Null value is not allowed for map at " + mapPath);
      }
      result.put(mapKey, mapValue);
    }
    return JavaConversions.mapAsScalaMap(result);
  }
  if (dataType instanceof StructType) {
    // A nested StructuredRecord becomes a nested Row.
    StructuredRecord record = (StructuredRecord) value;
    StructField[] fields = ((StructType) dataType).fields();
    Object[] fieldValues = new Object[fields.length];
    for (int i = 0; i < fields.length; i++) {
      String fieldName = fields[i].name();
      String fieldPath = path + "/" + fieldName;
      Object fieldValue = toRowValue(record.get(fieldName), fields[i].dataType(), fieldPath);
      if (fieldValue == null && !fields[i].nullable()) {
        throw new IllegalArgumentException("Null value is not allowed for row field at " + fieldPath);
      }
      fieldValues[i] = fieldValue;
    }
    return RowFactory.create(fieldValues);
  }
  // Some special types in Spark SQL
  if (dataType.equals(DataTypes.TimestampType)) {
    return new Timestamp((long) value);
  }
  if (dataType.equals(DataTypes.DateType)) {
    return new Date((long) value);
  }
  // The CalendarInterval type is not supported for now, as there is no equivalent in Schema
  throw new IllegalArgumentException("Unsupported data type: " + dataType.typeName());
}
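
As a hedged illustration of the ArrayType branch above, the sketch below builds a Spark schema with an array field and constructs a compatible Row the same way toRowValue does: by converting a Java List into a Scala Seq. The class name ArrayRowExample and the field name "scores" are hypothetical, not part of the CDAP source; the JavaConversions call matches the Scala 2.11-era API used by the method above.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;
import scala.collection.JavaConversions;

public class ArrayRowExample {
  public static void main(String[] args) {
    // Hypothetical schema: one non-nullable array field of non-null integers.
    StructType schema = new StructType()
      .add("scores", DataTypes.createArrayType(DataTypes.IntegerType, false), false);

    // Java collection holding the array elements.
    List<Object> scores = Arrays.asList(1, 2, 3);

    // As in the ArrayType branch above, the array value is handed to Row
    // as a Scala Seq rather than a Java List.
    Row row = RowFactory.create(JavaConversions.asScalaBuffer(scores).toSeq());

    System.out.println(schema.simpleString() + " -> " + row);
  }
}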
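Similarly, a minimal sketch of what the MapType, StructType, and TimestampType branches produce, under the assumption of illustrative field values (the class name MapRowExample and the literals are not from the CDAP source): map values cross into Row as Scala maps, nested records become nested Rows, and epoch-millisecond longs become java.sql.Timestamp.

import java.sql.Timestamp;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import scala.collection.JavaConversions;

public class MapRowExample {
  public static void main(String[] args) {
    // Hypothetical map value: counts per word.
    Map<Object, Object> counts = new LinkedHashMap<>();
    counts.put("spark", 2);
    counts.put("cdap", 1);

    // Like the MapType branch above, Row takes map values as a Scala map,
    // not as a java.util.Map.
    Object scalaMap = JavaConversions.mapAsScalaMap(counts);

    // Like the TimestampType branch above, an epoch-millis long is wrapped
    // in java.sql.Timestamp.
    Timestamp ts = new Timestamp(1500000000000L);

    // Nested StructType fields become nested Rows (the StructType branch).
    Row nested = RowFactory.create("doc-1", scalaMap, ts);
    Row outer = RowFactory.create(nested);
    System.out.println(outer);
  }
}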