Use of org.apache.spark.sql.types.MapType in project cdap by caskdata.
The class DataFrames, method dataTypeToSchema.
/**
 * Converts a Spark {@link DataType} to a {@link Schema} object.
 *
 * @param dataType the data type to convert from
 * @param recordCounter tracks the number of record schemas created; used only for record name generation
 * @return a new {@link Schema}.
 */
private static Schema dataTypeToSchema(DataType dataType, int[] recordCounter) {
  if (dataType.equals(DataTypes.NullType)) {
    return Schema.of(Schema.Type.NULL);
  }
  if (dataType.equals(DataTypes.BooleanType)) {
    return Schema.of(Schema.Type.BOOLEAN);
  }
  if (dataType.equals(DataTypes.ByteType)) {
    return Schema.of(Schema.Type.INT);
  }
  if (dataType.equals(DataTypes.ShortType)) {
    return Schema.of(Schema.Type.INT);
  }
  if (dataType.equals(DataTypes.IntegerType)) {
    return Schema.of(Schema.Type.INT);
  }
  if (dataType.equals(DataTypes.LongType)) {
    return Schema.of(Schema.Type.LONG);
  }
  if (dataType.equals(DataTypes.FloatType)) {
    return Schema.of(Schema.Type.FLOAT);
  }
  if (dataType.equals(DataTypes.DoubleType)) {
    return Schema.of(Schema.Type.DOUBLE);
  }
  if (dataType.equals(DataTypes.BinaryType)) {
    return Schema.of(Schema.Type.BYTES);
  }
  if (dataType.equals(DataTypes.StringType)) {
    return Schema.of(Schema.Type.STRING);
  }
  if (dataType instanceof ArrayType) {
    ArrayType arrayType = (ArrayType) dataType;
    // Special case for byte array
    if (arrayType.elementType() == DataTypes.ByteType) {
      return Schema.of(Schema.Type.BYTES);
    }
    Schema componentSchema = dataTypeToSchema(arrayType.elementType(), recordCounter);
    return Schema.arrayOf(arrayType.containsNull() ? Schema.nullableOf(componentSchema) : componentSchema);
  }
  if (dataType instanceof MapType) {
    MapType mapType = (MapType) dataType;
    Schema valueSchema = dataTypeToSchema(mapType.valueType(), recordCounter);
    return Schema.mapOf(dataTypeToSchema(mapType.keyType(), recordCounter),
                        mapType.valueContainsNull() ? Schema.nullableOf(valueSchema) : valueSchema);
  }
  if (dataType instanceof StructType) {
    List<Schema.Field> fields = new ArrayList<>();
    for (StructField structField : ((StructType) dataType).fields()) {
      Schema fieldSchema = dataTypeToSchema(structField.dataType(), recordCounter);
      fields.add(Schema.Field.of(structField.name(),
                                 structField.nullable() ? Schema.nullableOf(fieldSchema) : fieldSchema));
    }
    return Schema.recordOf("Record" + recordCounter[0]++, fields);
  }
  // Some special types in Spark SQL
  if (dataType.equals(DataTypes.TimestampType)) {
    return Schema.of(Schema.Type.LONG);
  }
  if (dataType.equals(DataTypes.DateType)) {
    return Schema.of(Schema.Type.LONG);
  }
  // The CalendarInterval type is not supported for now, as there is no equivalent in Schema
  throw new IllegalArgumentException("Unsupported data type: " + dataType.typeName());
}
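For reference, a minimal usage sketch of this conversion. It assumes that DataFrames.toSchema(DataType) is the public entry point delegating to dataTypeToSchema (as the tests below suggest); the CDAP import paths and the example class name are assumptions, not taken from this page.

import co.cask.cdap.api.data.schema.Schema;            // assumed CDAP import path
import co.cask.cdap.api.spark.sql.DataFrames;           // assumed CDAP import path
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class DataTypeToSchemaExample {
  public static void main(String[] args) {
    // A struct with a non-nullable string and a nullable array of doubles.
    StructType structType = new StructType()
        .add("name", DataTypes.StringType, false)
        .add("scores", DataTypes.createArrayType(DataTypes.DoubleType, true), true);
    // The top-level record schema is named "Record0" via the recordCounter argument;
    // nested records would be named "Record1", "Record2", and so on.
    Schema schema = DataFrames.toSchema(structType);
    System.out.println(schema);
  }
}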
Use of org.apache.spark.sql.types.MapType in project cdap by caskdata.
The class DataFrames, method toRowValue.
/**
 * Converts an object value to a value type acceptable to a Spark {@link Row}.
 *
 * @param value the value to convert
 * @param dataType the target {@link DataType} of the value
 * @param path the current field path from the top; used only for error messages
 * @return an object that is compatible with Spark {@link Row}.
 */
private static Object toRowValue(@Nullable Object value, DataType dataType, String path) {
  if (value == null) {
    return null;
  }
  if (dataType.equals(DataTypes.NullType)) {
    return null;
  }
  if (dataType.equals(DataTypes.BooleanType)) {
    return value;
  }
  if (dataType.equals(DataTypes.ByteType)) {
    return value;
  }
  if (dataType.equals(DataTypes.ShortType)) {
    return value;
  }
  if (dataType.equals(DataTypes.IntegerType)) {
    return value;
  }
  if (dataType.equals(DataTypes.LongType)) {
    return value;
  }
  if (dataType.equals(DataTypes.FloatType)) {
    return value;
  }
  if (dataType.equals(DataTypes.DoubleType)) {
    return value;
  }
  if (dataType.equals(DataTypes.BinaryType)) {
    if (value instanceof ByteBuffer) {
      return Bytes.toBytes((ByteBuffer) value);
    }
    return value;
  }
  if (dataType.equals(DataTypes.StringType)) {
    return value;
  }
  if (dataType instanceof ArrayType) {
    @SuppressWarnings("unchecked")
    Collection<Object> collection;
    if (value instanceof Collection) {
      collection = (Collection<Object>) value;
    } else if (value.getClass().isArray()) {
      collection = Arrays.asList((Object[]) value);
    } else {
      throw new IllegalArgumentException("Value type " + value.getClass()
        + " is not supported as array type value. It must either be a Collection or an array");
    }
    List<Object> result = new ArrayList<>(collection.size());
    String elementPath = path + "[]";
    ArrayType arrayType = (ArrayType) dataType;
    for (Object obj : collection) {
      Object elementValue = toRowValue(obj, arrayType.elementType(), elementPath);
      if (elementValue == null && !arrayType.containsNull()) {
        throw new IllegalArgumentException("Null value is not allowed for array element at " + elementPath);
      }
      result.add(elementValue);
    }
    return JavaConversions.asScalaBuffer(result).toSeq();
  }
  if (dataType instanceof MapType) {
    @SuppressWarnings("unchecked")
    Map<Object, Object> map = (Map<Object, Object>) value;
    Map<Object, Object> result = new LinkedHashMap<>(map.size());
    String mapPath = path + "<>";
    MapType mapType = (MapType) dataType;
    for (Map.Entry<?, ?> entry : map.entrySet()) {
      Object mapKey = toRowValue(entry.getKey(), mapType.keyType(), mapPath);
      if (mapKey == null) {
        throw new IllegalArgumentException("Null key is not allowed for map at " + mapPath);
      }
      Object mapValue = toRowValue(entry.getValue(), mapType.valueType(), mapPath);
      if (mapValue == null && !mapType.valueContainsNull()) {
        throw new IllegalArgumentException("Null value is not allowed for map at " + mapPath);
      }
      result.put(mapKey, mapValue);
    }
    return JavaConversions.mapAsScalaMap(result);
  }
  if (dataType instanceof StructType) {
    StructuredRecord record = (StructuredRecord) value;
    StructField[] fields = ((StructType) dataType).fields();
    Object[] fieldValues = new Object[fields.length];
    for (int i = 0; i < fields.length; i++) {
      String fieldName = fields[i].name();
      String fieldPath = path + "/" + fieldName;
      Object fieldValue = toRowValue(record.get(fieldName), fields[i].dataType(), fieldPath);
      if (fieldValue == null && !fields[i].nullable()) {
        throw new IllegalArgumentException("Null value is not allowed for row field at " + fieldPath);
      }
      fieldValues[i] = fieldValue;
    }
    return RowFactory.create(fieldValues);
  }
  // Some special types in Spark SQL
  if (dataType.equals(DataTypes.TimestampType)) {
    return new Timestamp((long) value);
  }
  if (dataType.equals(DataTypes.DateType)) {
    return new Date((long) value);
  }
  // The CalendarInterval type is not supported for now, as there is no equivalent in Schema
  throw new IllegalArgumentException("Unsupported data type: " + dataType.typeName());
}
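A minimal standalone sketch of the MapType branch above: Spark Row values for map columns must be Scala maps, so the converted Java map goes through scala.collection.JavaConversions exactly as in the return statement of that branch. (JavaConversions is deprecated in newer Scala versions in favor of JavaConverters; the example class name is illustrative only.)

import java.util.LinkedHashMap;
import java.util.Map;
import scala.collection.JavaConversions;

public class MapRowValueExample {
  public static void main(String[] args) {
    // In toRowValue, keys and values would already have been converted recursively.
    Map<Object, Object> converted = new LinkedHashMap<>();
    converted.put("enabled", true);
    converted.put("verbose", false);
    // Mirrors the MapType case's return: wrap the Java map as a Scala map for the Row.
    scala.collection.Map<Object, Object> rowValue = JavaConversions.mapAsScalaMap(converted);
    System.out.println(rowValue);
  }
}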
Use of org.apache.spark.sql.types.MapType in project cdap by caskdata.
The class DataFramesTest, method testStructType.
@Test
public void testStructType() {
  Schema schema = Schema.recordOf(
    "Record0",
    Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("age", Schema.of(Schema.Type.INT)),
    Schema.Field.of("profile", Schema.nullableOf(Schema.mapOf(Schema.of(Schema.Type.STRING),
                                                              Schema.of(Schema.Type.STRING)))));
  StructType dataType = DataFrames.toDataType(schema);
  Assert.assertEquals(DataTypes.StringType, dataType.apply("name").dataType());
  Assert.assertFalse(dataType.apply("name").nullable());
  Assert.assertEquals(DataTypes.IntegerType, dataType.apply("age").dataType());
  Assert.assertFalse(dataType.apply("age").nullable());
  Assert.assertTrue(dataType.apply("profile").dataType() instanceof MapType);
  Assert.assertTrue(dataType.apply("profile").nullable());
  Assert.assertEquals(schema, DataFrames.toSchema(dataType));
}
Use of org.apache.spark.sql.types.MapType in project cdap by caskdata.
The class DataFramesTest, method testMapType.
@Test
public void testMapType() {
  // Simple map
  Schema schema = Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.BOOLEAN));
  MapType dataType = DataFrames.toDataType(schema);
  Assert.assertFalse(dataType.valueContainsNull());
  Assert.assertEquals(DataTypes.StringType, dataType.keyType());
  Assert.assertEquals(DataTypes.BooleanType, dataType.valueType());
  Assert.assertEquals(schema, DataFrames.toSchema(dataType));

  // Map with nullable value
  schema = Schema.mapOf(Schema.of(Schema.Type.INT), Schema.nullableOf(Schema.of(Schema.Type.STRING)));
  dataType = DataFrames.toDataType(schema);
  Assert.assertTrue(dataType.valueContainsNull());
  Assert.assertEquals(DataTypes.IntegerType, dataType.keyType());
  Assert.assertEquals(DataTypes.StringType, dataType.valueType());
  Assert.assertEquals(schema, DataFrames.toSchema(dataType));
}
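The same round-trip property should hold for nested combinations. Below is a hypothetical additional check in the same style, not part of the original test class, assuming toDataType infers the concrete DataType subclass as the assignments above imply:

@Test
public void testNestedMapRoundTrip() {
  // Hypothetical: a map whose values are nullable maps of string to long.
  Schema schema = Schema.mapOf(
    Schema.of(Schema.Type.STRING),
    Schema.nullableOf(Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.LONG))));
  MapType dataType = DataFrames.toDataType(schema);
  // Value nullability is carried by valueContainsNull, not by the value DataType itself.
  Assert.assertTrue(dataType.valueContainsNull());
  Assert.assertTrue(dataType.valueType() instanceof MapType);
  Assert.assertEquals(schema, DataFrames.toSchema(dataType));
}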