Use of org.apache.spark.sql.types.ArrayType in project carbondata by apache.
From the class CarbonColumnVectorWrapper, method convertSparkToCarbonDataType:
// TODO: this is copied from carbondata-spark-common module, use presto type instead of this
private org.apache.carbondata.core.metadata.datatype.DataType convertSparkToCarbonDataType(
    org.apache.spark.sql.types.DataType dataType) {
  if (dataType instanceof StringType) {
    return DataTypes.STRING;
  } else if (dataType instanceof ShortType) {
    return DataTypes.SHORT;
  } else if (dataType instanceof IntegerType) {
    return DataTypes.INT;
  } else if (dataType instanceof LongType) {
    return DataTypes.LONG;
  } else if (dataType instanceof DoubleType) {
    return DataTypes.DOUBLE;
  } else if (dataType instanceof FloatType) {
    return DataTypes.FLOAT;
  } else if (dataType instanceof DateType) {
    return DataTypes.DATE;
  } else if (dataType instanceof BooleanType) {
    return DataTypes.BOOLEAN;
  } else if (dataType instanceof TimestampType) {
    return DataTypes.TIMESTAMP;
  } else if (dataType instanceof NullType) {
    return DataTypes.NULL;
  } else if (dataType instanceof DecimalType) {
    DecimalType decimal = (DecimalType) dataType;
    return DataTypes.createDecimalType(decimal.precision(), decimal.scale());
  } else if (dataType instanceof ArrayType) {
    org.apache.spark.sql.types.DataType elementType = ((ArrayType) dataType).elementType();
    return DataTypes.createArrayType(convertSparkToCarbonDataType(elementType));
  } else if (dataType instanceof StructType) {
    StructType structType = (StructType) dataType;
    org.apache.spark.sql.types.StructField[] fields = structType.fields();
    List<StructField> carbonFields = new ArrayList<>();
    for (org.apache.spark.sql.types.StructField field : fields) {
      carbonFields.add(
          new StructField(field.name(), convertSparkToCarbonDataType(field.dataType())));
    }
    return DataTypes.createStructType(carbonFields);
  } else {
    throw new UnsupportedOperationException("getting " + dataType + " from presto");
  }
}
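As a rough illustration of the recursion, here is a minimal, hypothetical sketch of a nested Spark type this method handles; the class name and main driver are illustrative only, and the private method above is referenced only to describe the expected output shape:

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class ConversionSketch {
  public static void main(String[] args) {
    // A nested Spark type: array<struct<name:string, scores:array<double>>>.
    // Passing this to convertSparkToCarbonDataType would walk the ArrayType
    // and StructType branches above and yield the Carbon equivalent
    // ARRAY<STRUCT<name:STRING, scores:ARRAY<DOUBLE>>>.
    org.apache.spark.sql.types.DataType sparkType =
        DataTypes.createArrayType(
            new StructType()
                .add("name", DataTypes.StringType)
                .add("scores", DataTypes.createArrayType(DataTypes.DoubleType)));
    System.out.println(sparkType.simpleString());
  }
}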
Use of org.apache.spark.sql.types.ArrayType in project cdap by caskdata.
From the class DataFramesTest, method testArrayType:
@Test
public void testArrayType() {
  // Simple array
  Schema schema = Schema.arrayOf(Schema.of(Schema.Type.INT));
  ArrayType dataType = DataFrames.toDataType(schema);
  Assert.assertFalse(dataType.containsNull());
  Assert.assertEquals(DataTypes.IntegerType, dataType.elementType());
  Assert.assertEquals(schema, DataFrames.toSchema(dataType));

  // Array with nullable element
  schema = Schema.arrayOf(Schema.nullableOf(Schema.of(Schema.Type.STRING)));
  dataType = DataFrames.toDataType(schema);
  Assert.assertTrue(dataType.containsNull());
  Assert.assertEquals(DataTypes.StringType, dataType.elementType());
  Assert.assertEquals(schema, DataFrames.toSchema(dataType));

  // Byte array special case
  dataType = ArrayType.apply(DataTypes.ByteType);
  Assert.assertEquals(Schema.of(Schema.Type.BYTES), DataFrames.toSchema(dataType));
}
Use of org.apache.spark.sql.types.ArrayType in project bunsen by cerner.
From the class SchemaConverterTest, method getField:
/**
 * Returns the type of a nested field.
 */
DataType getField(DataType dataType, boolean isNullable, String... names) {
  StructType schema = dataType instanceof ArrayType
      ? (StructType) ((ArrayType) dataType).elementType()
      : (StructType) dataType;

  StructField field = Arrays.stream(schema.fields())
      .filter(sf -> sf.name().equalsIgnoreCase(names[0]))
      .findFirst()
      .get();

  DataType child = field.dataType();

  if (names.length == 1) {
    // Terminal name: check the nullability and return the field's type.
    Assert.assertEquals("Unexpected nullability of field " + field.name(),
        isNullable, field.nullable());
    return child;
  } else {
    // Recurse through children while there are more names.
    return getField(child, isNullable, Arrays.copyOfRange(names, 1, names.length));
  }
}
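As a usage sketch (hypothetical, and assuming it runs inside SchemaConverterTest so the getField helper above is in scope), drilling two names deep into a small hand-built schema returns the ArrayType of the "coding" field:

StructType schema = new StructType()
    .add("severity", new StructType()
        .add("coding", DataTypes.createArrayType(new StructType()
            .add("system", DataTypes.StringType)))
        .add("text", DataTypes.StringType));

// Walks "severity" first, then asserts nullability on "coding" and returns its type.
DataType coding = getField(schema, true, "severity", "coding");
Assert.assertTrue(coding instanceof ArrayType);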
Use of org.apache.spark.sql.types.ArrayType in project bunsen by cerner.
From the class SchemaConverterTest, method codeableConceptToStruct:
@Test
public void codeableConceptToStruct() {
  DataType codeableType = getField(conditionSchema, true, "severity");

  Assert.assertTrue(codeableType instanceof StructType);
  Assert.assertTrue(getField(codeableType, true, "coding") instanceof ArrayType);
  Assert.assertTrue(getField(codeableType, true, "text") instanceof StringType);
}
Use of org.apache.spark.sql.types.ArrayType in project cdap by caskdata.
From the class DataFrames, method dataTypeToSchema:
/**
 * Converts a Spark {@link DataType} to a {@link Schema} object.
 *
 * @param dataType the data type to convert from
 * @param recordCounter tracks the number of record schemas created; used for record name generation only
 * @return a new {@link Schema}
 */
private static Schema dataTypeToSchema(DataType dataType, int[] recordCounter) {
  if (dataType.equals(DataTypes.NullType)) {
    return Schema.of(Schema.Type.NULL);
  }
  if (dataType.equals(DataTypes.BooleanType)) {
    return Schema.of(Schema.Type.BOOLEAN);
  }
  if (dataType.equals(DataTypes.ByteType)) {
    return Schema.of(Schema.Type.INT);
  }
  if (dataType.equals(DataTypes.ShortType)) {
    return Schema.of(Schema.Type.INT);
  }
  if (dataType.equals(DataTypes.IntegerType)) {
    return Schema.of(Schema.Type.INT);
  }
  if (dataType.equals(DataTypes.LongType)) {
    return Schema.of(Schema.Type.LONG);
  }
  if (dataType.equals(DataTypes.FloatType)) {
    return Schema.of(Schema.Type.FLOAT);
  }
  if (dataType.equals(DataTypes.DoubleType)) {
    return Schema.of(Schema.Type.DOUBLE);
  }
  if (dataType.equals(DataTypes.BinaryType)) {
    return Schema.of(Schema.Type.BYTES);
  }
  if (dataType.equals(DataTypes.StringType)) {
    return Schema.of(Schema.Type.STRING);
  }
  if (dataType instanceof ArrayType) {
    ArrayType arrayType = (ArrayType) dataType;
    // Special case for byte array
    if (arrayType.elementType().equals(DataTypes.ByteType)) {
      return Schema.of(Schema.Type.BYTES);
    }
    Schema componentSchema = dataTypeToSchema(arrayType.elementType(), recordCounter);
    return Schema.arrayOf(
        arrayType.containsNull() ? Schema.nullableOf(componentSchema) : componentSchema);
  }
  if (dataType instanceof MapType) {
    MapType mapType = (MapType) dataType;
    Schema valueSchema = dataTypeToSchema(mapType.valueType(), recordCounter);
    return Schema.mapOf(
        dataTypeToSchema(mapType.keyType(), recordCounter),
        mapType.valueContainsNull() ? Schema.nullableOf(valueSchema) : valueSchema);
  }
  if (dataType instanceof StructType) {
    List<Schema.Field> fields = new ArrayList<>();
    for (StructField structField : ((StructType) dataType).fields()) {
      Schema fieldSchema = dataTypeToSchema(structField.dataType(), recordCounter);
      fields.add(Schema.Field.of(
          structField.name(),
          structField.nullable() ? Schema.nullableOf(fieldSchema) : fieldSchema));
    }
    return Schema.recordOf("Record" + recordCounter[0]++, fields);
  }
  // Some special types in Spark SQL
  if (dataType.equals(DataTypes.TimestampType)) {
    return Schema.of(Schema.Type.LONG);
  }
  if (dataType.equals(DataTypes.DateType)) {
    return Schema.of(Schema.Type.LONG);
  }
  // The CalendarInterval type is not supported for now, as there is no equivalent in Schema.
  throw new IllegalArgumentException("Unsupported data type: " + dataType.typeName());
}
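A minimal usage sketch through the public entry point seen in the tests above, DataFrames.toSchema; the import paths are assumed from the caskdata-era CDAP layout and may differ in other versions:

import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.spark.sql.DataFrames;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class DataTypeToSchemaSketch {
  public static void main(String[] args) {
    // struct<id:long (non-null), tags:map<string,string> (nullable values)>.
    StructType dataType = new StructType()
        .add("id", DataTypes.LongType, false)
        .add("tags", DataTypes.createMapType(
            DataTypes.StringType, DataTypes.StringType, true));

    // Expect a record schema (named "Record0" by the counter above) with a
    // non-nullable long "id" and a nullable map field whose values are
    // nullable strings.
    Schema schema = DataFrames.toSchema(dataType);
    System.out.println(schema);
  }
}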