Search in sources :

Example 1 with ArrayType

use of org.apache.spark.sql.types.ArrayType in project carbondata by apache.

From the class CarbonColumnVectorWrapper, the method convertSparkToCarbonDataType.

// TODO: this is copied from carbondata-spark-common module, use presto type instead of this
private org.apache.carbondata.core.metadata.datatype.DataType convertSparkToCarbonDataType(org.apache.spark.sql.types.DataType dataType) {
    // Map each Spark SQL type to its CarbonData counterpart; guard clauses keep
    // the common scalar cases flat and readable.
    if (dataType instanceof StringType) {
        return DataTypes.STRING;
    }
    if (dataType instanceof ShortType) {
        return DataTypes.SHORT;
    }
    if (dataType instanceof IntegerType) {
        return DataTypes.INT;
    }
    if (dataType instanceof LongType) {
        return DataTypes.LONG;
    }
    if (dataType instanceof DoubleType) {
        return DataTypes.DOUBLE;
    }
    if (dataType instanceof FloatType) {
        return DataTypes.FLOAT;
    }
    if (dataType instanceof DateType) {
        return DataTypes.DATE;
    }
    if (dataType instanceof BooleanType) {
        return DataTypes.BOOLEAN;
    }
    if (dataType instanceof TimestampType) {
        return DataTypes.TIMESTAMP;
    }
    if (dataType instanceof NullType) {
        return DataTypes.NULL;
    }
    if (dataType instanceof DecimalType) {
        // Decimal carries precision/scale, so it needs a parameterized Carbon type.
        DecimalType decimalType = (DecimalType) dataType;
        return DataTypes.createDecimalType(decimalType.precision(), decimalType.scale());
    }
    if (dataType instanceof ArrayType) {
        // Arrays convert recursively through their element type.
        org.apache.spark.sql.types.DataType sparkElementType = ((ArrayType) dataType).elementType();
        return DataTypes.createArrayType(convertSparkToCarbonDataType(sparkElementType));
    }
    if (dataType instanceof StructType) {
        // Structs convert field-by-field, recursing on each field's type.
        StructType structType = (StructType) dataType;
        List<StructField> convertedFields = new ArrayList<>();
        for (org.apache.spark.sql.types.StructField sparkField : structType.fields()) {
            convertedFields.add(new StructField(sparkField.name(), convertSparkToCarbonDataType(sparkField.dataType())));
        }
        return DataTypes.createStructType(convertedFields);
    }
    throw new UnsupportedOperationException("getting " + dataType + " from presto");
}
Also used : LongType(org.apache.spark.sql.types.LongType) StructType(org.apache.spark.sql.types.StructType) StringType(org.apache.spark.sql.types.StringType) ArrayList(java.util.ArrayList) FloatType(org.apache.spark.sql.types.FloatType) ArrayType(org.apache.spark.sql.types.ArrayType) StructField(org.apache.carbondata.core.metadata.datatype.StructField) TimestampType(org.apache.spark.sql.types.TimestampType) DataType(org.apache.carbondata.core.metadata.datatype.DataType) DateType(org.apache.spark.sql.types.DateType) ShortType(org.apache.spark.sql.types.ShortType) BooleanType(org.apache.spark.sql.types.BooleanType) IntegerType(org.apache.spark.sql.types.IntegerType) DoubleType(org.apache.spark.sql.types.DoubleType) DecimalType(org.apache.spark.sql.types.DecimalType) NullType(org.apache.spark.sql.types.NullType)

Example 2 with ArrayType

use of org.apache.spark.sql.types.ArrayType in project cdap by caskdata.

From the class DataFramesTest, the method testArrayType.

@Test
public void testArrayType() {
    // Case 1: array of non-nullable ints round-trips through toDataType/toSchema.
    Schema intArraySchema = Schema.arrayOf(Schema.of(Schema.Type.INT));
    ArrayType intArrayType = DataFrames.toDataType(intArraySchema);
    Assert.assertFalse(intArrayType.containsNull());
    Assert.assertEquals(DataTypes.IntegerType, intArrayType.elementType());
    Assert.assertEquals(intArraySchema, DataFrames.toSchema(intArrayType));
    // Case 2: a nullable element schema must produce containsNull() == true.
    Schema nullableStringArraySchema = Schema.arrayOf(Schema.nullableOf(Schema.of(Schema.Type.STRING)));
    ArrayType nullableStringArrayType = DataFrames.toDataType(nullableStringArraySchema);
    Assert.assertTrue(nullableStringArrayType.containsNull());
    Assert.assertEquals(DataTypes.StringType, nullableStringArrayType.elementType());
    Assert.assertEquals(nullableStringArraySchema, DataFrames.toSchema(nullableStringArrayType));
    // Case 3: an array of bytes collapses to the BYTES scalar schema.
    ArrayType byteArrayType = ArrayType.apply(DataTypes.ByteType);
    Assert.assertEquals(Schema.of(Schema.Type.BYTES), DataFrames.toSchema(byteArrayType));
}
Also used : ArrayType(org.apache.spark.sql.types.ArrayType) Schema(co.cask.cdap.api.data.schema.Schema) Test(org.junit.Test)

Example 3 with ArrayType

use of org.apache.spark.sql.types.ArrayType in project bunsen by cerner.

From the class SchemaConverterTest, the method getField.

/**
 * Returns the type of a nested field, navigating the given path of field names.
 * If the starting type is an array, navigation begins at its (struct) element type.
 *
 * @param dataType the struct, or array of structs, to navigate
 * @param isNullable expected nullability of the terminal field, checked with an assertion
 * @param names path of field names, matched case-insensitively; must be non-empty
 * @return the data type of the terminal field
 * @throws IllegalArgumentException if a name in the path does not match any field
 */
DataType getField(DataType dataType, boolean isNullable, String... names) {
    StructType schema = dataType instanceof ArrayType ? (StructType) ((ArrayType) dataType).elementType() : (StructType) dataType;
    // Fail with a descriptive message rather than the bare NoSuchElementException
    // that an unchecked Optional.get() would throw when the field is missing.
    StructField field = Arrays.stream(schema.fields())
        .filter(sf -> sf.name().equalsIgnoreCase(names[0]))
        .findFirst()
        .orElseThrow(() -> new IllegalArgumentException("No field named '" + names[0] + "' in " + schema));
    DataType child = field.dataType();
    // Recurse through children if there are more names.
    if (names.length == 1) {
        // Nullability is only asserted on the terminal field of the path.
        Assert.assertEquals("Unexpected nullability of field " + field.name(), isNullable, field.nullable());
        return child;
    } else {
        return getField(child, isNullable, Arrays.copyOfRange(names, 1, names.length));
    }
}
Also used : ArrayType(org.apache.spark.sql.types.ArrayType) StructField(org.apache.spark.sql.types.StructField) StructType(org.apache.spark.sql.types.StructType) DataType(org.apache.spark.sql.types.DataType)

Example 4 with ArrayType

use of org.apache.spark.sql.types.ArrayType in project bunsen by cerner.

From the class SchemaConverterTest, the method codeableConceptToStruct.

@Test
public void codeableConceptToStruct() {
    // A FHIR CodeableConcept should map to a struct holding an array of codings
    // and a plain-text description.
    DataType severity = getField(conditionSchema, true, "severity");
    Assert.assertTrue(severity instanceof StructType);
    DataType coding = getField(severity, true, "coding");
    Assert.assertTrue(coding instanceof ArrayType);
    DataType text = getField(severity, true, "text");
    Assert.assertTrue(text instanceof StringType);
}
Also used : ArrayType(org.apache.spark.sql.types.ArrayType) StructType(org.apache.spark.sql.types.StructType) StringType(org.apache.spark.sql.types.StringType) DataType(org.apache.spark.sql.types.DataType) Test(org.junit.Test)

Example 5 with ArrayType

use of org.apache.spark.sql.types.ArrayType in project cdap by caskdata.

From the class DataFrames, the method dataTypeToSchema.

/**
 * Converts a Spark {@link DataType} to a {@link Schema} object.
 *
 * @param dataType the data type to convert from
 * @param recordCounter tracks the number of record schemas being created; used for record name generation only
 * @return a new {@link Schema}.
 * @throws IllegalArgumentException if the data type has no Schema equivalent (e.g. CalendarInterval)
 */
private static Schema dataTypeToSchema(DataType dataType, int[] recordCounter) {
    if (dataType.equals(DataTypes.NullType)) {
        return Schema.of(Schema.Type.NULL);
    }
    if (dataType.equals(DataTypes.BooleanType)) {
        return Schema.of(Schema.Type.BOOLEAN);
    }
    // Byte and short have no direct Schema equivalent; both widen to INT.
    if (dataType.equals(DataTypes.ByteType)) {
        return Schema.of(Schema.Type.INT);
    }
    if (dataType.equals(DataTypes.ShortType)) {
        return Schema.of(Schema.Type.INT);
    }
    if (dataType.equals(DataTypes.IntegerType)) {
        return Schema.of(Schema.Type.INT);
    }
    if (dataType.equals(DataTypes.LongType)) {
        return Schema.of(Schema.Type.LONG);
    }
    if (dataType.equals(DataTypes.FloatType)) {
        return Schema.of(Schema.Type.FLOAT);
    }
    if (dataType.equals(DataTypes.DoubleType)) {
        return Schema.of(Schema.Type.DOUBLE);
    }
    if (dataType.equals(DataTypes.BinaryType)) {
        return Schema.of(Schema.Type.BYTES);
    }
    if (dataType.equals(DataTypes.StringType)) {
        return Schema.of(Schema.Type.STRING);
    }
    if (dataType instanceof ArrayType) {
        ArrayType arrayType = (ArrayType) dataType;
        // Special case for byte array: collapses to the BYTES scalar schema.
        // Use equals() for consistency with the other type comparisons in this
        // method rather than relying on the singleton identity of DataTypes.ByteType.
        if (DataTypes.ByteType.equals(arrayType.elementType())) {
            return Schema.of(Schema.Type.BYTES);
        }
        Schema componentSchema = dataTypeToSchema(arrayType.elementType(), recordCounter);
        return Schema.arrayOf(arrayType.containsNull() ? Schema.nullableOf(componentSchema) : componentSchema);
    }
    if (dataType instanceof MapType) {
        MapType mapType = (MapType) dataType;
        Schema valueSchema = dataTypeToSchema(mapType.valueType(), recordCounter);
        return Schema.mapOf(dataTypeToSchema(mapType.keyType(), recordCounter), mapType.valueContainsNull() ? Schema.nullableOf(valueSchema) : valueSchema);
    }
    if (dataType instanceof StructType) {
        List<Schema.Field> fields = new ArrayList<>();
        for (StructField structField : ((StructType) dataType).fields()) {
            Schema fieldSchema = dataTypeToSchema(structField.dataType(), recordCounter);
            fields.add(Schema.Field.of(structField.name(), structField.nullable() ? Schema.nullableOf(fieldSchema) : fieldSchema));
        }
        // Record names must be unique; generate them from the shared counter.
        return Schema.recordOf("Record" + recordCounter[0]++, fields);
    }
    // Some special types in Spark SQL: both represent an instant/day as a LONG.
    if (dataType.equals(DataTypes.TimestampType)) {
        return Schema.of(Schema.Type.LONG);
    }
    if (dataType.equals(DataTypes.DateType)) {
        return Schema.of(Schema.Type.LONG);
    }
    // Not support the CalendarInterval type for now, as there is no equivalent in Schema
    throw new IllegalArgumentException("Unsupported data type: " + dataType.typeName());
}
Also used : ArrayType(org.apache.spark.sql.types.ArrayType) StructField(org.apache.spark.sql.types.StructField) StructField(org.apache.spark.sql.types.StructField) StructType(org.apache.spark.sql.types.StructType) Schema(co.cask.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) MapType(org.apache.spark.sql.types.MapType)

Aggregations

ArrayType (org.apache.spark.sql.types.ArrayType)6 StructType (org.apache.spark.sql.types.StructType)5 ArrayList (java.util.ArrayList)3 StructField (org.apache.spark.sql.types.StructField)3 Schema (co.cask.cdap.api.data.schema.Schema)2 DataType (org.apache.spark.sql.types.DataType)2 MapType (org.apache.spark.sql.types.MapType)2 StringType (org.apache.spark.sql.types.StringType)2 Test (org.junit.Test)2 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)1 ByteBuffer (java.nio.ByteBuffer)1 Date (java.sql.Date)1 Timestamp (java.sql.Timestamp)1 Collection (java.util.Collection)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 DataType (org.apache.carbondata.core.metadata.datatype.DataType)1 StructField (org.apache.carbondata.core.metadata.datatype.StructField)1 BooleanType (org.apache.spark.sql.types.BooleanType)1 DateType (org.apache.spark.sql.types.DateType)1