Search in sources:

Example 6 with StringType$

use of org.apache.spark.sql.types.StringType$ in project carbondata by apache.

the class CarbonColumnVectorWrapper method convertSparkToCarbonDataType.

// TODO: this is copied from carbondata-spark-common module, use presto type instead of this
/**
 * Maps a Spark SQL data type onto the corresponding CarbonData data type.
 *
 * <p>Decimal types carry over their precision and scale. Array element types and struct
 * field types are converted by calling this method recursively.
 *
 * @param dataType the Spark SQL type to translate
 * @return the matching CarbonData type
 * @throws UnsupportedOperationException if none of the handled Spark types matches
 */
private org.apache.carbondata.core.metadata.datatype.DataType convertSparkToCarbonDataType(org.apache.spark.sql.types.DataType dataType) {
    // Simple one-to-one mappings first; each guard returns as soon as it matches.
    if (dataType instanceof StringType) {
        return DataTypes.STRING;
    }
    if (dataType instanceof ShortType) {
        return DataTypes.SHORT;
    }
    if (dataType instanceof IntegerType) {
        return DataTypes.INT;
    }
    if (dataType instanceof LongType) {
        return DataTypes.LONG;
    }
    if (dataType instanceof DoubleType) {
        return DataTypes.DOUBLE;
    }
    if (dataType instanceof FloatType) {
        return DataTypes.FLOAT;
    }
    if (dataType instanceof DateType) {
        return DataTypes.DATE;
    }
    if (dataType instanceof BooleanType) {
        return DataTypes.BOOLEAN;
    }
    if (dataType instanceof TimestampType) {
        return DataTypes.TIMESTAMP;
    }
    if (dataType instanceof NullType) {
        return DataTypes.NULL;
    }

    // Parameterized and nested types.
    if (dataType instanceof DecimalType) {
        DecimalType decimalType = (DecimalType) dataType;
        return DataTypes.createDecimalType(decimalType.precision(), decimalType.scale());
    }
    if (dataType instanceof ArrayType) {
        ArrayType arrayType = (ArrayType) dataType;
        return DataTypes.createArrayType(convertSparkToCarbonDataType(arrayType.elementType()));
    }
    if (dataType instanceof StructType) {
        List<StructField> converted = new ArrayList<>();
        for (org.apache.spark.sql.types.StructField sparkField : ((StructType) dataType).fields()) {
            String fieldName = sparkField.name();
            org.apache.carbondata.core.metadata.datatype.DataType carbonType = convertSparkToCarbonDataType(sparkField.dataType());
            converted.add(new StructField(fieldName, carbonType));
        }
        return DataTypes.createStructType(converted);
    }

    throw new UnsupportedOperationException("getting " + dataType + " from presto");
}
Also used : LongType(org.apache.spark.sql.types.LongType) StructType(org.apache.spark.sql.types.StructType) StringType(org.apache.spark.sql.types.StringType) ArrayList(java.util.ArrayList) FloatType(org.apache.spark.sql.types.FloatType) ArrayType(org.apache.spark.sql.types.ArrayType) StructField(org.apache.carbondata.core.metadata.datatype.StructField) TimestampType(org.apache.spark.sql.types.TimestampType) DataType(org.apache.carbondata.core.metadata.datatype.DataType) DateType(org.apache.spark.sql.types.DateType) ShortType(org.apache.spark.sql.types.ShortType) BooleanType(org.apache.spark.sql.types.BooleanType) IntegerType(org.apache.spark.sql.types.IntegerType) DoubleType(org.apache.spark.sql.types.DoubleType) DecimalType(org.apache.spark.sql.types.DecimalType) NullType(org.apache.spark.sql.types.NullType)

Example 7 with StringType$

use of org.apache.spark.sql.types.StringType$ in project bunsen by cerner.

the class SchemaConverterTest method codingToStruct.

/**
 * Checks the types of the fields under the condition schema's {@code severity.coding}
 * entry: four string fields and one boolean field.
 */
@Test
public void codingToStruct() {
    DataType codingType = getField(conditionSchema, true, "severity", "coding");
    // These four fields are all asserted to be strings, in this order.
    String[] stringFieldNames = { "system", "version", "code", "display" };
    for (String fieldName : stringFieldNames) {
        Assert.assertTrue(getField(codingType, true, fieldName) instanceof StringType);
    }
    Assert.assertTrue(getField(codingType, true, "userSelected") instanceof BooleanType);
}
Also used : StringType(org.apache.spark.sql.types.StringType) BooleanType(org.apache.spark.sql.types.BooleanType) DataType(org.apache.spark.sql.types.DataType) Test(org.junit.Test)

Example 8 with StringType$

use of org.apache.spark.sql.types.StringType$ in project bunsen by cerner.

the class SchemaConverterTest method codeableConceptToStruct.

/**
 * Checks that the condition schema's {@code severity} field is a struct whose
 * {@code coding} field is an array and whose {@code text} field is a string.
 */
@Test
public void codeableConceptToStruct() {
    DataType codeableType = getField(conditionSchema, true, "severity");
    boolean severityIsStruct = codeableType instanceof StructType;
    Assert.assertTrue(severityIsStruct);

    // Each field is looked up immediately before it is asserted.
    DataType codingField = getField(codeableType, true, "coding");
    Assert.assertTrue(codingField instanceof ArrayType);

    DataType textField = getField(codeableType, true, "text");
    Assert.assertTrue(textField instanceof StringType);
}
Also used : ArrayType(org.apache.spark.sql.types.ArrayType) StructType(org.apache.spark.sql.types.StructType) StringType(org.apache.spark.sql.types.StringType) DataType(org.apache.spark.sql.types.DataType) Test(org.junit.Test)

Example 9 with StringType$

use of org.apache.spark.sql.types.StringType$ in project jpmml-sparkml by jpmml.

the class SparkMLEncoder method createDataField.

/**
 * Creates a data field for the schema column identified by {@code name}, choosing the
 * operational type and data type from the column's Spark SQL type.
 *
 * <p>String and boolean columns become categorical fields; integral and double columns
 * become continuous fields.
 *
 * @param name the field name; its value is used to look up the schema column
 * @return the data field produced by the three-argument {@code createDataField} overload
 * @throws IllegalArgumentException if the column is not of string, integral, double or boolean type
 */
public DataField createDataField(FieldName name) {
    StructField column = getSchema().apply(name.getValue());
    org.apache.spark.sql.types.DataType sparkDataType = column.dataType();

    // Select the target types first, then delegate once at the end.
    final OpType opType;
    final DataType dataType;
    if (sparkDataType instanceof StringType) {
        opType = OpType.CATEGORICAL;
        dataType = DataType.STRING;
    } else if (sparkDataType instanceof IntegralType) {
        opType = OpType.CONTINUOUS;
        dataType = DataType.INTEGER;
    } else if (sparkDataType instanceof DoubleType) {
        opType = OpType.CONTINUOUS;
        dataType = DataType.DOUBLE;
    } else if (sparkDataType instanceof BooleanType) {
        opType = OpType.CATEGORICAL;
        dataType = DataType.BOOLEAN;
    } else {
        throw new IllegalArgumentException("Expected string, integral, double or boolean data type, got " + sparkDataType.typeName() + " data type");
    }
    return createDataField(name, opType, dataType);
}
Also used : StructField(org.apache.spark.sql.types.StructField) StructType(org.apache.spark.sql.types.StructType) StringType(org.apache.spark.sql.types.StringType) IntegralType(org.apache.spark.sql.types.IntegralType) DoubleType(org.apache.spark.sql.types.DoubleType) BooleanType(org.apache.spark.sql.types.BooleanType)

Example 10 with StringType$

use of org.apache.spark.sql.types.StringType$ in project jpmml-sparkml by jpmml.

the class SparkMLEncoder method createDataField.

/**
 * Creates a data field for the schema column called {@code name}, choosing the
 * operational type and data type from the column's Spark SQL type.
 *
 * <p>String and boolean columns become categorical fields; integral and double columns
 * become continuous fields.
 *
 * @param name the name used to look up the schema column
 * @return the data field produced by the three-argument {@code createDataField} overload
 * @throws IllegalArgumentException if the column is not of string, integral, double or boolean type
 */
public DataField createDataField(String name) {
    StructField column = getSchema().apply(name);
    org.apache.spark.sql.types.DataType sparkDataType = column.dataType();

    // Select the target types first, then delegate once at the end.
    final OpType opType;
    final DataType dataType;
    if (sparkDataType instanceof StringType) {
        opType = OpType.CATEGORICAL;
        dataType = DataType.STRING;
    } else if (sparkDataType instanceof IntegralType) {
        opType = OpType.CONTINUOUS;
        dataType = DataType.INTEGER;
    } else if (sparkDataType instanceof DoubleType) {
        opType = OpType.CONTINUOUS;
        dataType = DataType.DOUBLE;
    } else if (sparkDataType instanceof BooleanType) {
        opType = OpType.CATEGORICAL;
        dataType = DataType.BOOLEAN;
    } else {
        throw new IllegalArgumentException("Expected string, integral, double or boolean data type, got " + sparkDataType.typeName() + " data type");
    }
    return createDataField(name, opType, dataType);
}
Also used : StructField(org.apache.spark.sql.types.StructField) StructType(org.apache.spark.sql.types.StructType) StringType(org.apache.spark.sql.types.StringType) IntegralType(org.apache.spark.sql.types.IntegralType) DoubleType(org.apache.spark.sql.types.DoubleType) BooleanType(org.apache.spark.sql.types.BooleanType)

Aggregations

StructType (org.apache.spark.sql.types.StructType): 10; StringType (org.apache.spark.sql.types.StringType): 9; BooleanType (org.apache.spark.sql.types.BooleanType): 7; StructField (org.apache.spark.sql.types.StructField): 7; DataType (org.apache.spark.sql.types.DataType): 6; ArrayType (org.apache.spark.sql.types.ArrayType): 5; DoubleType (org.apache.spark.sql.types.DoubleType): 5; List (java.util.List): 4; OpenLineage (io.openlineage.client.OpenLineage): 3; SparkAgentTestExtension (io.openlineage.spark.agent.SparkAgentTestExtension): 3; Path (java.nio.file.Path): 3; Optional (java.util.Optional): 3; SparkSession (org.apache.spark.sql.SparkSession): 3; DateType (org.apache.spark.sql.types.DateType): 3; DecimalType (org.apache.spark.sql.types.DecimalType): 3; FloatType (org.apache.spark.sql.types.FloatType): 3; IntegerType (org.apache.spark.sql.types.IntegerType): 3; LongType (org.apache.spark.sql.types.LongType): 3; Metadata (org.apache.spark.sql.types.Metadata): 3; ShortType (org.apache.spark.sql.types.ShortType): 3