Search in sources :

Example 1 with DecimalType

Use of org.apache.spark.sql.types.DecimalType in the project carbondata by Apache.

The class SafeVariableLengthDimensionDataChunkStore defines the method fillRow.

@Override
public void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) {
    // For explicitly sorted columns, map the logical row id through the
    // reverse inverted index to its physical storage position.
    if (isExplictSorted) {
        rowId = invertedIndexReverse[rowId];
    }
    // Locate the row inside the memory block:
    //   start  = offset of this row's data
    //   length = for a non-last row, the gap to the next row's offset minus
    //            the 2-byte length header; for the last row, everything up
    //            to the end of the data array.
    final int start = dataOffsets[rowId];
    final short length;
    if (rowId < numberOfRows - 1) {
        length = (short) (dataOffsets[rowId + 1]
            - (start + CarbonCommonConstants.SHORT_SIZE_IN_BYTE));
    } else {
        // Last record: there is no next offset, so consume the remainder.
        length = (short) (this.data.length - start);
    }
    // A stored member-default value represents SQL null.
    boolean isMemberDefault = ByteUtil.UnsafeComparer.INSTANCE.equals(
        CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0,
        CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length,
        data, start, length);
    if (isMemberDefault) {
        vector.putNull(vectorRow);
        return;
    }
    // Decode the raw bytes according to the vector's Spark data type.
    DataType type = vector.getType();
    if (type instanceof StringType) {
        vector.putBytes(vectorRow, start, length, data);
    } else if (type instanceof BooleanType) {
        vector.putBoolean(vectorRow, ByteUtil.toBoolean(data[start]));
    } else if (type instanceof ShortType) {
        vector.putShort(vectorRow, ByteUtil.toShort(data, start, length));
    } else if (type instanceof IntegerType) {
        vector.putInt(vectorRow, ByteUtil.toInt(data, start, length));
    } else if (type instanceof FloatType) {
        vector.putFloat(vectorRow, ByteUtil.toFloat(data, start));
    } else if (type instanceof DoubleType) {
        vector.putDouble(vectorRow, ByteUtil.toDouble(data, start));
    } else if (type instanceof LongType) {
        vector.putLong(vectorRow, ByteUtil.toLong(data, start, length));
    } else if (type instanceof DecimalType) {
        vector.putDecimal(vectorRow,
            Decimal.apply(ByteUtil.toBigDecimal(data, start, length)),
            DecimalType.MAX_PRECISION());
    }
}
Also used : IntegerType(org.apache.spark.sql.types.IntegerType) LongType(org.apache.spark.sql.types.LongType) StringType(org.apache.spark.sql.types.StringType) DoubleType(org.apache.spark.sql.types.DoubleType) ShortType(org.apache.spark.sql.types.ShortType) BooleanType(org.apache.spark.sql.types.BooleanType) DataType(org.apache.spark.sql.types.DataType) DecimalType(org.apache.spark.sql.types.DecimalType) FloatType(org.apache.spark.sql.types.FloatType)

Example 2 with DecimalType

Use of org.apache.spark.sql.types.DecimalType in the project carbondata by Apache.

The class VectorizedCarbonRecordReader defines the method initBatch.

/**
 * Returns the ColumnarBatch object that will be used for all rows returned by this reader.
 * This object is reused. Calling this enables the vectorized reader. This should be called
 * before any calls to nextKeyValue/nextBatch.
 */
private void initBatch(MemoryMode memMode) {
    List<QueryDimension> dims = queryModel.getQueryDimension();
    List<QueryMeasure> msrs = queryModel.getQueryMeasures();
    // One schema slot per projected column, placed by query order.
    StructField[] fields = new StructField[dims.size() + msrs.size()];

    for (QueryDimension dim : dims) {
        if (dim.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            // Direct-dictionary columns surface the generator's return type.
            DirectDictionaryGenerator generator = DirectDictionaryKeyGeneratorFactory
                .getDirectDictionaryGenerator(dim.getDimension().getDataType());
            fields[dim.getQueryOrder()] = new StructField(dim.getColumnName(),
                CarbonScalaUtil.convertCarbonToSparkDataType(generator.getReturnType()), true, null);
        } else if (!dim.getDimension().hasEncoding(Encoding.DICTIONARY)
            || dim.getDimension().isComplex()) {
            // No-dictionary and complex columns keep their native carbon data type.
            fields[dim.getQueryOrder()] = new StructField(dim.getColumnName(),
                CarbonScalaUtil.convertCarbonToSparkDataType(dim.getDimension().getDataType()), true, null);
        } else {
            // Plain dictionary-encoded columns are read as integer surrogate keys.
            fields[dim.getQueryOrder()] = new StructField(dim.getColumnName(),
                CarbonScalaUtil.convertCarbonToSparkDataType(DataType.INT), true, null);
        }
    }

    for (QueryMeasure msr : msrs) {
        switch (msr.getMeasure().getDataType()) {
            case SHORT:
            case INT:
            case LONG:
                // Integral measures keep their native type.
                fields[msr.getQueryOrder()] = new StructField(msr.getColumnName(),
                    CarbonScalaUtil.convertCarbonToSparkDataType(msr.getMeasure().getDataType()), true, null);
                break;
            case DECIMAL:
                // Decimal measures carry their declared precision and scale.
                fields[msr.getQueryOrder()] = new StructField(msr.getColumnName(),
                    new DecimalType(msr.getMeasure().getPrecision(), msr.getMeasure().getScale()), true, null);
                break;
            default:
                // All remaining measure types are mapped to double.
                fields[msr.getQueryOrder()] = new StructField(msr.getColumnName(),
                    CarbonScalaUtil.convertCarbonToSparkDataType(DataType.DOUBLE), true, null);
        }
    }

    // Allocate the reusable batch and wrap each Spark column vector so the
    // carbon scan can write into it, sharing one row-filter mask across columns.
    columnarBatch = ColumnarBatch.allocate(new StructType(fields), memMode);
    boolean[] filteredRows = new boolean[columnarBatch.capacity()];
    CarbonColumnVector[] vectors = new CarbonColumnVector[fields.length];
    for (int i = 0; i < fields.length; i++) {
        vectors[i] = new ColumnarVectorWrapper(columnarBatch.column(i), filteredRows);
    }
    carbonColumnarBatch = new CarbonColumnarBatch(vectors, columnarBatch.capacity(), filteredRows);
}
Also used : StructType(org.apache.spark.sql.types.StructType) CarbonColumnarBatch(org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch) CarbonColumnVector(org.apache.carbondata.core.scan.result.vector.CarbonColumnVector) StructField(org.apache.spark.sql.types.StructField) QueryMeasure(org.apache.carbondata.core.scan.model.QueryMeasure) DecimalType(org.apache.spark.sql.types.DecimalType) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator) QueryDimension(org.apache.carbondata.core.scan.model.QueryDimension)

Aggregations

DecimalType (org.apache.spark.sql.types.DecimalType)2 DirectDictionaryGenerator (org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator)1 QueryDimension (org.apache.carbondata.core.scan.model.QueryDimension)1 QueryMeasure (org.apache.carbondata.core.scan.model.QueryMeasure)1 CarbonColumnVector (org.apache.carbondata.core.scan.result.vector.CarbonColumnVector)1 CarbonColumnarBatch (org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch)1 BooleanType (org.apache.spark.sql.types.BooleanType)1 DataType (org.apache.spark.sql.types.DataType)1 DoubleType (org.apache.spark.sql.types.DoubleType)1 FloatType (org.apache.spark.sql.types.FloatType)1 IntegerType (org.apache.spark.sql.types.IntegerType)1 LongType (org.apache.spark.sql.types.LongType)1 ShortType (org.apache.spark.sql.types.ShortType)1 StringType (org.apache.spark.sql.types.StringType)1 StructField (org.apache.spark.sql.types.StructField)1 StructType (org.apache.spark.sql.types.StructType)1