Search in sources :

Example 1 with CarbonColumnarBatch

Use of org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch in the carbondata project by Apache.

From the class VectorizedCarbonRecordReader, method initBatch.

/**
   * Returns the ColumnarBatch object that will be used for all rows returned by this reader.
   * This object is reused. Calling this enables the vectorized reader. This should be called
   * before any calls to nextKeyValue/nextBatch.
   */
private void initBatch(MemoryMode memMode) {
    List<QueryDimension> queryDimension = queryModel.getQueryDimension();
    List<QueryMeasure> queryMeasures = queryModel.getQueryMeasures();
    StructField[] fields = new StructField[queryDimension.size() + queryMeasures.size()];
    for (int i = 0; i < queryDimension.size(); i++) {
        QueryDimension dim = queryDimension.get(i);
        if (dim.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            DirectDictionaryGenerator generator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(dim.getDimension().getDataType());
            fields[dim.getQueryOrder()] = new StructField(dim.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(generator.getReturnType()), true, null);
        } else if (!dim.getDimension().hasEncoding(Encoding.DICTIONARY)) {
            fields[dim.getQueryOrder()] = new StructField(dim.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(dim.getDimension().getDataType()), true, null);
        } else if (dim.getDimension().isComplex()) {
            fields[dim.getQueryOrder()] = new StructField(dim.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(dim.getDimension().getDataType()), true, null);
        } else {
            fields[dim.getQueryOrder()] = new StructField(dim.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(DataType.INT), true, null);
        }
    }
    for (int i = 0; i < queryMeasures.size(); i++) {
        QueryMeasure msr = queryMeasures.get(i);
        switch(msr.getMeasure().getDataType()) {
            case SHORT:
            case INT:
            case LONG:
                fields[msr.getQueryOrder()] = new StructField(msr.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(msr.getMeasure().getDataType()), true, null);
                break;
            case DECIMAL:
                fields[msr.getQueryOrder()] = new StructField(msr.getColumnName(), new DecimalType(msr.getMeasure().getPrecision(), msr.getMeasure().getScale()), true, null);
                break;
            default:
                fields[msr.getQueryOrder()] = new StructField(msr.getColumnName(), CarbonScalaUtil.convertCarbonToSparkDataType(DataType.DOUBLE), true, null);
        }
    }
    columnarBatch = ColumnarBatch.allocate(new StructType(fields), memMode);
    CarbonColumnVector[] vectors = new CarbonColumnVector[fields.length];
    boolean[] filteredRows = new boolean[columnarBatch.capacity()];
    for (int i = 0; i < fields.length; i++) {
        vectors[i] = new ColumnarVectorWrapper(columnarBatch.column(i), filteredRows);
    }
    carbonColumnarBatch = new CarbonColumnarBatch(vectors, columnarBatch.capacity(), filteredRows);
}
Also used : StructType(org.apache.spark.sql.types.StructType) CarbonColumnarBatch(org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch) CarbonColumnVector(org.apache.carbondata.core.scan.result.vector.CarbonColumnVector) StructField(org.apache.spark.sql.types.StructField) QueryMeasure(org.apache.carbondata.core.scan.model.QueryMeasure) DecimalType(org.apache.spark.sql.types.DecimalType) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator) QueryDimension(org.apache.carbondata.core.scan.model.QueryDimension)

Aggregations

DirectDictionaryGenerator (org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator): 1, QueryDimension (org.apache.carbondata.core.scan.model.QueryDimension): 1, QueryMeasure (org.apache.carbondata.core.scan.model.QueryMeasure): 1, CarbonColumnVector (org.apache.carbondata.core.scan.result.vector.CarbonColumnVector): 1, CarbonColumnarBatch (org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch): 1, DecimalType (org.apache.spark.sql.types.DecimalType): 1, StructField (org.apache.spark.sql.types.StructField): 1, StructType (org.apache.spark.sql.types.StructType): 1