Search in sources :

Example 1 with CarbonColumnVectorImpl

use of org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl in project carbondata by apache.

the class CarbonVectorizedRecordReader method initBatch.

/**
 * Builds {@code carbonColumnarBatch} on first use; later calls are no-ops.
 *
 * <p>One {@link StructField} is created per projected dimension and measure, placed at the
 * position given by the projection ordinal. A {@link CarbonColumnVectorImpl} is then allocated
 * for every field, and {@code projectionMapping} records, for each position, the index of the
 * first field that carries the same name.
 */
private void initBatch() {
    if (carbonColumnarBatch != null) {
        // The batch was already created by an earlier call.
        return;
    }
    final int pageRows = CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
    List<ProjectionDimension> dimensions = queryModel.getProjectionDimensions();
    List<ProjectionMeasure> measures = queryModel.getProjectionMeasures();
    StructField[] schema = new StructField[dimensions.size() + measures.size()];

    for (ProjectionDimension dimension : dimensions) {
        schema[dimension.getOrdinal()] =
            new StructField(dimension.getColumnName(), dimension.getDimension().getDataType());
    }

    for (ProjectionMeasure measure : measures) {
        DataType measureType = measure.getMeasure().getDataType();
        DataType fieldType;
        if (measureType == DataTypes.BOOLEAN
                || measureType == DataTypes.SHORT
                || measureType == DataTypes.INT
                || measureType == DataTypes.LONG
                || measureType == DataTypes.FLOAT
                || measureType == DataTypes.BYTE
                || measureType == DataTypes.BINARY) {
            // These types are exposed unchanged.
            fieldType = measureType;
        } else if (DataTypes.isDecimal(measureType)) {
            // Decimals are rebuilt from the measure's own precision and scale.
            fieldType = DataTypes.createDecimalType(
                measure.getMeasure().getPrecision(), measure.getMeasure().getScale());
        } else {
            // Every other measure type is exposed as DOUBLE.
            fieldType = DataTypes.DOUBLE;
        }
        schema[measure.getOrdinal()] = new StructField(measure.getColumnName(), fieldType);
    }

    CarbonColumnVector[] columnVectors = new CarbonColumnVector[schema.length];
    Map<String, Integer> firstIndexByName = new HashMap<>();
    for (int pos = 0; pos < schema.length; pos++) {
        StructField field = schema[pos];
        columnVectors[pos] = new CarbonColumnVectorImpl(pageRows, field.getDataType());
        String fieldName = field.getFieldName();
        Integer earlierIndex = firstIndexByName.get(fieldName);
        int mappedIndex;
        if (earlierIndex == null) {
            // First occurrence of this column name: it maps to itself.
            firstIndexByName.put(fieldName, pos);
            mappedIndex = pos;
        } else {
            // Repeated column name: point back at the first occurrence.
            mappedIndex = earlierIndex;
        }
        projectionMapping.add(mappedIndex);
    }
    carbonColumnarBatch = new CarbonColumnarBatch(columnVectors, pageRows, new boolean[pageRows]);
}
Also used : HashMap(java.util.HashMap) CarbonColumnarBatch(org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch) CarbonColumnVectorImpl(org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl) CarbonColumnVector(org.apache.carbondata.core.scan.result.vector.CarbonColumnVector) ProjectionDimension(org.apache.carbondata.core.scan.model.ProjectionDimension) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ProjectionMeasure(org.apache.carbondata.core.scan.model.ProjectionMeasure) DataType(org.apache.carbondata.core.metadata.datatype.DataType)

Example 2 with CarbonColumnVectorImpl

use of org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl in project carbondata by apache.

the class DimensionChunkReaderV3 method decodeDimension.

/**
 * Decodes one dimension page.
 *
 * <p>Pages whose encoders carry meta are decoded through {@code decodeDimensionByMeta} and
 * wrapped in a {@link ColumnPageWrapper}; all other pages are handed to
 * {@code decodeDimensionLegacy}.
 *
 * @param rawColumnPage raw chunk the page belongs to; also the source of the local dictionary
 * @param pageData buffer holding the page bytes
 * @param pageMetadata metadata of the page (encoders, page lengths, presence info)
 * @param offset position in {@code pageData} at which the data page starts
 * @param vectorInfo vector fill information, or {@code null} when no vector is being filled
 * @param reusableDataBuffer buffer passed through to the decoding routines
 * @return the decoded dimension page
 * @throws IOException declared by this method; presumably raised by the decoding helpers
 */
protected DimensionColumnPage decodeDimension(DimensionRawColumnChunk rawColumnPage, ByteBuffer pageData, DataChunk2 pageMetadata, int offset, ColumnVectorInfo vectorInfo, ReusableDataBuffer reusableDataBuffer) throws IOException {
    List<Encoding> encodings = pageMetadata.getEncoders();
    org.apache.carbondata.core.metadata.encoder.Encoding.validateEncodingTypes(encodings);
    if (!CarbonUtil.isEncodedWithMeta(encodings)) {
        // following code is for backward compatibility
        return decodeDimensionLegacy(rawColumnPage, pageData, pageMetadata, offset, vectorInfo, reusableDataBuffer);
    }

    // in case of no dictionary measure data types, if it is included in sort columns
    // then inverted index to be uncompressed
    boolean explicitlySorted = CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.INVERTED_INDEX);
    int[] rowIdIndex;
    int[] rowIdIndexReverse = new int[0];
    if (explicitlySorted) {
        // The inverted index is read from the position right after the data page.
        int rowIdOffset = offset + pageMetadata.data_page_length;
        rowIdIndex = CarbonUtil.getUnCompressColumnIndex(pageMetadata.rowid_page_length, pageData, rowIdOffset);
        if (vectorInfo != null) {
            vectorInfo.invertedIndex = rowIdIndex;
        } else {
            // get the reverse index
            rowIdIndexReverse = CarbonUtil.getInvertedReverseIndex(rowIdIndex);
        }
    } else {
        rowIdIndex = new int[0];
    }

    BitSet nullBits = QueryUtil.getNullBitSet(pageMetadata.presence, this.compressor);

    // Store local dictionary from rawColumnPage so it can be used while filling the vector
    boolean hasVectorOnStack = vectorInfo != null && !vectorInfo.vectorStack.isEmpty();
    if (hasVectorOnStack && rawColumnPage.getLocalDictionary() != null) {
        CarbonColumnVectorImpl topVector =
            (CarbonColumnVectorImpl) (vectorInfo.vectorStack.peek().getColumnVector());
        topVector.setLocalDictionary(rawColumnPage.getLocalDictionary());
    }

    ColumnPage page = decodeDimensionByMeta(pageMetadata, pageData, offset, null != rawColumnPage.getLocalDictionary(), vectorInfo, nullBits, reusableDataBuffer);
    if (page != null) {
        page.setNullBits(nullBits);
    }
    return new ColumnPageWrapper(page, rawColumnPage.getLocalDictionary(), rowIdIndex, rowIdIndexReverse, isEncodedWithAdaptiveMeta(pageMetadata), explicitlySorted);
}
Also used : ColumnPageWrapper(org.apache.carbondata.core.datastore.chunk.store.ColumnPageWrapper) VariableLengthDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) FixedLengthDimensionColumnPage(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage) DimensionColumnPage(org.apache.carbondata.core.datastore.chunk.DimensionColumnPage) BitSet(java.util.BitSet) CarbonColumnVectorImpl(org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl) Encoding(org.apache.carbondata.format.Encoding)

Example 3 with CarbonColumnVectorImpl

use of org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl in project carbondata by apache.

the class LocalDictDimensionDataChunkStore method fillVector.

/**
 * Fills the target vector of {@code vectorInfo} with local-dictionary surrogate values decoded
 * from {@code data}.
 *
 * <p>For each row the surrogate is read from {@code data}; the row is marked null on both the
 * main vector and its dictionary vector when the surrogate (or the dictionary value it points
 * to) matches the default-member markers, otherwise the surrogate is written to the dictionary
 * vector.
 *
 * @param invertedIndex inverted index passed on to the direct vector wrappers
 * @param invertedIndexReverse not read by this method
 * @param data encoded surrogate bytes, {@code columnValueSize} bytes per row
 * @param vectorInfo carries the target vector, the vector stack used for complex types and the
 *                   deleted-row bitset
 */
@Override
public void fillVector(int[] invertedIndex, int[] invertedIndexReverse, byte[] data, ColumnVectorInfo vectorInfo) {
    int columnValueSize = dimensionDataChunkStore.getColumnValueSize();
    // default row count: total data length divided by the fixed width of one value
    int rowsNum = dataLength / columnValueSize;
    CarbonColumnVector vector = vectorInfo.vector;
    if (vector.getType().isComplexType()) {
        if (DataTypes.isStructType(vector.getType())) {
            // for struct, subtract the number of deleted rows (if any) from dataLength
            int deletedRow = vectorInfo.deletedRows != null ? vectorInfo.deletedRows.cardinality() : 0;
            rowsNum = dataLength - deletedRow;
        } else {
            // this is not required to be changed in the case of the array because
            // datalength of the array already taking care of deleted rows in
            // ColumnVectorInfo.getUpdatedPageSizeForChildVector
            rowsNum = dataLength;
        }
        // from here on 'vector' is the vector on top of the stack, not vectorInfo.vector
        vector = vectorInfo.vectorStack.peek();
        CarbonColumnVector sliceVector = vector.getColumnVector();
        // use rowsNum as positionCount in order to create dictionary block
        sliceVector.setPositionCount(rowsNum);
        sliceVector.setIsLocalDictEnabledForComplextype(true);
    }
    // attach the dictionary to the vector only while it is not yet flagged as used
    if (!dictionary.isDictionaryUsed()) {
        vector.setDictionary(dictionary);
        dictionary.setDictionaryUsed();
    }
    // a fresh, empty null bitset is handed to both wrappers below
    BitSet nullBitset = new BitSet();
    // the dictionary vector must be wrapped before 'vector' itself is replaced by its wrapper
    CarbonColumnVector dictionaryVector = ColumnarVectorWrapperDirectFactory.getDirectVectorWrapperFactory(vectorInfo, vector.getDictionaryVector(), invertedIndex, nullBitset, vectorInfo.deletedRows, false, true);
    vector = ColumnarVectorWrapperDirectFactory.getDirectVectorWrapperFactory(vectorInfo, vector, invertedIndex, nullBitset, vectorInfo.deletedRows, false, false);
    // this check is in case of array of string type
    // (grows the dictionary vector's int array when it holds fewer than rowsNum entries)
    if (vectorInfo.vector.getType().isComplexType() && dictionaryVector instanceof CarbonColumnVectorImpl && ((CarbonColumnVectorImpl) dictionaryVector).getIntArraySize() < rowsNum) {
        ((CarbonColumnVectorImpl) dictionaryVector).increaseIntArraySize(rowsNum);
    }
    for (int i = 0; i < rowsNum; i++) {
        // decode the surrogate stored at row i (columnValueSize bytes per row)
        int surrogate = CarbonUtil.getSurrogateInternal(data, i * columnValueSize, columnValueSize);
        // NOTE(review): the first half of the original comment appears to be missing here.
        // Presumably a null value can carry a surrogate that differs from
        // MEMBER_DEFAULT_VAL_SURROGATE_KEY. Therefore check should be using MEMBER_DEFAULT_VAL_ARRAY
        if (surrogate == CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY || Arrays.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, dictionary.getDictionaryValue(surrogate))) {
            vector.putNull(i);
            dictionaryVector.putNull(i);
        } else {
            // if vector is 'ColumnarVectorWrapperDirectWithDeleteDelta', it needs to call 'putNotNull'
            // to increase 'counter', otherwise it will set the null value to the wrong index.
            vector.putNotNull(i);
            dictionaryVector.putInt(i, surrogate);
        }
    }
    // wrappers that implement ConvertibleVector get an explicit convert() call once all rows are written
    if (dictionaryVector instanceof ConvertibleVector) {
        ((ConvertibleVector) dictionaryVector).convert();
    }
}
Also used : ConvertibleVector(org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertibleVector) BitSet(java.util.BitSet) CarbonColumnVectorImpl(org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl) CarbonColumnVector(org.apache.carbondata.core.scan.result.vector.CarbonColumnVector)

Example 4 with CarbonColumnVectorImpl

use of org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl in project carbondata by apache.

the class CarbondataPageSource method getNextPageForRow.

/**
 * Reads rows from {@code rowReader} into per-column vectors and packs them into one page.
 *
 * <p>Reading stops once {@code batchSize} rows have been collected or the reader has no more
 * rows. On the first call the row reader is initialised.
 *
 * @return a page with the rows read, or {@code null} (after calling {@code close()}) when no
 *         row was read
 * @throws PrestoException rethrown unchanged after closing with suppression
 * @throws CarbonDataLoadingException wrapping any other runtime or I/O failure
 */
private Page getNextPageForRow() {
    if (isFrstPage) {
        isFrstPage = false;
        initialReaderForRow();
    }
    if (nanoStart == 0) {
        nanoStart = System.nanoTime();
    }
    int rowsRead = 0;
    try {
        CarbonColumnVectorImpl[] columnReaders = new CarbonColumnVectorImpl[columnCount];
        for (int col = 0; col < columnCount; ++col) {
            columnReaders[col] = CarbonVectorBatch.createDirectStreamReader(batchSize, dataTypes[col], fields[col]);
        }

        while (rowReader.nextKeyValue()) {
            Object[] row = (Object[]) rowReader.getCurrentValue();
            for (int col = 0; col < columnCount; col++) {
                columnReaders[col].putObject(rowsRead, row[col]);
            }
            // Stop as soon as the batch is full.
            if (++rowsRead == batchSize) {
                break;
            }
        }

        if (rowsRead == 0) {
            // Nothing left to read: release resources and signal the end.
            close();
            return null;
        }

        Block[] pageBlocks = new Block[columnCount];
        for (int col = 0; col < columnCount; col++) {
            Block built = ((PrestoVectorBlockBuilder) columnReaders[col]).buildBlock();
            pageBlocks[col] = built;
            sizeOfData += built.getSizeInBytes();
        }
        return new Page(rowsRead, pageBlocks);
    } catch (PrestoException e) {
        closeWithSuppression(e);
        throw e;
    } catch (RuntimeException | IOException e) {
        closeWithSuppression(e);
        throw new CarbonDataLoadingException("Exception when creating the Carbon data Block", e);
    }
}
Also used : CarbonDataLoadingException(org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException) CarbonColumnVectorImpl(org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl) Page(io.prestosql.spi.Page) PrestoException(io.prestosql.spi.PrestoException) IOException(java.io.IOException) LazyBlock(io.prestosql.spi.block.LazyBlock) Block(io.prestosql.spi.block.Block)

Example 5 with CarbonColumnVectorImpl

use of org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl in project carbondata by apache.

the class ColumnarVectorWrapperDirectWithDeleteDeltaAndInvertedIndex method convert.

/**
 * Copies the values buffered in {@code columnVector} into {@code carbonColumnVector}.
 *
 * <p>Rows flagged in {@code deletedRows} are skipped. The remaining rows are written to
 * consecutive output positions, either as null (when flagged in {@code nullBits}) or with the
 * put method matching the vector's data type. Nothing happens when {@code columnVector} is not
 * a {@link CarbonColumnVectorImpl} or its data type is not one of the handled types.
 */
@Override
public void convert() {
    if (!(columnVector instanceof CarbonColumnVectorImpl)) {
        return;
    }
    CarbonColumnVectorImpl source = (CarbonColumnVectorImpl) columnVector;
    DataType type = columnVector.getType();
    int rowCount = invertedIndex.length;
    // Next write position in the target vector; advances once per surviving row.
    int outIndex = 0;

    if (type == DataTypes.BOOLEAN || type == DataTypes.BYTE) {
        byte[] bytes = (byte[]) source.getDataArray();
        for (int row = 0; row < rowCount; row++) {
            if (deletedRows.get(row)) {
                continue;
            }
            if (nullBits.get(row)) {
                carbonColumnVector.putNull(outIndex);
            } else {
                carbonColumnVector.putByte(outIndex, bytes[row]);
            }
            outIndex++;
        }
    } else if (type == DataTypes.SHORT) {
        short[] shorts = (short[]) source.getDataArray();
        for (int row = 0; row < rowCount; row++) {
            if (deletedRows.get(row)) {
                continue;
            }
            if (nullBits.get(row)) {
                carbonColumnVector.putNull(outIndex);
            } else {
                carbonColumnVector.putShort(outIndex, shorts[row]);
            }
            outIndex++;
        }
    } else if (type == DataTypes.INT) {
        int[] ints = (int[]) source.getDataArray();
        for (int row = 0; row < rowCount; row++) {
            if (deletedRows.get(row)) {
                continue;
            }
            if (nullBits.get(row)) {
                carbonColumnVector.putNull(outIndex);
            } else {
                carbonColumnVector.putInt(outIndex, ints[row]);
            }
            outIndex++;
        }
    } else if (type == DataTypes.LONG || type == DataTypes.TIMESTAMP) {
        long[] longs = (long[]) source.getDataArray();
        for (int row = 0; row < rowCount; row++) {
            if (deletedRows.get(row)) {
                continue;
            }
            if (nullBits.get(row)) {
                carbonColumnVector.putNull(outIndex);
            } else {
                carbonColumnVector.putLong(outIndex, longs[row]);
            }
            outIndex++;
        }
    } else if (type == DataTypes.FLOAT) {
        float[] floats = (float[]) source.getDataArray();
        for (int row = 0; row < rowCount; row++) {
            if (deletedRows.get(row)) {
                continue;
            }
            if (nullBits.get(row)) {
                carbonColumnVector.putNull(outIndex);
            } else {
                carbonColumnVector.putFloat(outIndex, floats[row]);
            }
            outIndex++;
        }
    } else if (type == DataTypes.DOUBLE) {
        double[] doubles = (double[]) source.getDataArray();
        for (int row = 0; row < rowCount; row++) {
            if (deletedRows.get(row)) {
                continue;
            }
            if (nullBits.get(row)) {
                carbonColumnVector.putNull(outIndex);
            } else {
                carbonColumnVector.putDouble(outIndex, doubles[row]);
            }
            outIndex++;
        }
    } else if (type instanceof DecimalType) {
        BigDecimal[] decimals = (BigDecimal[]) source.getDataArray();
        for (int row = 0; row < rowCount; row++) {
            if (deletedRows.get(row)) {
                continue;
            }
            if (nullBits.get(row)) {
                carbonColumnVector.putNull(outIndex);
            } else {
                carbonColumnVector.putDecimal(outIndex, decimals[row], precision);
            }
            outIndex++;
        }
    } else if (type == DataTypes.STRING || type == DataTypes.BYTE_ARRAY) {
        int[] offsets = source.getOffsets();
        int[] lengths = source.getLengths();
        if (offsets == null || lengths == null) {
            // No offset/length bookkeeping available: copy each value as its own byte array.
            byte[][] values = (byte[][]) source.getDataArray();
            for (int row = 0; row < rowCount; row++) {
                if (deletedRows.get(row)) {
                    continue;
                }
                if (nullBits.get(row)) {
                    carbonColumnVector.putNull(outIndex);
                } else {
                    carbonColumnVector.putByteArray(outIndex, values[row]);
                }
                outIndex++;
            }
        } else {
            // Values are described by (offset, length) pairs.
            for (int row = 0; row < rowCount; row++) {
                if (deletedRows.get(row)) {
                    continue;
                }
                if (nullBits.get(row)) {
                    carbonColumnVector.putNull(outIndex);
                } else {
                    carbonColumnVector.putArray(outIndex, offsets[row], lengths[row]);
                }
                outIndex++;
            }
        }
    }
}
Also used : CarbonColumnVectorImpl(org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl) DataType(org.apache.carbondata.core.metadata.datatype.DataType) DecimalType(org.apache.carbondata.core.metadata.datatype.DecimalType) BigDecimal(java.math.BigDecimal)

Aggregations

CarbonColumnVectorImpl (org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl)5 BitSet (java.util.BitSet)2 DataType (org.apache.carbondata.core.metadata.datatype.DataType)2 CarbonColumnVector (org.apache.carbondata.core.scan.result.vector.CarbonColumnVector)2 Page (io.prestosql.spi.Page)1 PrestoException (io.prestosql.spi.PrestoException)1 Block (io.prestosql.spi.block.Block)1 LazyBlock (io.prestosql.spi.block.LazyBlock)1 IOException (java.io.IOException)1 BigDecimal (java.math.BigDecimal)1 HashMap (java.util.HashMap)1 DimensionColumnPage (org.apache.carbondata.core.datastore.chunk.DimensionColumnPage)1 FixedLengthDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionColumnPage)1 VariableLengthDimensionColumnPage (org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionColumnPage)1 ColumnPageWrapper (org.apache.carbondata.core.datastore.chunk.store.ColumnPageWrapper)1 ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage)1 DecimalType (org.apache.carbondata.core.metadata.datatype.DecimalType)1 StructField (org.apache.carbondata.core.metadata.datatype.StructField)1 ProjectionDimension (org.apache.carbondata.core.scan.model.ProjectionDimension)1 ProjectionMeasure (org.apache.carbondata.core.scan.model.ProjectionMeasure)1