Search in sources :

Example 1 with VariableLengthDimensionDataChunk

use of org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk in project carbondata by apache.

the class CompressedDimensionChunkFileBasedReaderV3 method convertToDimensionChunk.

/**
 * Converts one page of a raw, still compressed dimension column chunk into a
 * {@link DimensionColumnDataChunk}.
 *
 * <p>The page is read in three consecutive sections of the raw buffer: the data page,
 * then (only with INVERTED_INDEX encoding) the row id page, then (only with RLE
 * encoding) the rle page.
 *
 * @param dimensionRawColumnChunk raw chunk holding the buffer and the blocklet level metadata
 * @param pageNumber              index of the page inside the blocklet column chunk
 * @return a column group chunk for row major pages, a variable length chunk for pages
 *         without DICTIONARY encoding, otherwise a fixed length chunk
 * @throws IOException propagated from the underlying read/decode calls
 */
@Override
public DimensionColumnDataChunk convertToDimensionChunk(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
    // metadata of the whole blocklet column and the shared raw buffer
    DataChunk3 blockletChunk = dimensionRawColumnChunk.getDataChunkV3();
    ByteBuffer buffer = dimensionRawColumnChunk.getRawData();
    // metadata of the requested page
    DataChunk2 pageChunk = blockletChunk.getData_chunk_list().get(pageNumber);
    // the buffer can contain data of several columns, so the page starts at
    // chunk offset + data chunk length + page offset
    int readPosition = dimensionRawColumnChunk.getOffSet() + dimensionChunksLength.get(dimensionRawColumnChunk.getBlockletId()) + blockletChunk.getPage_offset().get(pageNumber);
    // section 1: the data page itself
    byte[] pageData = COMPRESSOR.unCompressByte(buffer.array(), readPosition, pageChunk.data_page_length);
    readPosition += pageChunk.data_page_length;
    // section 2: row ids, present only when the page carries an inverted index
    int[] rowIds = null;
    int[] rowIdsReverse = null;
    if (hasEncoding(pageChunk.encoders, Encoding.INVERTED_INDEX)) {
        rowIds = CarbonUtil.getUnCompressColumnIndex(pageChunk.rowid_page_length, buffer, readPosition);
        readPosition += pageChunk.rowid_page_length;
        rowIdsReverse = getInvertedReverseIndex(rowIds);
    }
    // section 3: rle indexes, used to expand the data page
    if (hasEncoding(pageChunk.encoders, Encoding.RLE)) {
        int[] rleIndexes = CarbonUtil.getIntArray(buffer, readPosition, pageChunk.rle_page_length);
        pageData = UnBlockIndexer.uncompressData(pageData, rleIndexes, eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()]);
    }
    // pick the chunk implementation matching the page layout
    if (pageChunk.isRowMajor()) {
        return new ColumnGroupDimensionDataChunk(pageData, eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()], dimensionRawColumnChunk.getRowCount()[pageNumber]);
    }
    if (!hasEncoding(pageChunk.encoders, Encoding.DICTIONARY)) {
        return new VariableLengthDimensionDataChunk(pageData, rowIds, rowIdsReverse, dimensionRawColumnChunk.getRowCount()[pageNumber]);
    }
    return new FixedLengthDimensionDataChunk(pageData, rowIds, rowIdsReverse, dimensionRawColumnChunk.getRowCount()[pageNumber], eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()]);
}
Also used : FixedLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk) DataChunk2(org.apache.carbondata.format.DataChunk2) DimensionColumnDataChunk(org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk) DataChunk3(org.apache.carbondata.format.DataChunk3) ColumnGroupDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionDataChunk) ByteBuffer(java.nio.ByteBuffer) VariableLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk)

Example 2 with VariableLengthDimensionDataChunk

use of org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk in project carbondata by apache.

the class RowLevelFilterExecuterImpl method createRow.

/**
 * Reads the filter dimension and measure values of a single row and stores them
 * in the given row instance for further processing of the filters.
 *
 * <p>Every value is written to the slot returned by the column's
 * {@code getRowIndex()}; dimensions are filled first, measures afterwards.
 *
 * @param blockChunkHolder holder of the raw dimension and measure chunks
 * @param row              row instance receiving the collected values
 * @param pageIndex        page to decode inside each raw chunk
 * @param index            row position inside the page
 * @throws IOException declared by the signature; an IOException raised while reading a
 *                     complex (ARRAY/STRUCT) dimension is logged and not rethrown
 */
private void createRow(BlocksChunkHolder blockChunkHolder, RowIntf row, int pageIndex, int index) throws IOException {
    final int dimensionCount = dimColEvaluatorInfoList.size();
    final int measureCount = msrColEvalutorInfoList.size();
    Object[] record = new Object[dimensionCount + measureCount];

    for (int dimIdx = 0; dimIdx < dimensionCount; dimIdx++) {
        DimColumnResolvedFilterInfo dimInfo = dimColEvaluatorInfoList.get(dimIdx);
        // filter dimension missing from the current block: use its default value
        if (!isDimensionPresentInCurrentBlock[dimIdx]) {
            record[dimInfo.getRowIndex()] = getDimensionDefaultValue(dimInfo);
            continue;
        }
        DataType dimType = dimInfo.getDimension().getDataType();
        if (dimType == DataType.ARRAY || dimType == DataType.STRUCT) {
            // complex dimension: serialise the blocks into a byte array and decode that
            try {
                GenericQueryType complexType = complexDimensionInfoMap.get(dimensionBlocksIndex[dimIdx]);
                ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
                DataOutputStream dataOutputStream = new DataOutputStream(byteStream);
                complexType.parseBlocksAndReturnComplexColumnByteArray(blockChunkHolder.getDimensionRawDataChunk(), index, pageIndex, dataOutputStream);
                record[dimInfo.getRowIndex()] = complexType.getDataBasedOnDataTypeFromSurrogates(ByteBuffer.wrap(byteStream.toByteArray()));
                byteStream.close();
            } catch (IOException e) {
                LOGGER.info(e.getMessage());
            }
            continue;
        }
        if (!dimInfo.isDimensionExistsInCurrentSilce()) {
            // NOTE(review): this default may be overwritten by the value read below - confirm intended
            record[dimInfo.getRowIndex()] = dimInfo.getDimension().getDefaultValue();
        }
        DimensionColumnDataChunk dimChunk = blockChunkHolder.getDimensionRawDataChunk()[dimensionBlocksIndex[dimIdx]].convertToDimColDataChunk(pageIndex);
        boolean dictionaryEncoded = dimInfo.getDimension().hasEncoding(Encoding.DICTIONARY);
        if (!dictionaryEncoded && dimChunk instanceof VariableLengthDimensionDataChunk) {
            // no-dictionary column: the page stores the member bytes directly
            byte[] memberBytes = ((VariableLengthDimensionDataChunk) dimChunk).getChunkData(index);
            if (null == memberBytes) {
                continue;
            }
            // the default member marker is handed to the converter as null
            if (Arrays.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, memberBytes)) {
                memberBytes = null;
            }
            record[dimInfo.getRowIndex()] = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(memberBytes, dimType);
            continue;
        }
        // dictionary based column: read the surrogate and translate it to the actual value
        int surrogate = readSurrogatesFromColumnBlock(blockChunkHolder, index, pageIndex, dimInfo, dimensionBlocksIndex[dimIdx]);
        if (dimInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            record[dimInfo.getRowIndex()] = getFilterActualValueFromDirectDictionaryValue(dimInfo, surrogate);
        } else if (dictionaryEncoded) {
            String memberString = getFilterActualValueFromDictionaryValue(dimInfo, surrogate);
            record[dimInfo.getRowIndex()] = DataTypeUtil.getDataBasedOnDataType(memberString, dimType);
        }
    }

    for (int msrIdx = 0; msrIdx < measureCount; msrIdx++) {
        MeasureColumnResolvedFilterInfo msrInfo = msrColEvalutorInfoList.get(msrIdx);
        // every type other than SHORT, INT, LONG and DECIMAL is read as DOUBLE
        DataType msrType;
        switch(msrInfo.getType()) {
            case SHORT:
                msrType = DataType.SHORT;
                break;
            case INT:
                msrType = DataType.INT;
                break;
            case LONG:
                msrType = DataType.LONG;
                break;
            case DECIMAL:
                msrType = DataType.DECIMAL;
                break;
            default:
                msrType = DataType.DOUBLE;
        }
        // measure missing from the current block: use the default value of the column
        if (!isMeasurePresentInCurrentBlock[msrIdx]) {
            byte[] defaultValue = msrInfo.getCarbonColumn().getDefaultValue();
            record[msrInfo.getRowIndex()] = RestructureUtil.getMeasureDefaultValue(msrInfo.getCarbonColumn().getColumnSchema(), defaultValue);
            continue;
        }
        // NOTE(review): the chunk is always taken from measureBlocksIndex[0], independent of
        // the loop position - confirm this is intended when several measures are filtered
        MeasureColumnDataChunk msrChunk = blockChunkHolder.getMeasureRawDataChunk()[measureBlocksIndex[0]].convertToMeasureColDataChunk(pageIndex);
        Object msrValue;
        if (msrType == DataType.SHORT) {
            msrValue = (short) msrChunk.getMeasureDataHolder().getReadableLongValueByIndex(index);
        } else if (msrType == DataType.INT) {
            msrValue = (int) msrChunk.getMeasureDataHolder().getReadableLongValueByIndex(index);
        } else if (msrType == DataType.LONG) {
            msrValue = msrChunk.getMeasureDataHolder().getReadableLongValueByIndex(index);
        } else if (msrType == DataType.DECIMAL) {
            BigDecimal decimal = msrChunk.getMeasureDataHolder().getReadableBigDecimalValueByIndex(index);
            // widen the scale up to the scale declared in the column schema
            if (null != decimal && msrInfo.getCarbonColumn().getColumnSchema().getScale() > decimal.scale()) {
                decimal = decimal.setScale(msrInfo.getCarbonColumn().getColumnSchema().getScale(), RoundingMode.HALF_UP);
            }
            msrValue = decimal;
        } else {
            msrValue = msrChunk.getMeasureDataHolder().getReadableDoubleValueByIndex(index);
        }
        // rows flagged in the null value bit set are reported as null
        if (msrChunk.getNullValueIndexHolder().getBitSet().get(index)) {
            record[msrInfo.getRowIndex()] = null;
        } else {
            record[msrInfo.getRowIndex()] = msrValue;
        }
    }
    row.setValues(record);
}
Also used : MeasureColumnResolvedFilterInfo(org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo) DimColumnResolvedFilterInfo(org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo) DataOutputStream(java.io.DataOutputStream) DimensionColumnDataChunk(org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) MeasureColumnDataChunk(org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk) VariableLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk) BigDecimal(java.math.BigDecimal) GenericQueryType(org.apache.carbondata.core.scan.filter.GenericQueryType) DataType(org.apache.carbondata.core.metadata.datatype.DataType)

Example 3 with VariableLengthDimensionDataChunk

use of org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk in project carbondata by apache.

the class CompressedDimensionChunkFileBasedReaderV2 method convertToDimensionChunk.

/**
 * Converts a raw, still compressed dimension column chunk into a
 * {@link DimensionColumnDataChunk}.
 *
 * <p>The data chunk metadata is parsed from the raw buffer first. For the last
 * dimension column the page data is then fetched from the file with a separate read;
 * for every other column it follows the metadata inside the same buffer.
 *
 * @param dimensionRawColumnChunk raw chunk holding the buffer, offset, length and file reader
 * @param pageNumber              not used by this method body
 * @return a column group chunk for row major data, a variable length chunk for data
 *         without DICTIONARY encoding, otherwise a fixed length chunk
 * @throws IOException propagated from the underlying read/decode calls
 */
public DimensionColumnDataChunk convertToDimensionChunk(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
    int readPosition = dimensionRawColumnChunk.getOffSet();
    int columnIndex = dimensionRawColumnChunk.getBlockletId();
    ByteBuffer buffer = dimensionRawColumnChunk.getRawData();
    DataChunk2 chunkMeta;
    if (dimensionChunksOffset.size() - 1 != columnIndex) {
        // metadata and page data share the buffer: skip over the metadata
        chunkMeta = CarbonUtil.readDataChunk(buffer, readPosition, dimensionChunksLength.get(columnIndex));
        readPosition += dimensionChunksLength.get(columnIndex);
    } else {
        // last column: the buffer holds the metadata, the pages are read from the file
        chunkMeta = CarbonUtil.readDataChunk(buffer, readPosition, dimensionRawColumnChunk.getLength());
        int pagesLength = chunkMeta.data_page_length + chunkMeta.rle_page_length + chunkMeta.rowid_page_length;
        synchronized (dimensionRawColumnChunk.getFileReader()) {
            buffer = dimensionRawColumnChunk.getFileReader().readByteBuffer(filePath, dimensionChunksOffset.get(columnIndex) + dimensionChunksLength.get(columnIndex), pagesLength);
        }
        // NOTE(review): readPosition keeps the offset of the replaced buffer; presumably
        // that offset is 0 for the last column - confirm against the caller
    }
    // section 1: the data page itself
    byte[] pageData = COMPRESSOR.unCompressByte(buffer.array(), readPosition, chunkMeta.data_page_length);
    readPosition += chunkMeta.data_page_length;
    // section 2: row ids, present only when the chunk carries an inverted index
    int[] rowIds = null;
    int[] rowIdsReverse = null;
    if (hasEncoding(chunkMeta.encoders, Encoding.INVERTED_INDEX)) {
        byte[] rowIdBytes = new byte[chunkMeta.rowid_page_length];
        buffer.position(readPosition);
        buffer.get(rowIdBytes);
        rowIds = CarbonUtil.getUnCompressColumnIndex(chunkMeta.rowid_page_length, rowIdBytes, numberComressor, 0);
        readPosition += chunkMeta.rowid_page_length;
        rowIdsReverse = getInvertedReverseIndex(rowIds);
    }
    // section 3: rle indexes, used to expand the data page
    if (hasEncoding(chunkMeta.encoders, Encoding.RLE)) {
        byte[] rleBytes = new byte[chunkMeta.rle_page_length];
        buffer.position(readPosition);
        buffer.get(rleBytes);
        int[] rleIndexes = numberComressor.unCompress(rleBytes, 0, chunkMeta.rle_page_length);
        pageData = UnBlockIndexer.uncompressData(pageData, rleIndexes, eachColumnValueSize[columnIndex]);
    }
    // pick the chunk implementation matching the data layout
    if (chunkMeta.isRowMajor()) {
        return new ColumnGroupDimensionDataChunk(pageData, eachColumnValueSize[columnIndex], numberOfRows);
    }
    if (!hasEncoding(chunkMeta.encoders, Encoding.DICTIONARY)) {
        return new VariableLengthDimensionDataChunk(pageData, rowIds, rowIdsReverse, numberOfRows);
    }
    return new FixedLengthDimensionDataChunk(pageData, rowIds, rowIdsReverse, numberOfRows, eachColumnValueSize[columnIndex]);
}
Also used : FixedLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk) DataChunk2(org.apache.carbondata.format.DataChunk2) DimensionColumnDataChunk(org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk) ColumnGroupDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionDataChunk) ByteBuffer(java.nio.ByteBuffer) VariableLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk)

Example 4 with VariableLengthDimensionDataChunk

use of org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk in project carbondata by apache.

the class CompressedDimensionChunkFileBasedReaderV1 method convertToDimensionChunk.

/**
 * Converts a raw, still compressed dimension column chunk into a
 * {@link DimensionColumnDataChunk}.
 *
 * <p>The data page comes from the raw buffer; the row id page and the rle page, when
 * the chunk metadata declares them, are read from the file through the chunk's file reader.
 *
 * @param dimensionRawColumnChunk raw chunk holding the buffer, offset, length and file reader
 * @param pageNumber              not used by this method body
 * @return a column group chunk for row major data, a variable length chunk for data
 *         without DICTIONARY encoding, otherwise a fixed length chunk
 * @throws IOException propagated from the underlying read/decode calls
 */
@Override
public DimensionColumnDataChunk convertToDimensionChunk(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
    int columnIndex = dimensionRawColumnChunk.getBlockletId();
    FileHolder reader = dimensionRawColumnChunk.getFileReader();
    ByteBuffer buffer = dimensionRawColumnChunk.getRawData();
    // the data page is the part of the buffer described by the raw chunk
    byte[] pageData = COMPRESSOR.unCompressByte(buffer.array(), dimensionRawColumnChunk.getOffSet(), dimensionRawColumnChunk.getLength());
    DataChunk chunkMeta = dimensionColumnChunk.get(columnIndex);
    // row ids, present only when the chunk carries an inverted index
    int[] rowIds = null;
    int[] rowIdsReverse = null;
    if (CarbonUtil.hasEncoding(chunkMeta.getEncodingList(), Encoding.INVERTED_INDEX)) {
        byte[] rowIdBytes;
        synchronized (reader) {
            rowIdBytes = reader.readByteArray(filePath, chunkMeta.getRowIdPageOffset(), chunkMeta.getRowIdPageLength());
        }
        rowIds = CarbonUtil.getUnCompressColumnIndex(chunkMeta.getRowIdPageLength(), rowIdBytes, numberComressor, 0);
        rowIdsReverse = getInvertedReverseIndex(rowIds);
    }
    // rle indexes, used to expand the data page
    if (CarbonUtil.hasEncoding(chunkMeta.getEncodingList(), Encoding.RLE)) {
        byte[] rleBytes;
        synchronized (reader) {
            rleBytes = reader.readByteArray(filePath, chunkMeta.getRlePageOffset(), chunkMeta.getRlePageLength());
        }
        int[] rleIndexes = numberComressor.unCompress(rleBytes, 0, chunkMeta.getRlePageLength());
        pageData = UnBlockIndexer.uncompressData(pageData, rleIndexes, eachColumnValueSize[columnIndex]);
    }
    // pick the chunk implementation matching the data layout
    if (chunkMeta.isRowMajor()) {
        return new ColumnGroupDimensionDataChunk(pageData, eachColumnValueSize[columnIndex], numberOfRows);
    }
    if (!CarbonUtil.hasEncoding(chunkMeta.getEncodingList(), Encoding.DICTIONARY)) {
        return new VariableLengthDimensionDataChunk(pageData, rowIds, rowIdsReverse, numberOfRows);
    }
    return new FixedLengthDimensionDataChunk(pageData, rowIds, rowIdsReverse, numberOfRows, eachColumnValueSize[columnIndex]);
}
Also used : FixedLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk) DimensionColumnDataChunk(org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk) ColumnGroupDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionDataChunk) VariableLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk) DataChunk(org.apache.carbondata.core.metadata.blocklet.datachunk.DataChunk) ByteBuffer(java.nio.ByteBuffer) FileHolder(org.apache.carbondata.core.datastore.FileHolder)

Aggregations

DimensionColumnDataChunk (org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk)4 VariableLengthDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk)4 ByteBuffer (java.nio.ByteBuffer)3 ColumnGroupDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionDataChunk)3 FixedLengthDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk)3 DataChunk2 (org.apache.carbondata.format.DataChunk2)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 DataOutputStream (java.io.DataOutputStream)1 IOException (java.io.IOException)1 BigDecimal (java.math.BigDecimal)1 FileHolder (org.apache.carbondata.core.datastore.FileHolder)1 MeasureColumnDataChunk (org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk)1 DataChunk (org.apache.carbondata.core.metadata.blocklet.datachunk.DataChunk)1 DataType (org.apache.carbondata.core.metadata.datatype.DataType)1 GenericQueryType (org.apache.carbondata.core.scan.filter.GenericQueryType)1 DimColumnResolvedFilterInfo (org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo)1 MeasureColumnResolvedFilterInfo (org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo)1 DataChunk3 (org.apache.carbondata.format.DataChunk3)1