
Example 1 with DataChunk2

use of org.apache.carbondata.format.DataChunk2 in project carbondata by apache.

the class CarbonMetadataUtil method getDatachunk2.

/**
   * Below method will be used to get the data chunk object for all the columns
   *
   * @param blockletInfoColumnar blocklet info
   * @param columnSchema        list of columns
   * @param segmentProperties    segment properties
   * @return list of data chunks
   * @throws IOException
   */
public static List<DataChunk2> getDatachunk2(BlockletInfoColumnar blockletInfoColumnar, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties) throws IOException {
    List<DataChunk2> colDataChunks = new ArrayList<DataChunk2>();
    int rowIdIndex = 0;
    int aggregateIndex = 0;
    boolean[] isSortedKeyColumn = blockletInfoColumnar.getIsSortedKeyColumn();
    boolean[] aggKeyBlock = blockletInfoColumnar.getAggKeyBlock();
    boolean[] colGrpblock = blockletInfoColumnar.getColGrpBlocks();
    for (int i = 0; i < blockletInfoColumnar.getKeyLengths().length; i++) {
        DataChunk2 dataChunk = new DataChunk2();
        dataChunk.setChunk_meta(getChunkCompressionMeta());
        List<Encoding> encodings = new ArrayList<Encoding>();
        if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
            encodings.add(Encoding.DICTIONARY);
        }
        if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
            encodings.add(Encoding.DIRECT_DICTIONARY);
        }
        dataChunk.setRowMajor(colGrpblock[i]);
        // TODO : Once the schema PR is merged, the information needs to be
        // passed here.
        dataChunk.setData_page_length(blockletInfoColumnar.getKeyLengths()[i]);
        if (aggKeyBlock[i]) {
            dataChunk.setRle_page_length(blockletInfoColumnar.getDataIndexMapLength()[aggregateIndex]);
            encodings.add(Encoding.RLE);
            aggregateIndex++;
        }
        dataChunk.setSort_state(isSortedKeyColumn[i] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
        if (!isSortedKeyColumn[i]) {
            dataChunk.setRowid_page_length(blockletInfoColumnar.getKeyBlockIndexLength()[rowIdIndex]);
            encodings.add(Encoding.INVERTED_INDEX);
            rowIdIndex++;
        }
        // TODO : Right now the encodings happen at runtime; change as per
        // these encoders.
        dataChunk.setEncoders(encodings);
        colDataChunks.add(dataChunk);
    }
    for (int i = 0; i < blockletInfoColumnar.getMeasureLength().length; i++) {
        DataChunk2 dataChunk = new DataChunk2();
        dataChunk.setChunk_meta(getChunkCompressionMeta());
        dataChunk.setRowMajor(false);
        // TODO : Once the schema PR is merged, the information needs to be
        // passed here.
        dataChunk.setData_page_length(blockletInfoColumnar.getMeasureLength()[i]);
        // TODO : Right now the encodings happen at runtime; change as per
        // these encoders.
        List<Encoding> encodings = new ArrayList<Encoding>();
        encodings.add(Encoding.DELTA);
        dataChunk.setEncoders(encodings);
        // TODO : writing a dummy presence meta; need to set the actual
        // presence meta
        PresenceMeta presenceMeta = new PresenceMeta();
        presenceMeta.setPresent_bit_streamIsSet(true);
        presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor().compressByte(blockletInfoColumnar.getMeasureNullValueIndex()[i].toByteArray()));
        dataChunk.setPresence(presenceMeta);
        // TODO : PresenceMeta needs to be implemented and set here
        // dataChunk.setPresence(new PresenceMeta());
        // TODO : Need to write ValueCompression meta here.
        List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
        encoderMetaList.add(ByteBuffer.wrap(serializeEncoderMeta(createValueEncoderMeta(blockletInfoColumnar.getCompressionModel(), i))));
        dataChunk.setEncoder_meta(encoderMetaList);
        colDataChunks.add(dataChunk);
    }
    return colDataChunks;
}
Also used : DataChunk2(org.apache.carbondata.format.DataChunk2) ArrayList(java.util.ArrayList) Encoding(org.apache.carbondata.format.Encoding) PresenceMeta(org.apache.carbondata.format.PresenceMeta) ByteBuffer(java.nio.ByteBuffer)
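The two index variables in getDatachunk2 are easy to misread: rowIdIndex and aggregateIndex advance only for columns that actually wrote an inverted-index or RLE block, which keeps the per-column boolean arrays in step with the compacted length arrays. Below is a minimal sketch of that cursor pattern in plain Java, with hypothetical flags and lengths and no CarbonData dependency:

// Sketch of the conditional-cursor pattern from getDatachunk2: each cursor
// advances only when the matching block exists, so the compacted length
// arrays (one entry per block actually written) stay aligned with the
// per-column boolean arrays. All values are made up for illustration.
public class CursorSketch {
    public static void main(String[] args) {
        boolean[] isSortedKeyColumn = { true, false, false, true };
        boolean[] aggKeyBlock = { false, true, false, true };
        // one entry per unsorted column (columns 1 and 2)
        int[] keyBlockIndexLength = { 40, 56 };
        // one entry per RLE-encoded column (columns 1 and 3)
        int[] dataIndexMapLength = { 12, 20 };
        int rowIdIndex = 0;
        int aggregateIndex = 0;
        for (int i = 0; i < isSortedKeyColumn.length; i++) {
            if (aggKeyBlock[i]) {
                System.out.println("col " + i + ": rle_page_length = " + dataIndexMapLength[aggregateIndex++]);
            }
            if (!isSortedKeyColumn[i]) {
                System.out.println("col " + i + ": rowid_page_length = " + keyBlockIndexLength[rowIdIndex++]);
            }
        }
    }
}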

Example 2 with DataChunk2

use of org.apache.carbondata.format.DataChunk2 in project carbondata by apache.

the class CarbonMetadataUtil method getDataChunk3.

public static DataChunk3 getDataChunk3(List<NodeHolder> nodeHolderList, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, int index, boolean isDimensionColumn) throws IOException {
    List<DataChunk2> dataChunksList = getDatachunk2(nodeHolderList, columnSchema, segmentProperties, index, isDimensionColumn);
    int offset = 0;
    DataChunk3 dataChunk = new DataChunk3();
    List<Integer> pageOffsets = new ArrayList<>();
    List<Integer> pageLengths = new ArrayList<>();
    int length = 0;
    for (int i = 0; i < dataChunksList.size(); i++) {
        pageOffsets.add(offset);
        length = dataChunksList.get(i).getData_page_length() + dataChunksList.get(i).getRle_page_length() + dataChunksList.get(i).getRowid_page_length();
        pageLengths.add(length);
        offset += length;
    }
    dataChunk.setData_chunk_list(dataChunksList);
    dataChunk.setPage_length(pageLengths);
    dataChunk.setPage_offset(pageOffsets);
    return dataChunk;
}
Also used : DataChunk2(org.apache.carbondata.format.DataChunk2) ArrayList(java.util.ArrayList) DataChunk3(org.apache.carbondata.format.DataChunk3)
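The loop in getDataChunk3 lays one column's pages out back to back: a page's offset is the running sum of the preceding pages' total lengths, where each total is data page + rle page + rowid page. A minimal plain-Java sketch with made-up page lengths:

// Sketch of the page offset/length bookkeeping in getDataChunk3.
// Each inner array is a hypothetical page: { data, rle, rowid } lengths.
import java.util.ArrayList;
import java.util.List;

public class PageOffsetSketch {
    public static void main(String[] args) {
        int[][] pages = { { 100, 8, 16 }, { 120, 0, 16 }, { 90, 8, 0 } };
        List<Integer> pageOffsets = new ArrayList<>();
        List<Integer> pageLengths = new ArrayList<>();
        int offset = 0;
        for (int[] page : pages) {
            int length = page[0] + page[1] + page[2];
            pageOffsets.add(offset);
            pageLengths.add(length);
            offset += length;
        }
        // prints: offsets = [0, 124, 260], lengths = [124, 136, 98]
        System.out.println("offsets = " + pageOffsets + ", lengths = " + pageLengths);
    }
}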

Example 3 with DataChunk2

use of org.apache.carbondata.format.DataChunk2 in project carbondata by apache.

the class CompressedMeasureChunkFileBasedReaderV2 method convertToMeasureChunk.

public MeasureColumnDataChunk convertToMeasureChunk(MeasureRawColumnChunk measureRawColumnChunk, int pageNumber) throws IOException {
    MeasureColumnDataChunk datChunk = new MeasureColumnDataChunk();
    DataChunk2 measureColumnChunk = null;
    int copyPoint = measureRawColumnChunk.getOffSet();
    int blockIndex = measureRawColumnChunk.getBlockletId();
    ByteBuffer rawData = measureRawColumnChunk.getRawData();
    if (measureColumnChunkOffsets.size() - 1 == blockIndex) {
        measureColumnChunk = CarbonUtil.readDataChunk(rawData, copyPoint, measureColumnChunkLength.get(blockIndex));
        synchronized (measureRawColumnChunk.getFileReader()) {
            rawData = measureRawColumnChunk.getFileReader().readByteBuffer(filePath, measureColumnChunkOffsets.get(blockIndex) + measureColumnChunkLength.get(blockIndex), measureColumnChunk.data_page_length);
        }
    } else {
        measureColumnChunk = CarbonUtil.readDataChunk(rawData, copyPoint, measureColumnChunkLength.get(blockIndex));
        copyPoint += measureColumnChunkLength.get(blockIndex);
    }
    List<ValueEncoderMeta> valueEncodeMeta = new ArrayList<>();
    for (int i = 0; i < measureColumnChunk.getEncoder_meta().size(); i++) {
        valueEncodeMeta.add(CarbonUtil.deserializeEncoderMeta(measureColumnChunk.getEncoder_meta().get(i).array()));
    }
    WriterCompressModel compressionModel = CarbonUtil.getValueCompressionModel(valueEncodeMeta);
    ValueCompressionHolder values = compressionModel.getValueCompressionHolder()[0];
    // uncompress
    values.uncompress(compressionModel.getConvertedDataType()[0], rawData.array(), copyPoint, measureColumnChunk.data_page_length, compressionModel.getMantissa()[0], compressionModel.getMaxValue()[0], numberOfRows);
    CarbonReadDataHolder measureDataHolder = new CarbonReadDataHolder(values);
    // set the data chunk
    datChunk.setMeasureDataHolder(measureDataHolder);
    // set the null value indexes
    datChunk.setNullValueIndexHolder(getPresenceMeta(measureColumnChunk.presence));
    return datChunk;
}
Also used : WriterCompressModel(org.apache.carbondata.core.datastore.compression.WriterCompressModel) ValueCompressionHolder(org.apache.carbondata.core.datastore.compression.ValueCompressionHolder) DataChunk2(org.apache.carbondata.format.DataChunk2) ArrayList(java.util.ArrayList) CarbonReadDataHolder(org.apache.carbondata.core.datastore.dataholder.CarbonReadDataHolder) ValueEncoderMeta(org.apache.carbondata.core.metadata.ValueEncoderMeta) MeasureColumnDataChunk(org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk) ByteBuffer(java.nio.ByteBuffer)
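The branch on measureColumnChunkOffsets.size() - 1 == blockIndex above is the V2 reader's special case for the last column chunk: its data page is not part of the buffer fetched for the metadata, so a second read is issued starting at chunk offset + chunk length. For earlier chunks, the data follows the metadata in the same buffer and copyPoint simply skips past it. A plain-Java sketch of that decision, with hypothetical offsets:

import java.util.Arrays;
import java.util.List;

// Sketch of the last-chunk branch in convertToMeasureChunk (V2).
// Offsets and lengths are invented for illustration.
public class LastChunkSketch {
    public static void main(String[] args) {
        List<Long> chunkOffsets = Arrays.asList(0L, 512L, 1024L);
        List<Integer> chunkLengths = Arrays.asList(64, 64, 64);
        int blockIndex = 2; // the last chunk in the blocklet
        if (chunkOffsets.size() - 1 == blockIndex) {
            // metadata was read alone; the data needs a second read from the file
            long dataStart = chunkOffsets.get(blockIndex) + chunkLengths.get(blockIndex);
            System.out.println("second read at file offset " + dataStart); // 1088
        } else {
            // data sits right behind the metadata in the buffer already read
            System.out.println("advance copyPoint by " + chunkLengths.get(blockIndex));
        }
    }
}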

Example 4 with DataChunk2

use of org.apache.carbondata.format.DataChunk2 in project carbondata by apache.

the class CompressedMeasureChunkFileBasedReaderV3 method convertToMeasureChunk.

/**
   * Below method will be used to convert the compressed measure chunk raw data to actual data
   *
   * @param measureRawColumnChunk measure raw chunk
   * @param pageNumber            page number within the chunk
   * @return MeasureColumnDataChunk
   */
@Override
public MeasureColumnDataChunk convertToMeasureChunk(MeasureRawColumnChunk measureRawColumnChunk, int pageNumber) throws IOException {
    MeasureColumnDataChunk datChunk = new MeasureColumnDataChunk();
    // data chunk of blocklet column
    DataChunk3 dataChunk3 = measureRawColumnChunk.getDataChunkV3();
    // data chunk of page
    DataChunk2 measureColumnChunk = dataChunk3.getData_chunk_list().get(pageNumber);
    // calculating the start point of data
    // as buffer can contain multiple column data, start point will be datachunkoffset +
    // data chunk length + page offset
    int copyPoint = measureRawColumnChunk.getOffSet() + measureColumnChunkLength.get(measureRawColumnChunk.getBlockletId()) + dataChunk3.getPage_offset().get(pageNumber);
    List<ValueEncoderMeta> valueEncodeMeta = new ArrayList<>();
    for (int i = 0; i < measureColumnChunk.getEncoder_meta().size(); i++) {
        valueEncodeMeta.add(CarbonUtil.deserializeEncoderMetaNew(measureColumnChunk.getEncoder_meta().get(i).array()));
    }
    WriterCompressModel compressionModel = CarbonUtil.getValueCompressionModel(valueEncodeMeta);
    ValueCompressionHolder values = compressionModel.getValueCompressionHolder()[0];
    // uncompress
    ByteBuffer rawData = measureRawColumnChunk.getRawData();
    values.uncompress(compressionModel.getConvertedDataType()[0], rawData.array(), copyPoint, measureColumnChunk.data_page_length, compressionModel.getMantissa()[0], compressionModel.getMaxValue()[0], measureRawColumnChunk.getRowCount()[pageNumber]);
    CarbonReadDataHolder measureDataHolder = new CarbonReadDataHolder(values);
    // set the data chunk
    datChunk.setMeasureDataHolder(measureDataHolder);
    // set the null value indexes
    datChunk.setNullValueIndexHolder(getPresenceMeta(measureColumnChunk.presence));
    return datChunk;
}
Also used : WriterCompressModel(org.apache.carbondata.core.datastore.compression.WriterCompressModel) ValueCompressionHolder(org.apache.carbondata.core.datastore.compression.ValueCompressionHolder) DataChunk2(org.apache.carbondata.format.DataChunk2) ArrayList(java.util.ArrayList) DataChunk3(org.apache.carbondata.format.DataChunk3) CarbonReadDataHolder(org.apache.carbondata.core.datastore.dataholder.CarbonReadDataHolder) ValueEncoderMeta(org.apache.carbondata.core.metadata.ValueEncoderMeta) MeasureColumnDataChunk(org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk) ByteBuffer(java.nio.ByteBuffer)
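The copyPoint arithmetic is the core of the V3 layout: one raw buffer can hold several columns' chunks, so the data of page p starts at the chunk's offset within the buffer, plus the serialized chunk header length, plus getPage_offset().get(p). A sketch with hypothetical numbers:

// Sketch of the V3 start-point arithmetic from convertToMeasureChunk.
// All values are invented for illustration.
public class CopyPointSketch {
    public static void main(String[] args) {
        int chunkOffset = 4096;   // where this column's chunk starts in the raw buffer
        int headerLength = 64;    // length of the serialized DataChunk3 header
        int[] pageOffset = { 0, 124, 260 };
        int pageNumber = 1;
        int copyPoint = chunkOffset + headerLength + pageOffset[pageNumber];
        System.out.println("page " + pageNumber + " data starts at " + copyPoint); // 4284
    }
}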

Example 5 with DataChunk2

use of org.apache.carbondata.format.DataChunk2 in project carbondata by apache.

the class CompressedDimensionChunkFileBasedReaderV3 method convertToDimensionChunk.

/**
   * Below method will be used to convert the compressed dimension chunk raw data to actual data
   *
   * @param dimensionRawColumnChunk dimension raw chunk
   * @param pageNumber              page number within the chunk
   * @return DimensionColumnDataChunk
   */
@Override
public DimensionColumnDataChunk convertToDimensionChunk(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
    byte[] dataPage = null;
    int[] invertedIndexes = null;
    int[] invertedIndexesReverse = null;
    int[] rlePage = null;
    // data chunk of page
    DataChunk2 dimensionColumnChunk = null;
    // data chunk of blocklet column
    DataChunk3 dataChunk3 = dimensionRawColumnChunk.getDataChunkV3();
    // get the data buffer
    ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
    dimensionColumnChunk = dataChunk3.getData_chunk_list().get(pageNumber);
    // calculating the start point of data
    // as buffer can contain multiple column data, start point will be datachunkoffset +
    // data chunk length + page offset
    int copySourcePoint = dimensionRawColumnChunk.getOffSet() + dimensionChunksLength.get(dimensionRawColumnChunk.getBlockletId()) + dataChunk3.getPage_offset().get(pageNumber);
    // first read the data and uncompress it
    dataPage = COMPRESSOR.unCompressByte(rawData.array(), copySourcePoint, dimensionColumnChunk.data_page_length);
    copySourcePoint += dimensionColumnChunk.data_page_length;
    // if row id block is present then read the row id chunk and uncompress it
    if (hasEncoding(dimensionColumnChunk.encoders, Encoding.INVERTED_INDEX)) {
        invertedIndexes = CarbonUtil.getUnCompressColumnIndex(dimensionColumnChunk.rowid_page_length, rawData, copySourcePoint);
        copySourcePoint += dimensionColumnChunk.rowid_page_length;
        // get the reverse index
        invertedIndexesReverse = getInvertedReverseIndex(invertedIndexes);
    }
    // then read the actual data based on the rle block
    if (hasEncoding(dimensionColumnChunk.encoders, Encoding.RLE)) {
        rlePage = CarbonUtil.getIntArray(rawData, copySourcePoint, dimensionColumnChunk.rle_page_length);
        // uncompress the data with rle indexes
        dataPage = UnBlockIndexer.uncompressData(dataPage, rlePage, eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()]);
        rlePage = null;
    }
    // fill chunk attributes
    DimensionColumnDataChunk columnDataChunk = null;
    if (dimensionColumnChunk.isRowMajor()) {
        // row major block: store column group chunk values
        columnDataChunk = new ColumnGroupDimensionDataChunk(dataPage, eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()], dimensionRawColumnChunk.getRowCount()[pageNumber]);
    } else if (!hasEncoding(dimensionColumnChunk.encoders, Encoding.DICTIONARY)) {
        // no-dictionary column: store variable length column chunk values
        columnDataChunk = new VariableLengthDimensionDataChunk(dataPage, invertedIndexes, invertedIndexesReverse, dimensionRawColumnChunk.getRowCount()[pageNumber]);
    } else {
        // to store fixed length column chunk values
        columnDataChunk = new FixedLengthDimensionDataChunk(dataPage, invertedIndexes, invertedIndexesReverse, dimensionRawColumnChunk.getRowCount()[pageNumber], eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()]);
    }
    return columnDataChunk;
}
Also used : FixedLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk) DataChunk2(org.apache.carbondata.format.DataChunk2) DimensionColumnDataChunk(org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk) DataChunk3(org.apache.carbondata.format.DataChunk3) ColumnGroupDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionDataChunk) ByteBuffer(java.nio.ByteBuffer) VariableLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk)
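The tail of convertToDimensionChunk is a three-way dispatch on the page metadata: row-major pages become column-group chunks, pages without dictionary encoding become variable-length chunks, and everything else becomes a fixed-length dictionary chunk. A minimal sketch of just the decision, with the CarbonData chunk classes replaced by a hypothetical enum:

// Sketch of the chunk-implementation dispatch at the end of
// convertToDimensionChunk; ChunkKind is a stand-in for the three
// DimensionColumnDataChunk implementations.
public class ChunkDispatchSketch {
    enum ChunkKind { COLUMN_GROUP, VARIABLE_LENGTH, FIXED_LENGTH }

    static ChunkKind pick(boolean rowMajor, boolean hasDictionaryEncoding) {
        if (rowMajor) {
            return ChunkKind.COLUMN_GROUP;
        }
        if (!hasDictionaryEncoding) {
            return ChunkKind.VARIABLE_LENGTH;
        }
        return ChunkKind.FIXED_LENGTH;
    }

    public static void main(String[] args) {
        System.out.println(pick(true, true));   // COLUMN_GROUP
        System.out.println(pick(false, false)); // VARIABLE_LENGTH
        System.out.println(pick(false, true));  // FIXED_LENGTH
    }
}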

Aggregations

DataChunk2 (org.apache.carbondata.format.DataChunk2) 9
ByteBuffer (java.nio.ByteBuffer) 8
ArrayList (java.util.ArrayList) 6
DataChunk3 (org.apache.carbondata.format.DataChunk3) 3
Encoding (org.apache.carbondata.format.Encoding) 3
PresenceMeta (org.apache.carbondata.format.PresenceMeta) 3
DimensionColumnDataChunk (org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk) 2
MeasureColumnDataChunk (org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk) 2
ColumnGroupDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionDataChunk) 2
FixedLengthDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk) 2
VariableLengthDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk) 2
ValueCompressionHolder (org.apache.carbondata.core.datastore.compression.ValueCompressionHolder) 2
WriterCompressModel (org.apache.carbondata.core.datastore.compression.WriterCompressModel) 2
CarbonReadDataHolder (org.apache.carbondata.core.datastore.dataholder.CarbonReadDataHolder) 2
ValueEncoderMeta (org.apache.carbondata.core.metadata.ValueEncoderMeta) 2
BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex) 2
IOException (java.io.IOException) 1
BlockletInfoColumnar (org.apache.carbondata.core.metadata.BlockletInfoColumnar) 1
ColumnarFormatVersion (org.apache.carbondata.core.metadata.ColumnarFormatVersion) 1
CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) 1