
Example 6 with DataChunk3

use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.

the class CompressedDimensionChunkFileBasedReaderV3 method convertToDimensionChunk.

/**
   * Below method will be used to convert the compressed dimension chunk raw data to actual data
   *
   * @param dimensionRawColumnChunk dimension raw chunk
   * @param pageNumber              page number whose data is to be decoded
   * @return DimensionColumnDataChunk
   */
@Override
public DimensionColumnDataChunk convertToDimensionChunk(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
    byte[] dataPage = null;
    int[] invertedIndexes = null;
    int[] invertedIndexesReverse = null;
    int[] rlePage = null;
    // data chunk of page
    DataChunk2 dimensionColumnChunk = null;
    // data chunk of blocklet column
    DataChunk3 dataChunk3 = dimensionRawColumnChunk.getDataChunkV3();
    // get the data buffer
    ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
    dimensionColumnChunk = dataChunk3.getData_chunk_list().get(pageNumber);
    // calculating the start point of data
    // as buffer can contain multiple column data, start point will be datachunkoffset +
    // data chunk length + page offset
    int copySourcePoint = dimensionRawColumnChunk.getOffSet() + dimensionChunksLength.get(dimensionRawColumnChunk.getBlockletId()) + dataChunk3.getPage_offset().get(pageNumber);
    // first read the data and uncompress it
    dataPage = COMPRESSOR.unCompressByte(rawData.array(), copySourcePoint, dimensionColumnChunk.data_page_length);
    copySourcePoint += dimensionColumnChunk.data_page_length;
    // if row id block is present then read the row id chunk and uncompress it
    if (hasEncoding(dimensionColumnChunk.encoders, Encoding.INVERTED_INDEX)) {
        invertedIndexes = CarbonUtil.getUnCompressColumnIndex(dimensionColumnChunk.rowid_page_length, rawData, copySourcePoint);
        copySourcePoint += dimensionColumnChunk.rowid_page_length;
        // get the reverse index
        invertedIndexesReverse = getInvertedReverseIndex(invertedIndexes);
    }
    // then uncompress the actual data based on the RLE block
    if (hasEncoding(dimensionColumnChunk.encoders, Encoding.RLE)) {
        rlePage = CarbonUtil.getIntArray(rawData, copySourcePoint, dimensionColumnChunk.rle_page_length);
        // uncompress the data with rle indexes
        dataPage = UnBlockIndexer.uncompressData(dataPage, rlePage, eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()]);
        rlePage = null;
    }
    // fill chunk attributes
    DimensionColumnDataChunk columnDataChunk = null;
    if (dimensionColumnChunk.isRowMajor()) {
        // to store fixed length column chunk values
        columnDataChunk = new ColumnGroupDimensionDataChunk(dataPage, eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()], dimensionRawColumnChunk.getRowCount()[pageNumber]);
    } else if (!hasEncoding(dimensionColumnChunk.encoders, Encoding.DICTIONARY)) {
        // to store variable length (no dictionary) column chunk values
        // and set to data chunk instance
        columnDataChunk = new VariableLengthDimensionDataChunk(dataPage, invertedIndexes, invertedIndexesReverse, dimensionRawColumnChunk.getRowCount()[pageNumber]);
    } else {
        // to store fixed length column chunk values
        columnDataChunk = new FixedLengthDimensionDataChunk(dataPage, invertedIndexes, invertedIndexesReverse, dimensionRawColumnChunk.getRowCount()[pageNumber], eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()]);
    }
    return columnDataChunk;
}
Also used : FixedLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk) DataChunk2(org.apache.carbondata.format.DataChunk2) DimensionColumnDataChunk(org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk) DataChunk3(org.apache.carbondata.format.DataChunk3) ColumnGroupDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionDataChunk) ByteBuffer(java.nio.ByteBuffer) VariableLengthDimensionDataChunk(org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk)
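
The inverted-index branch above restores the original row order through CarbonUtil.getUnCompressColumnIndex and getInvertedReverseIndex. As a minimal standalone sketch of the reverse-index idea only (class and method names are illustrative, the array semantics are assumed from the usage above, and this is not the CarbonUtil implementation):

// Sketch: invertedIndexes[i] is assumed to hold the original row id of the i-th value
// in sorted order, so the reverse array maps a row id back to its sorted position.
public final class InvertedIndexSketch {

    static int[] invertedReverseIndex(int[] invertedIndexes) {
        int[] reverse = new int[invertedIndexes.length];
        for (int i = 0; i < invertedIndexes.length; i++) {
            // the value stored at sorted slot i belongs to original row invertedIndexes[i]
            reverse[invertedIndexes[i]] = i;
        }
        return reverse;
    }

    public static void main(String[] args) {
        int[] invertedIndexes = { 2, 0, 3, 1 };
        // prints [1, 3, 0, 2]: original row 0 sits at sorted position 1, row 1 at position 3, ...
        System.out.println(java.util.Arrays.toString(invertedReverseIndex(invertedIndexes)));
    }
}

With both arrays available, a sorted data page can be served in either order: the inverted index maps sorted positions to row ids, and the reverse index does the opposite during row-wise reconstruction.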

Example 7 with DataChunk3

use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.

the class CompressedDimensionChunkFileBasedReaderV3 method readRawDimensionChunk.

/**
   * Below method will be used to read the dimension column data from the carbon data file
   * Steps for reading
   * 1. Get the length of the data to be read
   * 2. Allocate the direct buffer
   * 3. Read the data from the file
   * 4. Get the data chunk object from data read
   * 5. Create the raw chunk object and fill the details
   *
   * @param fileReader          reader for reading the column from carbon data file
   * @param blockletColumnIndex blocklet index of the column in carbon data file
   * @return dimension raw chunk
   */
public DimensionRawColumnChunk readRawDimensionChunk(FileHolder fileReader, int blockletColumnIndex) throws IOException {
    // get the current dimension offset
    long currentDimensionOffset = dimensionChunksOffset.get(blockletColumnIndex);
    int length = 0;
    // calculate the length of the data to be read: for the last dimension subtract the current
    // dimension offset from lastDimensionOffsets, otherwise subtract it from the next dimension's offset
    if (dimensionChunksOffset.size() - 1 == blockletColumnIndex) {
        length = (int) (lastDimensionOffsets - currentDimensionOffset);
    } else {
        length = (int) (dimensionChunksOffset.get(blockletColumnIndex + 1) - currentDimensionOffset);
    }
    ByteBuffer buffer = null;
    // read the data from carbon data file
    synchronized (fileReader) {
        buffer = fileReader.readByteBuffer(filePath, currentDimensionOffset, length);
    }
    // get the data chunk which will have all the details about the data pages
    DataChunk3 dataChunk = CarbonUtil.readDataChunk3(buffer, 0, length);
    // creating a raw chunks instance and filling all the details
    DimensionRawColumnChunk rawColumnChunk = new DimensionRawColumnChunk(blockletColumnIndex, buffer, 0, length, this);
    int numberOfPages = dataChunk.getPage_length().size();
    byte[][] maxValueOfEachPage = new byte[numberOfPages][];
    byte[][] minValueOfEachPage = new byte[numberOfPages][];
    int[] eachPageLength = new int[numberOfPages];
    for (int i = 0; i < minValueOfEachPage.length; i++) {
        maxValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMax_values().get(0).array();
        minValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMin_values().get(0).array();
        eachPageLength[i] = dataChunk.getData_chunk_list().get(i).getNumberOfRowsInpage();
    }
    rawColumnChunk.setDataChunkV3(dataChunk);
    rawColumnChunk.setFileHolder(fileReader);
    rawColumnChunk.setPagesCount(dataChunk.getPage_length().size());
    rawColumnChunk.setMaxValues(maxValueOfEachPage);
    rawColumnChunk.setMinValues(minValueOfEachPage);
    rawColumnChunk.setRowCount(eachPageLength);
    rawColumnChunk.setLengths(ArrayUtils.toPrimitive(dataChunk.page_length.toArray(new Integer[dataChunk.page_length.size()])));
    rawColumnChunk.setOffsets(ArrayUtils.toPrimitive(dataChunk.page_offset.toArray(new Integer[dataChunk.page_offset.size()])));
    return rawColumnChunk;
}
Also used : DataChunk3(org.apache.carbondata.format.DataChunk3) DimensionRawColumnChunk(org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk) ByteBuffer(java.nio.ByteBuffer)
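
Taken together, the two examples suggest the usual call sequence: read the raw (still compressed) blocklet column once, then convert individual pages on demand. Below is a hedged driver sketch under those assumptions; the helper class and method names are hypothetical, the getPagesCount accessor is inferred from the setPagesCount call above, and the import path for the V3 reader is an assumption rather than something confirmed by this listing.

import java.io.IOException;
import org.apache.carbondata.core.datastore.FileHolder;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
// assumed package path for the V3 dimension chunk reader
import org.apache.carbondata.core.datastore.chunk.reader.dimension.v3.CompressedDimensionChunkFileBasedReaderV3;

// Hypothetical helper: reads one blocklet column and decodes every page eagerly.
final class DimensionChunkReadSketch {

    static DimensionColumnDataChunk[] readAllPages(CompressedDimensionChunkFileBasedReaderV3 reader,
        FileHolder fileReader, int blockletColumnIndex) throws IOException {
        // 1. read the raw column chunk for the whole blocklet (one IO, data still compressed)
        DimensionRawColumnChunk raw = reader.readRawDimensionChunk(fileReader, blockletColumnIndex);
        // 2. decode each page; getPagesCount() is assumed to return the page count set by the reader
        DimensionColumnDataChunk[] pages = new DimensionColumnDataChunk[raw.getPagesCount()];
        for (int page = 0; page < pages.length; page++) {
            pages[page] = reader.convertToDimensionChunk(raw, page);
        }
        return pages;
    }
}

In practice a query would rarely decode every page up front; the point of the V3 layout is that the raw chunk carries per-page min/max values, so pages can be pruned before convertToDimensionChunk is ever called for them.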

Aggregations

DataChunk3 (org.apache.carbondata.format.DataChunk3) 7
ByteBuffer (java.nio.ByteBuffer) 6
DataChunk2 (org.apache.carbondata.format.DataChunk2) 3
ArrayList (java.util.ArrayList) 2
DimensionRawColumnChunk (org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk) 2
MeasureRawColumnChunk (org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk) 2
DimensionColumnDataChunk (org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk) 1
MeasureColumnDataChunk (org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk) 1
ColumnGroupDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionDataChunk) 1
FixedLengthDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk) 1
VariableLengthDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk) 1
ValueCompressionHolder (org.apache.carbondata.core.datastore.compression.ValueCompressionHolder) 1
WriterCompressModel (org.apache.carbondata.core.datastore.compression.WriterCompressModel) 1
CarbonReadDataHolder (org.apache.carbondata.core.datastore.dataholder.CarbonReadDataHolder) 1
ValueEncoderMeta (org.apache.carbondata.core.metadata.ValueEncoderMeta) 1