Example 1 with DataChunk3

Use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.

The class CarbonMetadataUtil, method getDataChunk3.

public static DataChunk3 getDataChunk3(List<NodeHolder> nodeHolderList, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, int index, boolean isDimensionColumn) throws IOException {
    List<DataChunk2> dataChunksList = getDatachunk2(nodeHolderList, columnSchema, segmentProperties, index, isDimensionColumn);
    int offset = 0;
    DataChunk3 dataChunk = new DataChunk3();
    List<Integer> pageOffsets = new ArrayList<>();
    List<Integer> pageLengths = new ArrayList<>();
    int length = 0;
    // a page's total length is the sum of its data page, RLE page and
    // row-id page lengths; its offset is the running sum of prior lengths
    for (int i = 0; i < dataChunksList.size(); i++) {
        pageOffsets.add(offset);
        length = dataChunksList.get(i).getData_page_length() + dataChunksList.get(i).getRle_page_length() + dataChunksList.get(i).getRowid_page_length();
        pageLengths.add(length);
        offset += length;
    }
    dataChunk.setData_chunk_list(dataChunksList);
    dataChunk.setPage_length(pageLengths);
    dataChunk.setPage_offset(pageOffsets);
    return dataChunk;
}
Also used : DataChunk2(org.apache.carbondata.format.DataChunk2) ArrayList(java.util.ArrayList) DataChunk3(org.apache.carbondata.format.DataChunk3)
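
The loop above is pure offset bookkeeping: each page's offset is the running sum of the lengths of all pages before it, and each length is the sum of the page's data, RLE and row-id components. Below is a minimal standalone sketch of that pattern, detached from the Thrift DataChunk2/DataChunk3 types; PageLayoutSketch, PageLayout and layoutPages are hypothetical names used only for illustration.

import java.util.ArrayList;
import java.util.List;

public class PageLayoutSketch {

    // hypothetical holder for the computed per-page offsets and lengths
    static class PageLayout {
        final List<Integer> offsets = new ArrayList<>();
        final List<Integer> lengths = new ArrayList<>();
    }

    // mirrors the loop in getDataChunk3: length = data + RLE + row-id,
    // offset = running sum of the previous pages' lengths
    static PageLayout layoutPages(int[] dataLens, int[] rleLens, int[] rowIdLens) {
        PageLayout layout = new PageLayout();
        int offset = 0;
        for (int i = 0; i < dataLens.length; i++) {
            layout.offsets.add(offset);
            int length = dataLens[i] + rleLens[i] + rowIdLens[i];
            layout.lengths.add(length);
            offset += length;
        }
        return layout;
    }

    public static void main(String[] args) {
        PageLayout layout = layoutPages(new int[] { 100, 80 }, new int[] { 10, 8 }, new int[] { 4, 4 });
        System.out.println(layout.offsets); // [0, 114]
        System.out.println(layout.lengths); // [114, 92]
    }
}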

Example 2 with DataChunk3

Use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.

The class CompressedMeasureChunkFileBasedReaderV3, method convertToMeasureChunk.

/**
   * Below method will be used to convert the compressed measure chunk raw data to actual data
   *
   * @param measureRawColumnChunk measure raw chunk
   * @param pageNumber            number of the page to be converted
   * @return MeasureColumnDataChunk
   */
@Override
public MeasureColumnDataChunk convertToMeasureChunk(MeasureRawColumnChunk measureRawColumnChunk, int pageNumber) throws IOException {
    MeasureColumnDataChunk dataChunk = new MeasureColumnDataChunk();
    // data chunk of blocklet column
    DataChunk3 dataChunk3 = measureRawColumnChunk.getDataChunkV3();
    // data chunk of page
    DataChunk2 measureColumnChunk = dataChunk3.getData_chunk_list().get(pageNumber);
    // calculating the start point of data
    // as buffer can contain multiple column data, start point will be datachunkoffset +
    // data chunk length + page offset
    int copyPoint = measureRawColumnChunk.getOffSet() + measureColumnChunkLength.get(measureRawColumnChunk.getBlockletId()) + dataChunk3.getPage_offset().get(pageNumber);
    List<ValueEncoderMeta> valueEncodeMeta = new ArrayList<>();
    for (int i = 0; i < measureColumnChunk.getEncoder_meta().size(); i++) {
        valueEncodeMeta.add(CarbonUtil.deserializeEncoderMetaNew(measureColumnChunk.getEncoder_meta().get(i).array()));
    }
    WriterCompressModel compressionModel = CarbonUtil.getValueCompressionModel(valueEncodeMeta);
    ValueCompressionHolder values = compressionModel.getValueCompressionHolder()[0];
    // uncompress
    ByteBuffer rawData = measureRawColumnChunk.getRawData();
    values.uncompress(compressionModel.getConvertedDataType()[0], rawData.array(), copyPoint, measureColumnChunk.data_page_length, compressionModel.getMantissa()[0], compressionModel.getMaxValue()[0], measureRawColumnChunk.getRowCount()[pageNumber]);
    CarbonReadDataHolder measureDataHolder = new CarbonReadDataHolder(values);
    // set the uncompressed measure data holder
    dataChunk.setMeasureDataHolder(measureDataHolder);
    // set the null value indexes
    dataChunk.setNullValueIndexHolder(getPresenceMeta(measureColumnChunk.presence));
    return dataChunk;
}
Also used : WriterCompressModel(org.apache.carbondata.core.datastore.compression.WriterCompressModel) ValueCompressionHolder(org.apache.carbondata.core.datastore.compression.ValueCompressionHolder) DataChunk2(org.apache.carbondata.format.DataChunk2) ArrayList(java.util.ArrayList) DataChunk3(org.apache.carbondata.format.DataChunk3) CarbonReadDataHolder(org.apache.carbondata.core.datastore.dataholder.CarbonReadDataHolder) ValueEncoderMeta(org.apache.carbondata.core.metadata.ValueEncoderMeta) MeasureColumnDataChunk(org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk) ByteBuffer(java.nio.ByteBuffer)
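
A detail worth isolating is the copyPoint arithmetic: because one buffer may hold several columns' data, a page's data starts at the column chunk's offset in the buffer, plus the length of the serialized DataChunk3 header, plus the page's own offset from getPage_offset(). A minimal sketch with plain integers follows; the parameter names are hypothetical stand-ins for getOffSet(), measureColumnChunkLength and getPage_offset().

// where a page's data starts inside a shared buffer, mirroring the
// copyPoint computation in convertToMeasureChunk (all values in bytes)
static int copyPoint(int chunkOffsetInBuffer, int headerLength, int[] pageOffsets, int pageNumber) {
    return chunkOffsetInBuffer + headerLength + pageOffsets[pageNumber];
}

// e.g. a chunk at buffer offset 2048 with a 120-byte serialized header:
// copyPoint(2048, 120, new int[] { 0, 114 }, 1) == 2282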

Example 3 with DataChunk3

Use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.

The class CompressedMeasureChunkFileBasedReaderV3, method readRawMeasureChunk.

/**
   * Below method will be used to read the measure column data from carbon data file
   * 1. Get the length of the data to be read
   * 2. Allocate the direct buffer
   * 3. read the data from file
   * 4. Get the data chunk object from data read
   * 5. Create the raw chunk object and fill the details
   *
   * @param fileReader          reader for reading the column from carbon data file
   * @param blockletColumnIndex blocklet index of the column in carbon data file
   * @return measure raw chunk
   */
@Override
public MeasureRawColumnChunk readRawMeasureChunk(FileHolder fileReader, int blockletColumnIndex) throws IOException {
    int dataLength = 0;
    // for all but the last column, the chunk length is the next column's
    // offset minus the current column's offset; for the last column it is
    // the end of the measure data (measureOffsets) minus the current offset
    if (measureColumnChunkOffsets.size() - 1 == blockletColumnIndex) {
        dataLength = (int) (measureOffsets - measureColumnChunkOffsets.get(blockletColumnIndex));
    } else {
        dataLength = (int) (measureColumnChunkOffsets.get(blockletColumnIndex + 1) - measureColumnChunkOffsets.get(blockletColumnIndex));
    }
    ByteBuffer buffer = null;
    // read the data from carbon data file
    synchronized (fileReader) {
        buffer = fileReader.readByteBuffer(filePath, measureColumnChunkOffsets.get(blockletColumnIndex), dataLength);
    }
    // get the data chunk which will have all the details about the data pages
    DataChunk3 dataChunk = CarbonUtil.readDataChunk3(buffer, 0, measureColumnChunkLength.get(blockletColumnIndex));
    // creating a raw chunks instance and filling all the details
    MeasureRawColumnChunk rawColumnChunk = new MeasureRawColumnChunk(blockletColumnIndex, buffer, 0, dataLength, this);
    int numberOfPages = dataChunk.getPage_length().size();
    byte[][] maxValueOfEachPage = new byte[numberOfPages][];
    byte[][] minValueOfEachPage = new byte[numberOfPages][];
    int[] eachPageLength = new int[numberOfPages];
    // collect each page's max value, min value and row count from its page-level metadata
    for (int i = 0; i < minValueOfEachPage.length; i++) {
        maxValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMax_values().get(0).array();
        minValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMin_values().get(0).array();
        eachPageLength[i] = dataChunk.getData_chunk_list().get(i).getNumberOfRowsInpage();
    }
    rawColumnChunk.setDataChunkV3(dataChunk);
    rawColumnChunk.setFileReader(fileReader);
    rawColumnChunk.setPagesCount(dataChunk.getPage_length().size());
    rawColumnChunk.setMaxValues(maxValueOfEachPage);
    rawColumnChunk.setMinValues(minValueOfEachPage);
    rawColumnChunk.setRowCount(eachPageLength);
    rawColumnChunk.setLengths(ArrayUtils.toPrimitive(dataChunk.page_length.toArray(new Integer[dataChunk.page_length.size()])));
    rawColumnChunk.setOffsets(ArrayUtils.toPrimitive(dataChunk.page_offset.toArray(new Integer[dataChunk.page_offset.size()])));
    return rawColumnChunk;
}
Also used : DataChunk3(org.apache.carbondata.format.DataChunk3) MeasureRawColumnChunk(org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk) ByteBuffer(java.nio.ByteBuffer)
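
Step 1, determining how many bytes to read, exploits the fact that column chunks are laid out back to back: a chunk ends where the next column's chunk begins, and the last chunk ends at the end of the measure data. A sketch of that rule under those assumptions; chunkOffsets and measureDataEnd are hypothetical stand-ins for measureColumnChunkOffsets and measureOffsets.

// length in bytes of column columnIndex's chunk: the distance to the next
// column's offset, or to the end of the measure data for the last column
static long chunkLength(long[] chunkOffsets, long measureDataEnd, int columnIndex) {
    boolean isLastColumn = columnIndex == chunkOffsets.length - 1;
    long end = isLastColumn ? measureDataEnd : chunkOffsets[columnIndex + 1];
    return end - chunkOffsets[columnIndex];
}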

Example 4 with DataChunk3

Use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.

The class CompressedMeasureChunkFileBasedReaderV3, method readRawMeasureChunksInGroup.

/**
   * Below method will be used to read the multiple measure column data in group
   * and divide into measure raw chunk object
   * Steps for reading
   * 1. Get the length of the data to be read
   * 2. Allocate the direct buffer
   * 3. read the data from file
   * 4. Get the data chunk object from file for each column
   * 5. Create the raw chunk object and fill the details for each column
   * 6. increment the offset of the data
   *
   * @param fileReader
   *        reader which will be used to read the measure columns data from file
   * @param startColumnBlockletIndex
   *        blocklet index of the first measure column
   * @param endColumnBlockletIndex
   *        blocklet index of the last measure column
   * @return MeasureRawColumnChunk array
   */
protected MeasureRawColumnChunk[] readRawMeasureChunksInGroup(FileHolder fileReader, int startColumnBlockletIndex, int endColumnBlockletIndex) throws IOException {
    // the total length of the data to be read is the offset of the
    // (end column + 1) minus the offset of the start column
    long currentMeasureOffset = measureColumnChunkOffsets.get(startColumnBlockletIndex);
    ByteBuffer buffer = null;
    // read the data from carbon data file
    synchronized (fileReader) {
        buffer = fileReader.readByteBuffer(filePath, currentMeasureOffset, (int) (measureColumnChunkOffsets.get(endColumnBlockletIndex + 1) - currentMeasureOffset));
    }
    // create raw chunk for each measure column
    MeasureRawColumnChunk[] measureDataChunk = new MeasureRawColumnChunk[endColumnBlockletIndex - startColumnBlockletIndex + 1];
    int runningLength = 0;
    int index = 0;
    for (int i = startColumnBlockletIndex; i <= endColumnBlockletIndex; i++) {
        int currentLength = (int) (measureColumnChunkOffsets.get(i + 1) - measureColumnChunkOffsets.get(i));
        MeasureRawColumnChunk measureRawColumnChunk = new MeasureRawColumnChunk(i, buffer, runningLength, currentLength, this);
        DataChunk3 dataChunk = CarbonUtil.readDataChunk3(buffer, runningLength, measureColumnChunkLength.get(i));
        int numberOfPages = dataChunk.getPage_length().size();
        byte[][] maxValueOfEachPage = new byte[numberOfPages][];
        byte[][] minValueOfEachPage = new byte[numberOfPages][];
        int[] eachPageLength = new int[numberOfPages];
        for (int j = 0; j < minValueOfEachPage.length; j++) {
            maxValueOfEachPage[j] = dataChunk.getData_chunk_list().get(j).getMin_max().getMax_values().get(0).array();
            minValueOfEachPage[j] = dataChunk.getData_chunk_list().get(j).getMin_max().getMin_values().get(0).array();
            eachPageLength[j] = dataChunk.getData_chunk_list().get(j).getNumberOfRowsInpage();
        }
        measureRawColumnChunk.setDataChunkV3(dataChunk);
        measureRawColumnChunk.setFileReader(fileReader);
        measureRawColumnChunk.setPagesCount(dataChunk.getPage_length().size());
        measureRawColumnChunk.setMaxValues(maxValueOfEachPage);
        measureRawColumnChunk.setMinValues(minValueOfEachPage);
        measureRawColumnChunk.setRowCount(eachPageLength);
        measureRawColumnChunk.setLengths(ArrayUtils.toPrimitive(dataChunk.page_length.toArray(new Integer[dataChunk.page_length.size()])));
        measureRawColumnChunk.setOffsets(ArrayUtils.toPrimitive(dataChunk.page_offset.toArray(new Integer[dataChunk.page_offset.size()])));
        measureDataChunk[index] = measureRawColumnChunk;
        runningLength += currentLength;
        index++;
    }
    return measureDataChunk;
}
Also used : DataChunk3(org.apache.carbondata.format.DataChunk3) MeasureRawColumnChunk(org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk) ByteBuffer(java.nio.ByteBuffer)
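
The last two setters unbox the Thrift List<Integer> fields into the int[] the raw chunk API expects, via ArrayUtils.toPrimitive. A self-contained illustration of that conversion; the import path shown is the commons-lang 2 location of ArrayUtils, which is an assumption about the version on the classpath.

import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang.ArrayUtils;

public class ToPrimitiveExample {
    public static void main(String[] args) {
        // Thrift-generated fields such as page_length are boxed lists
        List<Integer> pageLengths = Arrays.asList(114, 92, 130);
        // copy into an Integer[] and unbox into a primitive int[]
        int[] lengths = ArrayUtils.toPrimitive(pageLengths.toArray(new Integer[pageLengths.size()]));
        System.out.println(Arrays.toString(lengths)); // [114, 92, 130]
    }
}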

Example 5 with DataChunk3

Use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.

The class CompressedDimensionChunkFileBasedReaderV3, method readRawDimensionChunksInGroup.

/**
   * Below method will be used to read the multiple dimension column data in group
   * and divide into dimension raw chunk object
   * Steps for reading
   * 1. Get the length of the data to be read
   * 2. Allocate the direct buffer
   * 3. read the data from file
   * 4. Get the data chunk object from file for each column
   * 5. Create the raw chunk object and fill the details for each column
   * 6. increment the offset of the data
   *
   * @param fileReader
   *        reader which will be used to read the dimension columns data from file
   * @param startBlockletColumnIndex
   *        blocklet index of the first dimension column
   * @param endBlockletColumnIndex
   *        blocklet index of the last dimension column
   * @return DimensionRawColumnChunk array
   */
protected DimensionRawColumnChunk[] readRawDimensionChunksInGroup(FileHolder fileReader, int startBlockletColumnIndex, int endBlockletColumnIndex) throws IOException {
    // the total length of the data to be read is the offset of the
    // (end column + 1) minus the offset of the start column
    long currentDimensionOffset = dimensionChunksOffset.get(startBlockletColumnIndex);
    ByteBuffer buffer = null;
    // read the data from carbon data file
    synchronized (fileReader) {
        buffer = fileReader.readByteBuffer(filePath, currentDimensionOffset, (int) (dimensionChunksOffset.get(endBlockletColumnIndex + 1) - currentDimensionOffset));
    }
    // create raw chunk for each dimension column
    DimensionRawColumnChunk[] dimensionDataChunks = new DimensionRawColumnChunk[endBlockletColumnIndex - startBlockletColumnIndex + 1];
    int index = 0;
    int runningLength = 0;
    for (int i = startBlockletColumnIndex; i <= endBlockletColumnIndex; i++) {
        int currentLength = (int) (dimensionChunksOffset.get(i + 1) - dimensionChunksOffset.get(i));
        dimensionDataChunks[index] = new DimensionRawColumnChunk(i, buffer, runningLength, currentLength, this);
        DataChunk3 dataChunk = CarbonUtil.readDataChunk3(buffer, runningLength, dimensionChunksLength.get(i));
        int numberOfPages = dataChunk.getPage_length().size();
        byte[][] maxValueOfEachPage = new byte[numberOfPages][];
        byte[][] minValueOfEachPage = new byte[numberOfPages][];
        int[] eachPageLength = new int[numberOfPages];
        for (int j = 0; j < minValueOfEachPage.length; j++) {
            maxValueOfEachPage[j] = dataChunk.getData_chunk_list().get(j).getMin_max().getMax_values().get(0).array();
            minValueOfEachPage[j] = dataChunk.getData_chunk_list().get(j).getMin_max().getMin_values().get(0).array();
            eachPageLength[j] = dataChunk.getData_chunk_list().get(j).getNumberOfRowsInpage();
        }
        dimensionDataChunks[index].setDataChunkV3(dataChunk);
        dimensionDataChunks[index].setFileHolder(fileReader);
        dimensionDataChunks[index].setPagesCount(dataChunk.getPage_length().size());
        dimensionDataChunks[index].setMaxValues(maxValueOfEachPage);
        dimensionDataChunks[index].setMinValues(minValueOfEachPage);
        dimensionDataChunks[index].setRowCount(eachPageLength);
        dimensionDataChunks[index].setLengths(ArrayUtils.toPrimitive(dataChunk.page_length.toArray(new Integer[dataChunk.page_length.size()])));
        dimensionDataChunks[index].setOffsets(ArrayUtils.toPrimitive(dataChunk.page_offset.toArray(new Integer[dataChunk.page_offset.size()])));
        runningLength += currentLength;
        index++;
    }
    return dimensionDataChunks;
}
Also used : DataChunk3(org.apache.carbondata.format.DataChunk3) DimensionRawColumnChunk(org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk) ByteBuffer(java.nio.ByteBuffer)
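
Examples 4 and 5 share one pattern: a single contiguous file read covering several columns, followed by a walk over the buffer with a running offset, so each column gets a view described by its offset in the buffer and its length. A stripped-down sketch of that slicing loop; ColumnSlice is a hypothetical view class, and fileOffsets plays the role of dimensionChunksOffset / measureColumnChunkOffsets (it must contain one extra trailing offset so column i's length is fileOffsets[i + 1] - fileOffsets[i]).

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

public class GroupedReadSketch {

    // hypothetical view of one column's region inside a shared buffer
    static class ColumnSlice {
        final int columnIndex;
        final ByteBuffer buffer;
        final int offsetInBuffer;
        final int length;

        ColumnSlice(int columnIndex, ByteBuffer buffer, int offsetInBuffer, int length) {
            this.columnIndex = columnIndex;
            this.buffer = buffer;
            this.offsetInBuffer = offsetInBuffer;
            this.length = length;
        }
    }

    // slices one buffer, read in a single I/O covering columns
    // [startColumn, endColumn], into per-column regions
    static List<ColumnSlice> slice(ByteBuffer buffer, long[] fileOffsets, int startColumn, int endColumn) {
        List<ColumnSlice> slices = new ArrayList<>();
        int runningLength = 0;
        for (int i = startColumn; i <= endColumn; i++) {
            int currentLength = (int) (fileOffsets[i + 1] - fileOffsets[i]);
            slices.add(new ColumnSlice(i, buffer, runningLength, currentLength));
            runningLength += currentLength;
        }
        return slices;
    }
}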

Aggregations

DataChunk3 (org.apache.carbondata.format.DataChunk3): 7 usages
ByteBuffer (java.nio.ByteBuffer): 6 usages
DataChunk2 (org.apache.carbondata.format.DataChunk2): 3 usages
ArrayList (java.util.ArrayList): 2 usages
DimensionRawColumnChunk (org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk): 2 usages
MeasureRawColumnChunk (org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk): 2 usages
DimensionColumnDataChunk (org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk): 1 usage
MeasureColumnDataChunk (org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk): 1 usage
ColumnGroupDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.ColumnGroupDimensionDataChunk): 1 usage
FixedLengthDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.FixedLengthDimensionDataChunk): 1 usage
VariableLengthDimensionDataChunk (org.apache.carbondata.core.datastore.chunk.impl.VariableLengthDimensionDataChunk): 1 usage
ValueCompressionHolder (org.apache.carbondata.core.datastore.compression.ValueCompressionHolder): 1 usage
WriterCompressModel (org.apache.carbondata.core.datastore.compression.WriterCompressModel): 1 usage
CarbonReadDataHolder (org.apache.carbondata.core.datastore.dataholder.CarbonReadDataHolder): 1 usage
ValueEncoderMeta (org.apache.carbondata.core.metadata.ValueEncoderMeta): 1 usage