use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.
the class CarbonMetadataUtil method getDataChunk3.
public static DataChunk3 getDataChunk3(List<NodeHolder> nodeHolderList, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, int index, boolean isDimensionColumn) throws IOException {
List<DataChunk2> dataChunksList = getDatachunk2(nodeHolderList, columnSchema, segmentProperties, index, isDimensionColumn);
int offset = 0;
DataChunk3 dataChunk = new DataChunk3();
List<Integer> pageOffsets = new ArrayList<>();
List<Integer> pageLengths = new ArrayList<>();
int length = 0;
for (int i = 0; i < dataChunksList.size(); i++) {
pageOffsets.add(offset);
length = dataChunksList.get(i).getData_page_length() + dataChunksList.get(i).getRle_page_length() + dataChunksList.get(i).getRowid_page_length();
pageLengths.add(length);
offset += length;
}
dataChunk.setData_chunk_list(dataChunksList);
dataChunk.setPage_length(pageLengths);
dataChunk.setPage_offset(pageOffsets);
return dataChunk;
}
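The page offsets here are just a running sum of the per-page lengths (data page + RLE page + rowid page). Below is a minimal standalone sketch of that accumulation; the class name and all page component sizes are invented for illustration:

import java.util.ArrayList;
import java.util.List;

public class PageOffsetSketch {
  public static void main(String[] args) {
    // hypothetical per-page component sizes: {data page, RLE page, rowid page}
    int[][] pageComponentLengths = { { 120, 8, 16 }, { 96, 4, 12 }, { 110, 0, 0 } };
    List<Integer> pageOffsets = new ArrayList<>();
    List<Integer> pageLengths = new ArrayList<>();
    int offset = 0;
    for (int[] page : pageComponentLengths) {
      // a page's offset is the sum of the lengths of all pages before it
      pageOffsets.add(offset);
      int length = page[0] + page[1] + page[2];
      pageLengths.add(length);
      offset += length;
    }
    System.out.println("offsets = " + pageOffsets); // [0, 144, 256]
    System.out.println("lengths = " + pageLengths); // [144, 112, 110]
  }
}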
use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.
the class CompressedMeasureChunkFileBasedReaderV3 method convertToMeasureChunk.
/**
* Below method will be used to convert the compressed measure chunk raw data to actual data
*
* @param measureRawColumnChunk measure raw chunk
* @param pageNumber page number of the data to be decoded
* @return MeasureColumnDataChunk
*/
@Override
public MeasureColumnDataChunk convertToMeasureChunk(MeasureRawColumnChunk measureRawColumnChunk, int pageNumber) throws IOException {
MeasureColumnDataChunk dataChunk = new MeasureColumnDataChunk();
// data chunk of blocklet column
DataChunk3 dataChunk3 = measureRawColumnChunk.getDataChunkV3();
// data chunk of page
DataChunk2 measureColumnChunk = dataChunk3.getData_chunk_list().get(pageNumber);
// calculating the start point of data
// as the buffer can contain data of multiple columns, the start point will be
// data chunk offset + data chunk header length + page offset
int copyPoint = measureRawColumnChunk.getOffSet() + measureColumnChunkLength.get(measureRawColumnChunk.getBlockletId()) + dataChunk3.getPage_offset().get(pageNumber);
List<ValueEncoderMeta> valueEncodeMeta = new ArrayList<>();
for (int i = 0; i < measureColumnChunk.getEncoder_meta().size(); i++) {
valueEncodeMeta.add(CarbonUtil.deserializeEncoderMetaNew(measureColumnChunk.getEncoder_meta().get(i).array()));
}
WriterCompressModel compressionModel = CarbonUtil.getValueCompressionModel(valueEncodeMeta);
ValueCompressionHolder values = compressionModel.getValueCompressionHolder()[0];
// uncompress
ByteBuffer rawData = measureRawColumnChunk.getRawData();
values.uncompress(compressionModel.getConvertedDataType()[0], rawData.array(), copyPoint, measureColumnChunk.data_page_length, compressionModel.getMantissa()[0], compressionModel.getMaxValue()[0], measureRawColumnChunk.getRowCount()[pageNumber]);
CarbonReadDataHolder measureDataHolder = new CarbonReadDataHolder(values);
// set the data chunk
dataChunk.setMeasureDataHolder(measureDataHolder);
// set the null value indexes
datChunk.setNullValueIndexHolder(getPresenceMeta(measureColumnChunk.presence));
return dataChunk;
}
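The copyPoint arithmetic above locates a single page inside a raw buffer that may hold several columns. A minimal sketch of the same sum, with invented values for the chunk offset, the serialized DataChunk3 header length, and the page offsets:

public class CopyPointSketch {
  public static void main(String[] args) {
    int chunkOffsetInBuffer = 0;     // where this column's chunk starts in the raw buffer
    int dataChunkHeaderLength = 87;  // serialized DataChunk3 length read from the footer
    int[] pageOffsets = { 0, 144, 256 };
    int pageNumber = 1;
    // start of the requested page = chunk offset + header length + page offset
    int copyPoint = chunkOffsetInBuffer + dataChunkHeaderLength + pageOffsets[pageNumber];
    System.out.println("page " + pageNumber + " starts at byte " + copyPoint); // 231
  }
}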
use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.
the class CompressedMeasureChunkFileBasedReaderV3 method readRawMeasureChunk.
/**
* Below method will be used to read the measure column data from the carbon data file
* 1. Get the length of the data to be read
* 2. Allocate the direct buffer
* 3. read the data from file
* 4. Get the data chunk object from data read
* 5. Create the raw chunk object and fill the details
*
* @param fileReader reader for reading the column from carbon data file
* @param blockletColumnIndex index of the measure column in the blocklet
* @return measure raw chunk
*/
@Override
public MeasureRawColumnChunk readRawMeasureChunk(FileHolder fileReader, int blockletColumnIndex) throws IOException {
int dataLength = 0;
// to calculate the length of the data to be read: for the last measure column
// there is no next offset, so subtract the current chunk offset from the measure
// end offset; otherwise subtract it from the next column's chunk offset
if (measureColumnChunkOffsets.size() - 1 == blockletColumnIndex) {
dataLength = (int) (measureOffsets - measureColumnChunkOffsets.get(blockletColumnIndex));
} else {
dataLength = (int) (measureColumnChunkOffsets.get(blockletColumnIndex + 1) - measureColumnChunkOffsets.get(blockletColumnIndex));
}
ByteBuffer buffer = null;
// read the data from carbon data file
synchronized (fileReader) {
buffer = fileReader.readByteBuffer(filePath, measureColumnChunkOffsets.get(blockletColumnIndex), dataLength);
}
// get the data chunk which will have all the details about the data pages
DataChunk3 dataChunk = CarbonUtil.readDataChunk3(buffer, 0, measureColumnChunkLength.get(blockletColumnIndex));
// creating a raw chunks instance and filling all the details
MeasureRawColumnChunk rawColumnChunk = new MeasureRawColumnChunk(blockletColumnIndex, buffer, 0, dataLength, this);
int numberOfPages = dataChunk.getPage_length().size();
byte[][] maxValueOfEachPage = new byte[numberOfPages][];
byte[][] minValueOfEachPage = new byte[numberOfPages][];
int[] eachPageLength = new int[numberOfPages];
for (int i = 0; i < minValueOfEachPage.length; i++) {
maxValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMax_values().get(0).array();
minValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMin_values().get(0).array();
eachPageLength[i] = dataChunk.getData_chunk_list().get(i).getNumberOfRowsInpage();
}
rawColumnChunk.setDataChunkV3(dataChunk);
rawColumnChunk.setFileReader(fileReader);
rawColumnChunk.setPagesCount(dataChunk.getPage_length().size());
rawColumnChunk.setMaxValues(maxValueOfEachPage);
rawColumnChunk.setMinValues(minValueOfEachPage);
rawColumnChunk.setRowCount(eachPageLength);
rawColumnChunk.setLengths(ArrayUtils.toPrimitive(dataChunk.page_length.toArray(new Integer[dataChunk.page_length.size()])));
rawColumnChunk.setOffsets(ArrayUtils.toPrimitive(dataChunk.page_offset.toArray(new Integer[dataChunk.page_offset.size()])));
return rawColumnChunk;
}
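Because the footer stores only start offsets, a chunk's byte length is the distance to the next column's offset, except for the last column, whose end is the overall measure end offset. A small sketch of that rule; chunkLength and all offsets are illustrative, not CarbonData API:

import java.util.Arrays;
import java.util.List;

public class ChunkLengthSketch {
  // length of one column's chunk, derived from the footer's start offsets
  static int chunkLength(List<Long> chunkOffsets, long measureEndOffset, int columnIndex) {
    if (columnIndex == chunkOffsets.size() - 1) {
      // last column: no next offset, so the measure end offset marks the chunk end
      return (int) (measureEndOffset - chunkOffsets.get(columnIndex));
    }
    return (int) (chunkOffsets.get(columnIndex + 1) - chunkOffsets.get(columnIndex));
  }

  public static void main(String[] args) {
    List<Long> offsets = Arrays.asList(1000L, 1500L, 2200L);
    long measureEnd = 3000L;
    for (int i = 0; i < offsets.size(); i++) {
      System.out.println("column " + i + " length = " + chunkLength(offsets, measureEnd, i));
    }
    // column 0 length = 500, column 1 length = 700, column 2 length = 800
  }
}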
use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.
the class CompressedMeasureChunkFileBasedReaderV3 method readRawMeasureChunksInGroup.
/**
* Below method will be used to read multiple measure columns' data in a group
* and divide it into measure raw chunk objects
* Steps for reading
* 1. Get the length of the data to be read
* 2. Allocate the direct buffer
* 3. read the data from file
* 4. Get the data chunk object from file for each column
* 5. Create the raw chunk object and fill the details for each column
* 6. increment the offset of the data
*
* @param fileReader
* reader which will be used to read the measure columns data from file
* @param startColumnBlockletIndex
* blocklet index of the first measure column
* @param endColumnBlockletIndex
* blocklet index of the last measure column
* @return MeasureRawColumnChunk array
*/
protected MeasureRawColumnChunk[] readRawMeasureChunksInGroup(FileHolder fileReader, int startColumnBlockletIndex, int endColumnBlockletIndex) throws IOException {
// to calculate the length of the data to be read, subtract the offset of the
// start column from the offset of column (end + 1) to get the total length
long currentMeasureOffset = measureColumnChunkOffsets.get(startColumnBlockletIndex);
ByteBuffer buffer = null;
// read the data from carbon data file
synchronized (fileReader) {
buffer = fileReader.readByteBuffer(filePath, currentMeasureOffset, (int) (measureColumnChunkOffsets.get(endColumnBlockletIndex + 1) - currentMeasureOffset));
}
// create raw chunk for each measure column
MeasureRawColumnChunk[] measureDataChunk = new MeasureRawColumnChunk[endColumnBlockletIndex - startColumnBlockletIndex + 1];
int runningLength = 0;
int index = 0;
for (int i = startColumnBlockletIndex; i <= endColumnBlockletIndex; i++) {
int currentLength = (int) (measureColumnChunkOffsets.get(i + 1) - measureColumnChunkOffsets.get(i));
MeasureRawColumnChunk measureRawColumnChunk = new MeasureRawColumnChunk(i, buffer, runningLength, currentLength, this);
DataChunk3 dataChunk = CarbonUtil.readDataChunk3(buffer, runningLength, measureColumnChunkLength.get(i));
int numberOfPages = dataChunk.getPage_length().size();
byte[][] maxValueOfEachPage = new byte[numberOfPages][];
byte[][] minValueOfEachPage = new byte[numberOfPages][];
int[] eachPageLength = new int[numberOfPages];
for (int j = 0; j < minValueOfEachPage.length; j++) {
maxValueOfEachPage[j] = dataChunk.getData_chunk_list().get(j).getMin_max().getMax_values().get(0).array();
minValueOfEachPage[j] = dataChunk.getData_chunk_list().get(j).getMin_max().getMin_values().get(0).array();
eachPageLength[j] = dataChunk.getData_chunk_list().get(j).getNumberOfRowsInpage();
}
measureRawColumnChunk.setDataChunkV3(dataChunk);
measureRawColumnChunk.setFileReader(fileReader);
measureRawColumnChunk.setPagesCount(dataChunk.getPage_length().size());
measureRawColumnChunk.setMaxValues(maxValueOfEachPage);
measureRawColumnChunk.setMinValues(minValueOfEachPage);
measureRawColumnChunk.setRowCount(eachPageLength);
measureRawColumnChunk.setLengths(ArrayUtils.toPrimitive(dataChunk.page_length.toArray(new Integer[dataChunk.page_length.size()])));
measureRawColumnChunk.setOffsets(ArrayUtils.toPrimitive(dataChunk.page_offset.toArray(new Integer[dataChunk.page_offset.size()])));
measureDataChunk[index] = measureRawColumnChunk;
runningLength += currentLength;
index++;
}
return measureDataChunk;
}
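The group read issues one I/O for the whole column range and then addresses each column's chunk by a running offset inside the shared buffer. A sketch of that slicing under assumed footer offsets; GroupReadSketch and the numbers are invented, and ByteBuffer.allocate stands in for the actual file read:

import java.nio.ByteBuffer;

public class GroupReadSketch {
  public static void main(String[] args) {
    // hypothetical footer offsets; the entry after the last column marks the end
    long[] chunkOffsets = { 1000L, 1500L, 2200L, 3000L };
    int start = 0;
    int end = 2;
    // one read covers all columns in [start, end]
    int totalLength = (int) (chunkOffsets[end + 1] - chunkOffsets[start]);
    ByteBuffer buffer = ByteBuffer.allocate(totalLength); // stands in for the file read
    int runningLength = 0;
    for (int i = start; i <= end; i++) {
      int currentLength = (int) (chunkOffsets[i + 1] - chunkOffsets[i]);
      // each column's chunk occupies [runningLength, runningLength + currentLength)
      System.out.println("column " + i + ": bytes [" + runningLength + ", "
          + (runningLength + currentLength) + ") of the group buffer");
      runningLength += currentLength;
    }
  }
}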
use of org.apache.carbondata.format.DataChunk3 in project carbondata by apache.
the class CompressedDimensionChunkFileBasedReaderV3 method readRawDimensionChunksInGroup.
/**
* Below method will be used to read multiple dimension columns' data in a group
* and divide it into dimension raw chunk objects
* Steps for reading
* 1. Get the length of the data to be read
* 2. Allocate the direct buffer
* 3. read the data from file
* 4. Get the data chunk object from file for each column
* 5. Create the raw chunk object and fill the details for each column
* 6. increment the offset of the data
*
* @param fileReader
* reader which will be used to read the dimension columns data from file
* @param startBlockletColumnIndex
* blocklet index of the first dimension column
* @param endBlockletColumnIndex
* blocklet index of the last dimension column
* @return DimensionRawColumnChunk array
*/
protected DimensionRawColumnChunk[] readRawDimensionChunksInGroup(FileHolder fileReader, int startBlockletColumnIndex, int endBlockletColumnIndex) throws IOException {
// to calculate the length of the data to be read, subtract the offset of the
// start column from the offset of column (end + 1) to get the total length
long currentDimensionOffset = dimensionChunksOffset.get(startBlockletColumnIndex);
ByteBuffer buffer = null;
// read the data from carbon data file
synchronized (fileReader) {
buffer = fileReader.readByteBuffer(filePath, currentDimensionOffset, (int) (dimensionChunksOffset.get(endBlockletColumnIndex + 1) - currentDimensionOffset));
}
// create raw chunk for each dimension column
DimensionRawColumnChunk[] dimensionDataChunks = new DimensionRawColumnChunk[endBlockletColumnIndex - startBlockletColumnIndex + 1];
int index = 0;
int runningLength = 0;
for (int i = startBlockletColumnIndex; i <= endBlockletColumnIndex; i++) {
int currentLength = (int) (dimensionChunksOffset.get(i + 1) - dimensionChunksOffset.get(i));
dimensionDataChunks[index] = new DimensionRawColumnChunk(i, buffer, runningLength, currentLength, this);
DataChunk3 dataChunk = CarbonUtil.readDataChunk3(buffer, runningLength, dimensionChunksLength.get(i));
int numberOfPages = dataChunk.getPage_length().size();
byte[][] maxValueOfEachPage = new byte[numberOfPages][];
byte[][] minValueOfEachPage = new byte[numberOfPages][];
int[] eachPageLength = new int[numberOfPages];
for (int j = 0; j < minValueOfEachPage.length; j++) {
maxValueOfEachPage[j] = dataChunk.getData_chunk_list().get(j).getMin_max().getMax_values().get(0).array();
minValueOfEachPage[j] = dataChunk.getData_chunk_list().get(j).getMin_max().getMin_values().get(0).array();
eachPageLength[j] = dataChunk.getData_chunk_list().get(j).getNumberOfRowsInpage();
}
dimensionDataChunks[index].setDataChunkV3(dataChunk);
dimensionDataChunks[index].setFileHolder(fileReader);
dimensionDataChunks[index].setPagesCount(dataChunk.getPage_length().size());
dimensionDataChunks[index].setMaxValues(maxValueOfEachPage);
dimensionDataChunks[index].setMinValues(minValueOfEachPage);
dimensionDataChunks[index].setRowCount(eachPageLength);
dimensionDataChunks[index].setLengths(ArrayUtils.toPrimitive(dataChunk.page_length.toArray(new Integer[dataChunk.page_length.size()])));
dimensionDataChunks[index].setOffsets(ArrayUtils.toPrimitive(dataChunk.page_offset.toArray(new Integer[dataChunk.page_offset.size()])));
runningLength += currentLength;
index++;
}
return dimensionDataChunks;
}
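All three readers above run the same per-page loop to lift min/max values and row counts out of the DataChunk2 list. A simplified sketch of that loop using plain arrays in place of the thrift-generated accessors; every value here is invented:

public class PageMetadataSketch {
  public static void main(String[] args) {
    // invented per-page statistics standing in for DataChunk2's min_max and row count
    byte[][] pageMin = { { 1 }, { 3 } };
    byte[][] pageMax = { { 9 }, { 7 } };
    int[] rowsInPage = { 32000, 1800 };
    int numberOfPages = rowsInPage.length;
    byte[][] minValueOfEachPage = new byte[numberOfPages][];
    byte[][] maxValueOfEachPage = new byte[numberOfPages][];
    int[] eachPageLength = new int[numberOfPages];
    for (int i = 0; i < numberOfPages; i++) {
      // the real reader takes the first min/max entry of each page's min_max struct
      minValueOfEachPage[i] = pageMin[i];
      maxValueOfEachPage[i] = pageMax[i];
      eachPageLength[i] = rowsInPage[i];
    }
    System.out.println("pages = " + numberOfPages + ", rows in page 0 = " + eachPageLength[0]);
  }
}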