Use of org.apache.carbondata.format.DataChunk2 in project carbondata by apache.
The class CarbonMetadataUtil, method getDatachunk2.
/**
* Below method will be used to get the data chunk object for all the columns
*
* @param blockletInfoColumnar blocklet info
* @param columnSchema list of columns
* @param segmentProperties segment properties
* @return list of data chunks
* @throws IOException
*/
public static List<DataChunk2> getDatachunk2(BlockletInfoColumnar blockletInfoColumnar,
    List<ColumnSchema> columnSchema, SegmentProperties segmentProperties) throws IOException {
  List<DataChunk2> colDataChunks = new ArrayList<DataChunk2>();
  int rowIdIndex = 0;
  int aggregateIndex = 0;
  boolean[] isSortedKeyColumn = blockletInfoColumnar.getIsSortedKeyColumn();
  boolean[] aggKeyBlock = blockletInfoColumnar.getAggKeyBlock();
  boolean[] colGrpblock = blockletInfoColumnar.getColGrpBlocks();
  for (int i = 0; i < blockletInfoColumnar.getKeyLengths().length; i++) {
    DataChunk2 dataChunk = new DataChunk2();
    dataChunk.setChunk_meta(getChunkCompressionMeta());
    List<Encoding> encodings = new ArrayList<Encoding>();
    if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
      encodings.add(Encoding.DICTIONARY);
    }
    if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
      encodings.add(Encoding.DIRECT_DICTIONARY);
    }
    dataChunk.setRowMajor(colGrpblock[i]);
    // TODO : once the schema PR is merged, this information needs to be passed here
    dataChunk.setData_page_length(blockletInfoColumnar.getKeyLengths()[i]);
    if (aggKeyBlock[i]) {
      dataChunk.setRle_page_length(blockletInfoColumnar.getDataIndexMapLength()[aggregateIndex]);
      encodings.add(Encoding.RLE);
      aggregateIndex++;
    }
    dataChunk.setSort_state(
        isSortedKeyColumn[i] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
    if (!isSortedKeyColumn[i]) {
      dataChunk.setRowid_page_length(blockletInfoColumnar.getKeyBlockIndexLength()[rowIdIndex]);
      encodings.add(Encoding.INVERTED_INDEX);
      rowIdIndex++;
    }
    // TODO : right now the encodings are decided at runtime; change to honour these encoders
    dataChunk.setEncoders(encodings);
    colDataChunks.add(dataChunk);
  }
  for (int i = 0; i < blockletInfoColumnar.getMeasureLength().length; i++) {
    DataChunk2 dataChunk = new DataChunk2();
    dataChunk.setChunk_meta(getChunkCompressionMeta());
    dataChunk.setRowMajor(false);
    // TODO : once the schema PR is merged, this information needs to be passed here
    dataChunk.setData_page_length(blockletInfoColumnar.getMeasureLength()[i]);
    // TODO : right now the encodings are decided at runtime; change to honour these encoders
    List<Encoding> encodings = new ArrayList<Encoding>();
    encodings.add(Encoding.DELTA);
    dataChunk.setEncoders(encodings);
    // TODO : writing dummy presence meta; the actual presence meta needs to be set here
    PresenceMeta presenceMeta = new PresenceMeta();
    presenceMeta.setPresent_bit_streamIsSet(true);
    presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor()
        .compressByte(blockletInfoColumnar.getMeasureNullValueIndex()[i].toByteArray()));
    dataChunk.setPresence(presenceMeta);
    // TODO : the ValueCompression meta needs to be written here
    List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
    encoderMetaList.add(ByteBuffer.wrap(serializeEncoderMeta(
        createValueEncoderMeta(blockletInfoColumnar.getCompressionModel(), i))));
    dataChunk.setEncoder_meta(encoderMetaList);
    colDataChunks.add(dataChunk);
  }
  return colDataChunks;
}
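A minimal sketch of how the returned chunks could be inspected, assuming blockletInfoColumnar, columnSchema, and segmentProperties are already populated by the writer; it relies only on the thrift-generated getters of DataChunk2 and is illustrative, not part of the project:

// hedged sketch: print the per-column metadata produced above
List<DataChunk2> chunks =
    CarbonMetadataUtil.getDatachunk2(blockletInfoColumnar, columnSchema, segmentProperties);
for (DataChunk2 chunk : chunks) {
  // the encoders list records the per-column decisions made above:
  // DICTIONARY/DIRECT_DICTIONARY, RLE for aggregated key blocks,
  // INVERTED_INDEX for unsorted key columns, DELTA for measures
  System.out.println(chunk.getEncoders()
      + ", data_page_length=" + chunk.getData_page_length());
}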
Use of org.apache.carbondata.format.DataChunk2 in project carbondata by apache.
The class CarbonMetadataUtil, method getDataChunk3.
public static DataChunk3 getDataChunk3(List<NodeHolder> nodeHolderList,
    List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, int index,
    boolean isDimensionColumn) throws IOException {
  List<DataChunk2> dataChunksList =
      getDatachunk2(nodeHolderList, columnSchema, segmentProperties, index, isDimensionColumn);
  int offset = 0;
  DataChunk3 dataChunk = new DataChunk3();
  List<Integer> pageOffsets = new ArrayList<>();
  List<Integer> pageLengths = new ArrayList<>();
  int length = 0;
  for (int i = 0; i < dataChunksList.size(); i++) {
    pageOffsets.add(offset);
    length = dataChunksList.get(i).getData_page_length()
        + dataChunksList.get(i).getRle_page_length()
        + dataChunksList.get(i).getRowid_page_length();
    pageLengths.add(length);
    offset += length;
  }
  dataChunk.setData_chunk_list(dataChunksList);
  dataChunk.setPage_length(pageLengths);
  dataChunk.setPage_offset(pageOffsets);
  return dataChunk;
}
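The loop above is a plain running sum: each page's total length is its data page plus the optional RLE and row id pages, and each page's offset is the sum of the totals before it. A small illustration with made-up lengths (the numbers are hypothetical):

// pages with {data, rle, rowid} lengths of {100, 8, 0} and {120, 0, 16}
int[] pageTotals = { 100 + 8 + 0, 120 + 0 + 16 };
int offset = 0;
for (int total : pageTotals) {
  // prints offset=0 length=108, then offset=108 length=136
  System.out.println("offset=" + offset + " length=" + total);
  offset += total;
}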
Use of org.apache.carbondata.format.DataChunk2 in project carbondata by apache.
The class CompressedMeasureChunkFileBasedReaderV2, method convertToMeasureChunk.
public MeasureColumnDataChunk convertToMeasureChunk(MeasureRawColumnChunk measureRawColumnChunk,
    int pageNumber) throws IOException {
  MeasureColumnDataChunk datChunk = new MeasureColumnDataChunk();
  DataChunk2 measureColumnChunk = null;
  int copyPoint = measureRawColumnChunk.getOffSet();
  int blockIndex = measureRawColumnChunk.getBlockletId();
  ByteBuffer rawData = measureRawColumnChunk.getRawData();
  if (measureColumnChunkOffsets.size() - 1 == blockIndex) {
    measureColumnChunk =
        CarbonUtil.readDataChunk(rawData, copyPoint, measureColumnChunkLength.get(blockIndex));
    synchronized (measureRawColumnChunk.getFileReader()) {
      rawData = measureRawColumnChunk.getFileReader().readByteBuffer(filePath,
          measureColumnChunkOffsets.get(blockIndex) + measureColumnChunkLength.get(blockIndex),
          measureColumnChunk.data_page_length);
    }
  } else {
    measureColumnChunk =
        CarbonUtil.readDataChunk(rawData, copyPoint, measureColumnChunkLength.get(blockIndex));
    copyPoint += measureColumnChunkLength.get(blockIndex);
  }
  List<ValueEncoderMeta> valueEncodeMeta = new ArrayList<>();
  for (int i = 0; i < measureColumnChunk.getEncoder_meta().size(); i++) {
    valueEncodeMeta.add(
        CarbonUtil.deserializeEncoderMeta(measureColumnChunk.getEncoder_meta().get(i).array()));
  }
  WriterCompressModel compressionModel = CarbonUtil.getValueCompressionModel(valueEncodeMeta);
  ValueCompressionHolder values = compressionModel.getValueCompressionHolder()[0];
  // uncompress the measure data page
  values.uncompress(compressionModel.getConvertedDataType()[0], rawData.array(), copyPoint,
      measureColumnChunk.data_page_length, compressionModel.getMantissa()[0],
      compressionModel.getMaxValue()[0], numberOfRows);
  CarbonReadDataHolder measureDataHolder = new CarbonReadDataHolder(values);
  // set the data chunk
  datChunk.setMeasureDataHolder(measureDataHolder);
  // set the null value indexes
  datChunk.setNullValueIndexHolder(getPresenceMeta(measureColumnChunk.presence));
  return datChunk;
}
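A minimal usage sketch for this V2 reader, assuming reader implements MeasureColumnChunkReader (the interface this class belongs to) and fileReader is an open FileHolder; how the reader instance is obtained is left out:

// read the raw (still compressed) chunk for one blocklet column, then decode it;
// in V2 there is a single page per blocklet column, so page number 0 is passed
MeasureRawColumnChunk raw = reader.readRawMeasureChunk(fileReader, blockIndex);
MeasureColumnDataChunk decoded = reader.convertToMeasureChunk(raw, 0);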
Use of org.apache.carbondata.format.DataChunk2 in project carbondata by apache.
The class CompressedMeasureChunkFileBasedReaderV3, method convertToMeasureChunk.
/**
 * Below method will be used to convert the compressed measure chunk raw data to actual data
 *
 * @param measureRawColumnChunk measure raw chunk
 * @param pageNumber page number of the page to be decoded within the blocklet column
 * @return MeasureColumnDataChunk
 */
@Override
public MeasureColumnDataChunk convertToMeasureChunk(MeasureRawColumnChunk measureRawColumnChunk,
    int pageNumber) throws IOException {
  MeasureColumnDataChunk datChunk = new MeasureColumnDataChunk();
  // data chunk of blocklet column
  DataChunk3 dataChunk3 = measureRawColumnChunk.getDataChunkV3();
  // data chunk of page
  DataChunk2 measureColumnChunk = dataChunk3.getData_chunk_list().get(pageNumber);
  // calculate the start point of the data: as the buffer can contain data for multiple
  // columns, the start point is data chunk offset + data chunk length + page offset
  int copyPoint = measureRawColumnChunk.getOffSet()
      + measureColumnChunkLength.get(measureRawColumnChunk.getBlockletId())
      + dataChunk3.getPage_offset().get(pageNumber);
  List<ValueEncoderMeta> valueEncodeMeta = new ArrayList<>();
  for (int i = 0; i < measureColumnChunk.getEncoder_meta().size(); i++) {
    valueEncodeMeta.add(CarbonUtil
        .deserializeEncoderMetaNew(measureColumnChunk.getEncoder_meta().get(i).array()));
  }
  WriterCompressModel compressionModel = CarbonUtil.getValueCompressionModel(valueEncodeMeta);
  ValueCompressionHolder values = compressionModel.getValueCompressionHolder()[0];
  // uncompress the measure data page
  ByteBuffer rawData = measureRawColumnChunk.getRawData();
  values.uncompress(compressionModel.getConvertedDataType()[0], rawData.array(), copyPoint,
      measureColumnChunk.data_page_length, compressionModel.getMantissa()[0],
      compressionModel.getMaxValue()[0], measureRawColumnChunk.getRowCount()[pageNumber]);
  CarbonReadDataHolder measureDataHolder = new CarbonReadDataHolder(values);
  // set the data chunk
  datChunk.setMeasureDataHolder(measureDataHolder);
  // set the null value indexes
  datChunk.setNullValueIndexHolder(getPresenceMeta(measureColumnChunk.presence));
  return datChunk;
}
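Unlike V2, the V3 raw chunk carries a DataChunk3 describing every page of the blocklet column, so each page can be decoded independently. A hedged sketch, assuming raw was produced by this reader's readRawMeasureChunk:

DataChunk3 meta = raw.getDataChunkV3();
for (int page = 0; page < meta.getData_chunk_list().size(); page++) {
  // each call locates its page via page_offset and decodes it on demand
  MeasureColumnDataChunk decoded = reader.convertToMeasureChunk(raw, page);
}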
Use of org.apache.carbondata.format.DataChunk2 in project carbondata by apache.
The class CompressedDimensionChunkFileBasedReaderV3, method convertToDimensionChunk.
/**
 * Below method will be used to convert the compressed dimension chunk raw data to actual data
 *
 * @param dimensionRawColumnChunk dimension raw chunk
 * @param pageNumber page number of the page to be decoded within the blocklet column
 * @return DimensionColumnDataChunk
 */
@Override
public DimensionColumnDataChunk convertToDimensionChunk(
    DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException {
  byte[] dataPage = null;
  int[] invertedIndexes = null;
  int[] invertedIndexesReverse = null;
  int[] rlePage = null;
  // data chunk of page
  DataChunk2 dimensionColumnChunk = null;
  // data chunk of blocklet column
  DataChunk3 dataChunk3 = dimensionRawColumnChunk.getDataChunkV3();
  // get the data buffer
  ByteBuffer rawData = dimensionRawColumnChunk.getRawData();
  dimensionColumnChunk = dataChunk3.getData_chunk_list().get(pageNumber);
  // calculate the start point of the data: as the buffer can contain data for multiple
  // columns, the start point is data chunk offset + data chunk length + page offset
  int copySourcePoint = dimensionRawColumnChunk.getOffSet()
      + dimensionChunksLength.get(dimensionRawColumnChunk.getBlockletId())
      + dataChunk3.getPage_offset().get(pageNumber);
  // first read the data page and uncompress it
  dataPage = COMPRESSOR.unCompressByte(rawData.array(), copySourcePoint,
      dimensionColumnChunk.data_page_length);
  copySourcePoint += dimensionColumnChunk.data_page_length;
  // if a row id block is present, read the row id chunk and uncompress it
  if (hasEncoding(dimensionColumnChunk.encoders, Encoding.INVERTED_INDEX)) {
    invertedIndexes = CarbonUtil.getUnCompressColumnIndex(
        dimensionColumnChunk.rowid_page_length, rawData, copySourcePoint);
    copySourcePoint += dimensionColumnChunk.rowid_page_length;
    // get the reverse index
    invertedIndexesReverse = getInvertedReverseIndex(invertedIndexes);
  }
  // then decode the actual data based on the rle block
  if (hasEncoding(dimensionColumnChunk.encoders, Encoding.RLE)) {
    rlePage = CarbonUtil.getIntArray(rawData, copySourcePoint,
        dimensionColumnChunk.rle_page_length);
    // uncompress the data with rle indexes
    dataPage = UnBlockIndexer.uncompressData(dataPage, rlePage,
        eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()]);
    rlePage = null;
  }
  // fill chunk attributes
  DimensionColumnDataChunk columnDataChunk = null;
  if (dimensionColumnChunk.isRowMajor()) {
    // to store row-major (column group) chunk values
    columnDataChunk = new ColumnGroupDimensionDataChunk(dataPage,
        eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()],
        dimensionRawColumnChunk.getRowCount()[pageNumber]);
  } else if (!hasEncoding(dimensionColumnChunk.encoders, Encoding.DICTIONARY)) {
    // for a no-dictionary column, create a variable length chunk
    // and set it to the data chunk instance
    columnDataChunk = new VariableLengthDimensionDataChunk(dataPage, invertedIndexes,
        invertedIndexesReverse, dimensionRawColumnChunk.getRowCount()[pageNumber]);
  } else {
    // to store fixed length column chunk values
    columnDataChunk = new FixedLengthDimensionDataChunk(dataPage, invertedIndexes,
        invertedIndexesReverse, dimensionRawColumnChunk.getRowCount()[pageNumber],
        eachColumnValueSize[dimensionRawColumnChunk.getBlockletId()]);
  }
  return columnDataChunk;
}
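The hasEncoding checks above reduce to a membership test on the thrift encoders list; a minimal equivalent sketch (the project ships its own helper for this, so the code below is only illustrative):

private static boolean hasEncoding(List<Encoding> encodings, Encoding encoding) {
  // a page advertises an encoding simply by listing it in DataChunk2.encoders
  return encodings != null && encodings.contains(encoding);
}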