use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.
the class CarbonMetadataUtil method getBlockletIndex.
/**
 * Merges the min/max statistics of all given node holders into one {@link BlockletIndex}.
 *
 * <p>The resulting max list (and, likewise, the min list) contains the merged values from
 * {@code getColumnMaxData()} first, followed by the merged values from
 * {@code getMeasureColumnMaxData()}. The B-tree start key is taken from the first node
 * holder and the end key from the last one.
 *
 * @param nodeHolderList    node holders to merge; the first element seeds every min/max
 *                          value, so the list must contain at least one element
 * @param carbonMeasureList measures, indexed like the measure min/max arrays; each one
 *                          supplies the data type handed to {@code compareMeasureData}
 * @return blocklet index holding the merged min/max values and the start/end keys
 */
public static BlockletIndex getBlockletIndex(List<NodeHolder> nodeHolderList,
    List<CarbonMeasure> carbonMeasureList) {
  NodeHolder firstHolder = nodeHolderList.get(0);
  NodeHolder lastHolder = nodeHolderList.get(nodeHolderList.size() - 1);

  // Column min/max: start from the first holder's values (outer array copied, inner
  // arrays shared) and widen them with every holder in the list.
  byte[][] mergedColumnMin = firstHolder.getColumnMinData().clone();
  byte[][] mergedColumnMax = firstHolder.getColumnMaxData().clone();
  final int columnCount = mergedColumnMax.length;
  for (NodeHolder holder : nodeHolderList) {
    byte[][] holderMax = holder.getColumnMaxData();
    byte[][] holderMin = holder.getColumnMinData();
    for (int col = 0; col < columnCount; col++) {
      if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(holderMax[col], mergedColumnMax[col]) > 0) {
        mergedColumnMax[col] = holderMax[col];
      }
      if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(holderMin[col], mergedColumnMin[col]) < 0) {
        mergedColumnMin[col] = holderMin[col];
      }
    }
  }

  // Column values go into the thrift structure ahead of the measure values.
  BlockletMinMaxIndex minMaxIndex = new BlockletMinMaxIndex();
  for (int col = 0; col < mergedColumnMax.length; col++) {
    minMaxIndex.addToMax_values(ByteBuffer.wrap(mergedColumnMax[col]));
  }
  for (int col = 0; col < mergedColumnMin.length; col++) {
    minMaxIndex.addToMin_values(ByteBuffer.wrap(mergedColumnMin[col]));
  }

  // Measure min/max: the first holder seeds the values, so merging starts at the second.
  byte[][] mergedMeasureMax = firstHolder.getMeasureColumnMaxData().clone();
  byte[][] mergedMeasureMin = firstHolder.getMeasureColumnMinData().clone();
  final int measureCount = mergedMeasureMin.length;
  for (NodeHolder holder : nodeHolderList.subList(1, nodeHolderList.size())) {
    for (int m = 0; m < measureCount; m++) {
      byte[] candidateMin = holder.getMeasureColumnMinData()[m];
      byte[] candidateMax = holder.getMeasureColumnMaxData()[m];
      CarbonMeasure measure = carbonMeasureList.get(m);
      if (compareMeasureData(mergedMeasureMax[m], candidateMax, measure.getDataType()) < 0) {
        mergedMeasureMax[m] = candidateMax.clone();
      }
      if (compareMeasureData(mergedMeasureMin[m], candidateMin, measure.getDataType()) > 0) {
        mergedMeasureMin[m] = candidateMin.clone();
      }
    }
  }
  for (int m = 0; m < mergedMeasureMax.length; m++) {
    minMaxIndex.addToMax_values(ByteBuffer.wrap(mergedMeasureMax[m]));
  }
  for (int m = 0; m < mergedMeasureMin.length; m++) {
    minMaxIndex.addToMin_values(ByteBuffer.wrap(mergedMeasureMin[m]));
  }

  BlockletBTreeIndex bTreeIndex = new BlockletBTreeIndex();
  bTreeIndex.setStart_key(firstHolder.getStartKey());
  bTreeIndex.setEnd_key(lastHolder.getEndKey());

  BlockletIndex mergedIndex = new BlockletIndex();
  mergedIndex.setMin_max_index(minMaxIndex);
  mergedIndex.setB_tree_index(bTreeIndex);
  return mergedIndex;
}
use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.
the class CarbonMetadataUtil method getBlockletIndex.
/**
 * Copies the min/max values and the B-tree start/end keys of the given metadata blocklet
 * index into a newly created {@link BlockletIndex}.
 *
 * <p>The number of entries copied is driven by the length of the max-value array; the
 * min-value array is read at the same positions.
 *
 * @param info blocklet index to convert
 * @return new blocklet index carrying the same min/max values and keys
 */
private static BlockletIndex getBlockletIndex(
    org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex info) {
  BlockletBTreeIndex bTreeIndex = new BlockletBTreeIndex();
  bTreeIndex.setStart_key(info.getBtreeIndex().getStartKey());
  bTreeIndex.setEnd_key(info.getBtreeIndex().getEndKey());

  byte[][] maxValues = info.getMinMaxIndex().getMaxValues();
  byte[][] minValues = info.getMinMaxIndex().getMinValues();
  BlockletMinMaxIndex minMaxIndex = new BlockletMinMaxIndex();
  int position = 0;
  while (position < maxValues.length) {
    minMaxIndex.addToMax_values(ByteBuffer.wrap(maxValues[position]));
    minMaxIndex.addToMin_values(ByteBuffer.wrap(minValues[position]));
    position++;
  }

  BlockletIndex converted = new BlockletIndex();
  converted.setMin_max_index(minMaxIndex);
  converted.setB_tree_index(bTreeIndex);
  return converted;
}
use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.
the class CarbonMetadataUtilTest method testGetBlockIndexInfo.
/**
 * Checks that {@code getBlockIndexInfo} carries the file name of a {@code BlockIndexInfo}
 * over to the first {@code BlockIndex} it returns.
 */
@Test
public void testGetBlockIndexInfo() throws Exception {
  byte[] startKey = { 1, 2, 3, 4, 5 };
  byte[] endKey = { 9, 3, 5, 5, 5 };
  byte[] byteArr = { 1, 2, 3, 4, 5 };
  List<ByteBuffer> minList = new ArrayList<>();
  minList.add(ByteBuffer.wrap(byteArr));
  byte[] byteArr1 = { 9, 9, 8, 6, 7 };
  List<ByteBuffer> maxList = new ArrayList<>();
  maxList.add(ByteBuffer.wrap(byteArr1));
  org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex blockletMinMaxIndex =
      new org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex(minList, maxList);
  org.apache.carbondata.core.metadata.blocklet.index.BlockletBTreeIndex blockletBTreeIndex =
      new org.apache.carbondata.core.metadata.blocklet.index.BlockletBTreeIndex(startKey, endKey);
  org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex blockletIndex =
      new org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex(blockletBTreeIndex,
          blockletMinMaxIndex);
  BlockIndexInfo blockIndexInfo = new BlockIndexInfo(1, "file", 1, blockletIndex);
  List<BlockIndexInfo> blockIndexInfoList = new ArrayList<>();
  blockIndexInfoList.add(blockIndexInfo);
  List<BlockIndex> result = getBlockIndexInfo(blockIndexInfoList);
  String expected = "file";
  // JUnit's contract is assertEquals(expected, actual); with the arguments in this order a
  // failure message labels the two values correctly.
  assertEquals(expected, result.get(0).file_name);
}
use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.
the class CarbonMetadataUtil method getDataChunk2.
/**
 * Below method will be used to get the data chunk2 serialize object list.
 *
 * <p>When {@code isDimensionColumn} is true, one {@link DataChunk2} is built per entry of
 * {@code nodeHolder.getKeyArray()}; otherwise one is built per entry of
 * {@code nodeHolder.getDataArray()}. Every chunk carries a single min and a single max
 * value for its column and is converted to bytes with {@code CarbonUtil.getByteArray}.
 *
 * @param nodeHolder        node holder
 * @param columnSchema      table columns
 * @param segmentProperties segment properties
 * @param isDimensionColumn to get the list of dimension column or measure column
 * @return list of data chunk2
 * @throws IOException propagated unchanged from the helper calls used to build and
 *                     serialize each chunk
 */
public static List<byte[]> getDataChunk2(NodeHolder nodeHolder, List<ColumnSchema> columnSchema,
    SegmentProperties segmentProperties, boolean isDimensionColumn) throws IOException {
  List<byte[]> dataChunkBuffer = new ArrayList<>();
  if (isDimensionColumn) {
    final int dimensionCount = nodeHolder.getKeyArray().length;
    for (int i = 0; i < dimensionCount; i++) {
      DataChunk2 dataChunk = new DataChunk2();
      dataChunk.min_max = new BlockletMinMaxIndex();
      dataChunk.setChunk_meta(getChunkCompressionMeta());
      dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount());
      dataChunk.setData_page_length(nodeHolder.getKeyLengths()[i]);

      // Encodings are appended in a fixed order:
      // DICTIONARY, DIRECT_DICTIONARY, RLE, INVERTED_INDEX.
      List<Encoding> encodings = new ArrayList<>();
      if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
        encodings.add(Encoding.DICTIONARY);
      }
      if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
        encodings.add(Encoding.DIRECT_DICTIONARY);
      }
      dataChunk.setRowMajor(nodeHolder.getColGrpBlocks()[i]);
      if (nodeHolder.getAggBlocks()[i]) {
        dataChunk.setRle_page_length(nodeHolder.getDataIndexMapLength()[i]);
        encodings.add(Encoding.RLE);
      }
      // Read the sorted flag once; it drives both the sort state and the inverted index.
      boolean sortedKeyBlock = nodeHolder.getIsSortedKeyBlock()[i];
      dataChunk.setSort_state(sortedKeyBlock ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
      if (!sortedKeyBlock) {
        dataChunk.setRowid_page_length(nodeHolder.getKeyBlockIndexLength()[i]);
        encodings.add(Encoding.INVERTED_INDEX);
      }
      dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getColumnMaxData()[i]));
      dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getColumnMinData()[i]));
      dataChunk.setEncoders(encodings);
      dataChunkBuffer.add(CarbonUtil.getByteArray(dataChunk));
    }
  } else {
    final int measureCount = nodeHolder.getDataArray().length;
    for (int i = 0; i < measureCount; i++) {
      DataChunk2 dataChunk = new DataChunk2();
      dataChunk.min_max = new BlockletMinMaxIndex();
      dataChunk.setChunk_meta(getChunkCompressionMeta());
      dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount());
      dataChunk.setData_page_length(nodeHolder.getDataArray()[i].length);
      dataChunk.setRowMajor(false);
      // TODO : Right now the encodings are happening at runtime. change as
      // per this encoders.
      List<Encoding> encodings = new ArrayList<>();
      encodings.add(Encoding.DELTA);
      // Set the encoders once, after the list is populated.
      dataChunk.setEncoders(encodings);
      // TODO writing dummy presence meta need to set actual presence
      // meta
      PresenceMeta presenceMeta = new PresenceMeta();
      presenceMeta.setPresent_bit_streamIsSet(true);
      presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor()
          .compressByte(nodeHolder.getMeasureNullValueIndex()[i].toByteArray()));
      dataChunk.setPresence(presenceMeta);
      List<ByteBuffer> encoderMetaList = new ArrayList<>();
      encoderMetaList.add(ByteBuffer.wrap(serializeEncodeMetaUsingByteBuffer(
          createValueEncoderMeta(nodeHolder.getCompressionModel(), i))));
      dataChunk.setEncoder_meta(encoderMetaList);
      dataChunk.min_max.addToMax_values(
          ByteBuffer.wrap(nodeHolder.getMeasureColumnMaxData()[i]));
      dataChunk.min_max.addToMin_values(
          ByteBuffer.wrap(nodeHolder.getMeasureColumnMinData()[i]));
      dataChunkBuffer.add(CarbonUtil.getByteArray(dataChunk));
    }
  }
  return dataChunkBuffer;
}
use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.
the class CarbonMetadataUtil method getBlockletIndex.
/**
 * Creates a {@link BlockletIndex} from the column min/max data and the start/end keys held
 * by the given blocklet info.
 *
 * @param info blocklet info supplying the column max data, column min data, start key and
 *             end key
 * @return new blocklet index populated from {@code info}
 */
private static BlockletIndex getBlockletIndex(BlockletInfoColumnar info) {
  BlockletBTreeIndex bTreeIndex = new BlockletBTreeIndex();
  bTreeIndex.setStart_key(info.getStartKey());
  bTreeIndex.setEnd_key(info.getEndKey());

  BlockletMinMaxIndex minMaxIndex = new BlockletMinMaxIndex();
  for (byte[] columnMax : info.getColumnMaxData()) {
    ByteBuffer wrappedMax = ByteBuffer.wrap(columnMax);
    minMaxIndex.addToMax_values(wrappedMax);
  }
  for (byte[] columnMin : info.getColumnMinData()) {
    ByteBuffer wrappedMin = ByteBuffer.wrap(columnMin);
    minMaxIndex.addToMin_values(wrappedMin);
  }

  BlockletIndex result = new BlockletIndex();
  result.setB_tree_index(bTreeIndex);
  result.setMin_max_index(minMaxIndex);
  return result;
}
Aggregations