Use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.
The class CarbonMetadataUtil, method setBlockletIndex.
private static void setBlockletIndex(FileFooter footer, List<BlockletInfoColumnar> listOfNodeInfo) {
  List<BlockletIndex> blockletIndexList = footer.getBlocklet_index_list();
  for (int i = 0; i < blockletIndexList.size(); i++) {
    BlockletBTreeIndex bTreeIndexList = blockletIndexList.get(i).getB_tree_index();
    BlockletMinMaxIndex minMaxIndexList = blockletIndexList.get(i).getMin_max_index();
    listOfNodeInfo.get(i).setStartKey(bTreeIndexList.getStart_key());
    listOfNodeInfo.get(i).setEndKey(bTreeIndexList.getEnd_key());
    // min_values and max_values hold one entry per column; copy them out
    // into byte[][] form for the columnar node info
    byte[][] min = new byte[minMaxIndexList.getMin_values().size()][];
    byte[][] max = new byte[minMaxIndexList.getMax_values().size()][];
    for (int j = 0; j < minMaxIndexList.getMax_valuesSize(); j++) {
      min[j] = minMaxIndexList.getMin_values().get(j).array();
      max[j] = minMaxIndexList.getMax_values().get(j).array();
    }
    listOfNodeInfo.get(i).setColumnMaxData(max);
    // the min side was dropped in the original snippet; without this call the
    // computed min array is never used
    listOfNodeInfo.get(i).setColumnMinData(min);
  }
}
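The extraction above can be exercised in isolation. Below is a minimal sketch, assuming only the Thrift-generated accessors already shown (addToMin_values, addToMax_values, getMin_values, getMax_values, getMax_valuesSize); the class name MinMaxRoundTrip is illustrative, not part of carbondata.

import java.nio.ByteBuffer;
import org.apache.carbondata.format.BlockletMinMaxIndex;

public class MinMaxRoundTrip {
  public static void main(String[] args) {
    BlockletMinMaxIndex minMax = new BlockletMinMaxIndex();
    // one entry per column: column 0 ranges over [1, 9]
    minMax.addToMin_values(ByteBuffer.wrap(new byte[] { 1 }));
    minMax.addToMax_values(ByteBuffer.wrap(new byte[] { 9 }));
    // same extraction pattern as setBlockletIndex above
    byte[][] min = new byte[minMax.getMin_values().size()][];
    byte[][] max = new byte[minMax.getMax_values().size()][];
    for (int j = 0; j < minMax.getMax_valuesSize(); j++) {
      min[j] = minMax.getMin_values().get(j).array();
      max[j] = minMax.getMax_values().get(j).array();
    }
    System.out.println(min[0][0] + " .. " + max[0][0]); // prints "1 .. 9"
  }
}

Note that array() returns the backing array of the wrapped buffer, which is why the writer side wraps exact-size per-column arrays rather than slices of a shared buffer.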
Use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.
The class CarbonMetadataUtil, method getDatachunk2.
/**
 * Below method will be used to get the data chunk object for all the columns
 *
 * @param nodeHolderList blocklet info
 * @param columnSchema list of columns
 * @param segmentProperties segment properties
 * @param index column index within the blocklet
 * @param isDimensionColumn true if the column is a dimension, false for a measure
 * @return list of data chunks
 * @throws IOException
 */
private static List<DataChunk2> getDatachunk2(List<NodeHolder> nodeHolderList,
    List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, int index,
    boolean isDimensionColumn) throws IOException {
  List<DataChunk2> colDataChunks = new ArrayList<DataChunk2>();
  DataChunk2 dataChunk = null;
  NodeHolder nodeHolder = null;
  for (int i = 0; i < nodeHolderList.size(); i++) {
    nodeHolder = nodeHolderList.get(i);
    dataChunk = new DataChunk2();
    dataChunk.min_max = new BlockletMinMaxIndex();
    dataChunk.setChunk_meta(getChunkCompressionMeta());
    dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount());
    List<Encoding> encodings = new ArrayList<Encoding>();
    if (isDimensionColumn) {
      dataChunk.setData_page_length(nodeHolder.getKeyLengths()[index]);
      if (containsEncoding(index, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
        encodings.add(Encoding.DICTIONARY);
      }
      if (containsEncoding(index, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
        encodings.add(Encoding.DIRECT_DICTIONARY);
      }
      dataChunk.setRowMajor(nodeHolder.getColGrpBlocks()[index]);
      if (nodeHolder.getAggBlocks()[index]) {
        dataChunk.setRle_page_length(nodeHolder.getDataIndexMapLength()[index]);
        encodings.add(Encoding.RLE);
      }
      dataChunk.setSort_state(nodeHolder.getIsSortedKeyBlock()[index]
          ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
      if (!nodeHolder.getIsSortedKeyBlock()[index]) {
        dataChunk.setRowid_page_length(nodeHolder.getKeyBlockIndexLength()[index]);
        encodings.add(Encoding.INVERTED_INDEX);
      }
      dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getColumnMaxData()[index]));
      dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getColumnMinData()[index]));
    } else {
      dataChunk.setData_page_length(nodeHolder.getDataArray()[index].length);
      // TODO : Right now the encodings are happening at runtime. change as
      // per this encoders.
      encodings.add(Encoding.DELTA);
      dataChunk.setRowMajor(false);
      // TODO writing dummy presence meta need to set actual presence
      // meta
      PresenceMeta presenceMeta = new PresenceMeta();
      presenceMeta.setPresent_bit_streamIsSet(true);
      presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor()
          .compressByte(nodeHolder.getMeasureNullValueIndex()[index].toByteArray()));
      dataChunk.setPresence(presenceMeta);
      List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
      encoderMetaList.add(ByteBuffer.wrap(serializeEncodeMetaUsingByteBuffer(
          createValueEncoderMeta(nodeHolder.getCompressionModel(), index))));
      dataChunk.setEncoder_meta(encoderMetaList);
      dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMaxData()[index]));
      dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMinData()[index]));
    }
    dataChunk.setEncoders(encodings);
    colDataChunks.add(dataChunk);
  }
  return colDataChunks;
}
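For a single column, the min/max wiring reduces to a few lines. The sketch below shows how a column's page-level min/max bytes end up inside DataChunk2.min_max, using only the fields and methods that appear in getDatachunk2 above; the class name DataChunkMinMaxExample is illustrative, not part of carbondata.

import java.nio.ByteBuffer;
import org.apache.carbondata.format.BlockletMinMaxIndex;
import org.apache.carbondata.format.DataChunk2;

public class DataChunkMinMaxExample {
  public static void main(String[] args) {
    byte[] columnMin = { 0 };
    byte[] columnMax = { 42 };
    DataChunk2 dataChunk = new DataChunk2();
    dataChunk.min_max = new BlockletMinMaxIndex();
    dataChunk.min_max.addToMin_values(ByteBuffer.wrap(columnMin));
    dataChunk.min_max.addToMax_values(ByteBuffer.wrap(columnMax));
    // a reader can recover the stored range and use it for pruning
    byte[] readBack = dataChunk.min_max.getMax_values().get(0).array();
    System.out.println("max = " + readBack[0]); // prints "max = 42"
  }
}

Since each DataChunk2 carries its own min/max pair for the column, a reader can skip a whole page when a filter value falls outside the stored range.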
Use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.
The class CarbonMetadataUtilTest, method testConvertFileFooter.
@Test
public void testConvertFileFooter() throws Exception {
  int[] intArr = { 1, 2, 3, 4, 5 };
  boolean[] boolArr = { true, true, true, true, true };
  long[] longArr = { 1, 2, 3, 4, 5 };
  byte[][] maxByteArr = { { 1, 2 }, { 3, 4 }, { 5, 6 }, { 2, 4 }, { 1, 2 } };
  int[] cardinality = { 1, 2, 3, 4, 5 };
  // the core DataType is fully qualified to avoid clashing with the thrift
  // ColumnSchema/DataType types imported by this test
  org.apache.carbondata.core.metadata.datatype.DataType[] dataType = {
      org.apache.carbondata.core.metadata.datatype.DataType.INT,
      org.apache.carbondata.core.metadata.datatype.DataType.INT,
      org.apache.carbondata.core.metadata.datatype.DataType.INT,
      org.apache.carbondata.core.metadata.datatype.DataType.INT,
      org.apache.carbondata.core.metadata.datatype.DataType.INT };
  org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema =
      new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
  org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema1 =
      new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
  List<org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema> columnSchemaList =
      new ArrayList<>();
  columnSchemaList.add(colSchema);
  columnSchemaList.add(colSchema1);
  SegmentProperties segmentProperties = new SegmentProperties(columnSchemaList, cardinality);
  final List<Integer> integerList = new ArrayList<>();
  integerList.add(Integer.valueOf(1));
  integerList.add(Integer.valueOf(2));
  // the original snippet referenced objMaxArr, objMinArr and byteArr without
  // declaring them; illustrative values are supplied here so the test compiles
  Object[] objMaxArr = { 5, 5, 5, 5, 5 };
  Object[] objMinArr = { 1, 1, 1, 1, 1 };
  byte[] byteArr = { 1, 2, 3, 4, 5 };
  WriterCompressModel writerCompressModel = new WriterCompressModel();
  writerCompressModel.setMaxValue(objMaxArr);
  writerCompressModel.setMinValue(objMinArr);
  writerCompressModel.setDataTypeSelected(byteArr);
  writerCompressModel.setMantissa(intArr);
  writerCompressModel.setType(dataType);
  writerCompressModel.setUniqueValue(objMinArr);
  BlockletInfoColumnar blockletInfoColumnar = new BlockletInfoColumnar();
  BitSet[] bitSetArr = new BitSet[6];
  for (int i = 0; i < bitSetArr.length; i++) {
    bitSetArr[i] = new BitSet();
  }
  blockletInfoColumnar.setColumnMaxData(maxByteArr);
  blockletInfoColumnar.setColumnMinData(maxByteArr);
  blockletInfoColumnar.setKeyLengths(intArr);
  blockletInfoColumnar.setColGrpBlocks(boolArr);
  blockletInfoColumnar.setKeyOffSets(longArr);
  blockletInfoColumnar.setDataIndexMapOffsets(longArr);
  blockletInfoColumnar.setAggKeyBlock(boolArr);
  blockletInfoColumnar.setDataIndexMapLength(intArr);
  blockletInfoColumnar.setIsSortedKeyColumn(boolArr);
  blockletInfoColumnar.setMeasureLength(intArr);
  blockletInfoColumnar.setMeasureOffset(longArr);
  blockletInfoColumnar.setMeasureNullValueIndex(bitSetArr);
  blockletInfoColumnar.setCompressionModel(writerCompressModel);
  BlockletInfoColumnar blockletInfoColumnar1 = new BlockletInfoColumnar();
  blockletInfoColumnar1.setColumnMaxData(maxByteArr);
  blockletInfoColumnar1.setColumnMinData(maxByteArr);
  blockletInfoColumnar1.setKeyLengths(intArr);
  blockletInfoColumnar1.setKeyOffSets(longArr);
  blockletInfoColumnar1.setDataIndexMapOffsets(longArr);
  blockletInfoColumnar1.setAggKeyBlock(boolArr);
  blockletInfoColumnar1.setDataIndexMapLength(intArr);
  blockletInfoColumnar1.setIsSortedKeyColumn(boolArr);
  blockletInfoColumnar1.setColGrpBlocks(boolArr);
  blockletInfoColumnar1.setMeasureLength(intArr);
  blockletInfoColumnar1.setMeasureOffset(longArr);
  blockletInfoColumnar1.setMeasureNullValueIndex(bitSetArr);
  blockletInfoColumnar1.setCompressionModel(writerCompressModel);
  List<BlockletInfoColumnar> blockletInfoColumnarList = new ArrayList<>();
  blockletInfoColumnarList.add(blockletInfoColumnar);
  blockletInfoColumnarList.add(blockletInfoColumnar1);
  new MockUp<CarbonUtil>() {
    @SuppressWarnings("unused")
    @Mock
    public List<Integer> convertToIntegerList(int[] array) {
      return integerList;
    }
  };
  final Set<Integer> integerSet = new HashSet<>();
  integerSet.add(Integer.valueOf(1));
  integerSet.add(Integer.valueOf(2));
  new MockUp<SegmentProperties>() {
    @SuppressWarnings("unused")
    @Mock
    public Set<Integer> getDimensionOrdinalForBlock(int blockIndex) {
      return integerSet;
    }
  };
  SegmentInfo segmentInfo = new SegmentInfo();
  segmentInfo.setNum_cols(4);
  segmentInfo.setColumn_cardinalities(integerList);
  FileFooter fileFooter = new FileFooter();
  fileFooter.setNum_rows(4);
  fileFooter.setSegment_info(segmentInfo);
  byte[] byteMaxArr = "1".getBytes();
  byte[] byteMinArr = "2".getBytes();
  BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
  blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(byteMaxArr));
  blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(byteMinArr));
  // the snippet used columnSchemas without declaring it; an empty list of
  // thrift ColumnSchema objects is assumed here
  final List<ColumnSchema> columnSchemas = new ArrayList<>();
  FileFooter result = convertFileFooter(blockletInfoColumnarList, 4, cardinality,
      columnSchemas, segmentProperties);
  assertEquals(result.getTable_columns(), columnSchemas);
}
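The blockletMinMaxIndex built near the end of the test is never asserted against in the snippet. If one wanted to verify it, a round-trip check along these lines would do (an illustrative addition, not part of the original test; ByteBuffer.equals compares buffer contents):

// hypothetical follow-up assertions for the BlockletMinMaxIndex above
assertEquals(ByteBuffer.wrap(byteMaxArr), blockletMinMaxIndex.getMax_values().get(0));
assertEquals(ByteBuffer.wrap(byteMinArr), blockletMinMaxIndex.getMin_values().get(0));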