Use of org.apache.carbondata.format.DataChunk in project carbondata by apache.
The class CarbonMetadataUtil, method getBlockletInfo:
private static BlockletInfo getBlockletInfo(BlockletInfoColumnar blockletInfoColumnar,
    List<ColumnSchema> columnSchema, SegmentProperties segmentProperties) throws IOException {
  BlockletInfo blockletInfo = new BlockletInfo();
  blockletInfo.setNum_rows(blockletInfoColumnar.getNumberOfKeys());
  List<DataChunk> colDataChunks = new ArrayList<DataChunk>();
  int j = 0;
  int aggregateIndex = 0;
  boolean[] isSortedKeyColumn = blockletInfoColumnar.getIsSortedKeyColumn();
  boolean[] aggKeyBlock = blockletInfoColumnar.getAggKeyBlock();
  boolean[] colGrpblock = blockletInfoColumnar.getColGrpBlocks();
  for (int i = 0; i < blockletInfoColumnar.getKeyLengths().length; i++) {
    DataChunk dataChunk = new DataChunk();
    dataChunk.setChunk_meta(getChunkCompressionMeta());
    List<Encoding> encodings = new ArrayList<Encoding>();
    if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
      encodings.add(Encoding.DICTIONARY);
    }
    if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
      encodings.add(Encoding.DIRECT_DICTIONARY);
    }
    dataChunk.setRowMajor(colGrpblock[i]);
    // TODO: once the schema PR is merged, the actual column ids need to be passed here.
    dataChunk.setColumn_ids(new ArrayList<Integer>());
    dataChunk.setData_page_length(blockletInfoColumnar.getKeyLengths()[i]);
    dataChunk.setData_page_offset(blockletInfoColumnar.getKeyOffSets()[i]);
    if (aggKeyBlock[i]) {
      dataChunk.setRle_page_offset(blockletInfoColumnar.getDataIndexMapOffsets()[aggregateIndex]);
      dataChunk.setRle_page_length(blockletInfoColumnar.getDataIndexMapLength()[aggregateIndex]);
      encodings.add(Encoding.RLE);
      aggregateIndex++;
    }
    dataChunk.setSort_state(
        isSortedKeyColumn[i] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
    if (!isSortedKeyColumn[i]) {
      dataChunk.setRowid_page_offset(blockletInfoColumnar.getKeyBlockIndexOffSets()[j]);
      dataChunk.setRowid_page_length(blockletInfoColumnar.getKeyBlockIndexLength()[j]);
      if (!encodings.contains(Encoding.INVERTED_INDEX)) {
        encodings.add(Encoding.INVERTED_INDEX);
      }
      j++;
    }
    // TODO: right now the encodings are decided at runtime; change to use these encoders.
    dataChunk.setEncoders(encodings);
    colDataChunks.add(dataChunk);
  }
  for (int i = 0; i < blockletInfoColumnar.getMeasureLength().length; i++) {
    DataChunk dataChunk = new DataChunk();
    dataChunk.setChunk_meta(getChunkCompressionMeta());
    dataChunk.setRowMajor(false);
    // TODO: once the schema PR is merged, the actual column ids need to be passed here.
    dataChunk.setColumn_ids(new ArrayList<Integer>());
    dataChunk.setData_page_length(blockletInfoColumnar.getMeasureLength()[i]);
    dataChunk.setData_page_offset(blockletInfoColumnar.getMeasureOffset()[i]);
    // TODO: right now the encodings are decided at runtime; change to use these encoders.
    List<Encoding> encodings = new ArrayList<Encoding>();
    encodings.add(Encoding.DELTA);
    dataChunk.setEncoders(encodings);
    // TODO: writing dummy presence meta; the actual presence meta needs to be set.
    PresenceMeta presenceMeta = new PresenceMeta();
    presenceMeta.setPresent_bit_streamIsSet(true);
    presenceMeta.setPresent_bit_stream(
        blockletInfoColumnar.getMeasureNullValueIndex()[i].toByteArray());
    dataChunk.setPresence(presenceMeta);
    // TODO: PresenceMeta needs to be implemented and set here
    // dataChunk.setPresence(new PresenceMeta());
    // TODO: need to write the ValueCompression meta here.
    List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
    encoderMetaList.add(ByteBuffer.wrap(serializeEncoderMeta(
        createValueEncoderMeta(blockletInfoColumnar.getCompressionModel(), i))));
    dataChunk.setEncoder_meta(encoderMetaList);
    colDataChunks.add(dataChunk);
  }
  blockletInfo.setColumn_data_chunks(colDataChunks);
  return blockletInfo;
}
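The encoder meta attached to each measure chunk comes from the serializeEncoderMeta helper called above, which is not shown in this snippet. A minimal sketch of what it does, assuming plain Java object serialization and that ValueEncoderMeta is Serializable (both assumptions, not verbatim project code):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;

// Sketch: turns a ValueEncoderMeta into the byte[] that is wrapped into the
// chunk's encoder_meta ByteBuffer above.
private static byte[] serializeEncoderMeta(ValueEncoderMeta encoderMeta) throws IOException {
  ByteArrayOutputStream aos = new ByteArrayOutputStream();
  ObjectOutputStream objStream = new ObjectOutputStream(aos);
  objStream.writeObject(encoderMeta);  // standard Java serialization of the meta object
  objStream.close();
  return aos.toByteArray();
}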
Use of org.apache.carbondata.format.DataChunk in project carbondata by apache.
The class CarbonMetadataUtil, method convertBlockletInfo:
/**
 * It converts the FileFooter thrift object to a list of BlockletInfoColumnar
 * objects.
 *
 * @param footer the FileFooter read from the carbondata file
 * @return list of BlockletInfoColumnar, one per blocklet in the footer
 */
public static List<BlockletInfoColumnar> convertBlockletInfo(FileFooter footer)
    throws IOException {
  List<BlockletInfoColumnar> listOfNodeInfo =
      new ArrayList<BlockletInfoColumnar>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
  for (BlockletInfo blockletInfo : footer.getBlocklet_info_list()) {
    BlockletInfoColumnar blockletInfoColumnar = new BlockletInfoColumnar();
    blockletInfoColumnar.setNumberOfKeys(blockletInfo.getNum_rows());
    List<DataChunk> columnChunks = blockletInfo.getColumn_data_chunks();
    List<DataChunk> dictChunks = new ArrayList<DataChunk>();
    List<DataChunk> nonDictColChunks = new ArrayList<DataChunk>();
    for (DataChunk dataChunk : columnChunks) {
      if (dataChunk.getEncoders().get(0).equals(Encoding.DICTIONARY)) {
        dictChunks.add(dataChunk);
      } else {
        nonDictColChunks.add(dataChunk);
      }
    }
    int[] keyLengths = new int[dictChunks.size()];
    long[] keyOffSets = new long[dictChunks.size()];
    long[] keyBlockIndexOffsets = new long[dictChunks.size()];
    int[] keyBlockIndexLens = new int[dictChunks.size()];
    long[] indexMapOffsets = new long[dictChunks.size()];
    int[] indexMapLens = new int[dictChunks.size()];
    boolean[] sortState = new boolean[dictChunks.size()];
    int i = 0;
    for (DataChunk dataChunk : dictChunks) {
      keyLengths[i] = dataChunk.getData_page_length();
      keyOffSets[i] = dataChunk.getData_page_offset();
      keyBlockIndexOffsets[i] = dataChunk.getRowid_page_offset();
      keyBlockIndexLens[i] = dataChunk.getRowid_page_length();
      indexMapOffsets[i] = dataChunk.getRle_page_offset();
      indexMapLens[i] = dataChunk.getRle_page_length();
      sortState[i] = dataChunk.getSort_state().equals(SortState.SORT_EXPLICIT);
      i++;
    }
    blockletInfoColumnar.setKeyLengths(keyLengths);
    blockletInfoColumnar.setKeyOffSets(keyOffSets);
    blockletInfoColumnar.setKeyBlockIndexOffSets(keyBlockIndexOffsets);
    blockletInfoColumnar.setKeyBlockIndexLength(keyBlockIndexLens);
    blockletInfoColumnar.setDataIndexMapOffsets(indexMapOffsets);
    blockletInfoColumnar.setDataIndexMapLength(indexMapLens);
    blockletInfoColumnar.setIsSortedKeyColumn(sortState);
    int[] msrLens = new int[nonDictColChunks.size()];
    long[] msrOffsets = new long[nonDictColChunks.size()];
    ValueEncoderMeta[] encoderMetas = new ValueEncoderMeta[nonDictColChunks.size()];
    i = 0;
    for (DataChunk msrChunk : nonDictColChunks) {
      msrLens[i] = msrChunk.getData_page_length();
      msrOffsets[i] = msrChunk.getData_page_offset();
      encoderMetas[i] = deserializeValueEncoderMeta(msrChunk.getEncoder_meta().get(0));
      i++;
    }
    blockletInfoColumnar.setMeasureLength(msrLens);
    blockletInfoColumnar.setMeasureOffset(msrOffsets);
    blockletInfoColumnar.setCompressionModel(getValueCompressionModel(encoderMetas));
    listOfNodeInfo.add(blockletInfoColumnar);
  }
  setBlockletIndex(footer, listOfNodeInfo);
  return listOfNodeInfo;
}
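deserializeValueEncoderMeta, called above for each non-dictionary chunk, is the read-side counterpart of serializeEncoderMeta from getBlockletInfo and is likewise not shown in this snippet. A minimal sketch, assuming the meta was written with Java object serialization and arrives as the ByteBuffer stored in encoder_meta (the signature and buffer handling are assumptions):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.nio.ByteBuffer;

// Sketch: reads back the ValueEncoderMeta that serializeEncoderMeta wrote.
private static ValueEncoderMeta deserializeValueEncoderMeta(ByteBuffer metaBuffer)
    throws IOException {
  byte[] bytes = new byte[metaBuffer.remaining()];
  metaBuffer.duplicate().get(bytes);  // copy out without disturbing the buffer's position
  try (ObjectInputStream objStream =
      new ObjectInputStream(new ByteArrayInputStream(bytes))) {
    return (ValueEncoderMeta) objStream.readObject();
  } catch (ClassNotFoundException e) {
    throw new IOException(e);
  }
}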
Use of org.apache.carbondata.format.DataChunk in project carbondata by apache.
The class CarbonMetadataUtilTest, method setUp:
@BeforeClass
public static void setUp() {
  objMaxArr = new Long[6];
  objMaxArr[0] = new Long("111111");
  objMaxArr[1] = new Long("121111");
  objMaxArr[2] = new Long("131111");
  objMaxArr[3] = new Long("141111");
  objMaxArr[4] = new Long("151111");
  objMaxArr[5] = new Long("161111");
  objMinArr = new Long[6];
  objMinArr[0] = new Long("119");
  objMinArr[1] = new Long("121");
  objMinArr[2] = new Long("131");
  objMinArr[3] = new Long("141");
  objMinArr[4] = new Long("151");
  objMinArr[5] = new Long("161");
  objDecimal = new int[] { 0, 0, 0, 0, 0, 0 };
  columnSchemaList = new ArrayList<>();
  List<Encoding> encodingList = new ArrayList<>();
  encodingList.add(Encoding.BIT_PACKED);
  encodingList.add(Encoding.DELTA);
  encodingList.add(Encoding.INVERTED_INDEX);
  encodingList.add(Encoding.DIRECT_DICTIONARY);
  byteArr = "412111".getBytes();
  byte[] byteArr1 = "321".getBytes();
  byte[] byteArr2 = "356".getBytes();
  byteBufferList = new ArrayList<>();
  ByteBuffer bb = ByteBuffer.allocate(byteArr.length);
  bb.put(byteArr);
  ByteBuffer bb1 = ByteBuffer.allocate(byteArr1.length);
  bb1.put(byteArr1);
  ByteBuffer bb2 = ByteBuffer.allocate(byteArr2.length);
  bb2.put(byteArr2);
  byteBufferList.add(bb);
  byteBufferList.add(bb1);
  byteBufferList.add(bb2);
  DataChunk dataChunk = new DataChunk();
  dataChunk.setEncoders(encodingList);
  dataChunk.setEncoder_meta(byteBufferList);
  List<DataChunk> dataChunkList = new ArrayList<>();
  dataChunkList.add(dataChunk);
  dataChunkList.add(dataChunk);
  BlockletInfo blockletInfo = new BlockletInfo();
  blockletInfo.setColumn_data_chunks(dataChunkList);
  blockletInfoList = new ArrayList<>();
  blockletInfoList.add(blockletInfo);
  blockletInfoList.add(blockletInfo);
  ValueEncoderMeta meta = CarbonTestUtil.createValueEncoderMeta();
  meta.setDecimal(5);
  meta.setMinValue(objMinArr);
  meta.setMaxValue(objMaxArr);
  meta.setType(org.apache.carbondata.core.metadata.datatype.DataType.DOUBLE_MEASURE_CHAR);
  List<Encoding> encoders = new ArrayList<>();
  encoders.add(Encoding.INVERTED_INDEX);
  encoders.add(Encoding.BIT_PACKED);
  encoders.add(Encoding.DELTA);
  encoders.add(Encoding.DICTIONARY);
  encoders.add(Encoding.DIRECT_DICTIONARY);
  encoders.add(Encoding.RLE);
  ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
  ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
  ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
  ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
  ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
  ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
  columnSchemas = new ArrayList<>();
  columnSchemas.add(columnSchema);
  columnSchemas.add(columnSchema1);
  columnSchemas.add(columnSchema2);
  columnSchemas.add(columnSchema3);
  columnSchemas.add(columnSchema4);
  columnSchemas.add(columnSchema5);
  columnSchemas.add(columnSchema6);
  columnSchemas.add(columnSchema7);
}
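One detail worth noticing in this fixture: convertBlockletInfo above splits chunks on the first entry of the encoder list only, so a chunk whose encoders start with BIT_PACKED is routed to the non-dictionary (measure) list even though DIRECT_DICTIONARY appears later in the list. A small hypothetical test pinning down that behavior (the method name and assertions are illustrative, not from the project; assumes JUnit 4 and java.util.Arrays):

import java.util.Arrays;
import org.junit.Test;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

@Test
public void testChunkClassificationUsesFirstEncoderOnly() {
  DataChunk chunk = new DataChunk();
  chunk.setEncoders(Arrays.asList(Encoding.BIT_PACKED, Encoding.DIRECT_DICTIONARY));
  // convertBlockletInfo checks getEncoders().get(0), so this chunk would be
  // treated as a non-dictionary (measure) chunk despite DIRECT_DICTIONARY.
  assertFalse(chunk.getEncoders().get(0).equals(Encoding.DICTIONARY));

  DataChunk dictChunk = new DataChunk();
  dictChunk.setEncoders(Arrays.asList(Encoding.DICTIONARY, Encoding.RLE));
  assertTrue(dictChunk.getEncoders().get(0).equals(Encoding.DICTIONARY));
}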