use of org.apache.carbondata.core.metadata.BlockletInfoColumnar in project carbondata by apache.
the class CarbonFactDataWriterImplV2 method getBlockletInfo.
/**
 * Builds the blocklet metadata entry for the data held in the given NodeHolder.
 *
 * @param nodeHolder holder carrying the blocklet's key/measure data and statistics
 * @param offset     file offset of the blocklet data (not used by this V2 implementation)
 * @return BlockletInfoColumnar - blocklet metadata
 */
protected BlockletInfoColumnar getBlockletInfo(NodeHolder nodeHolder, long offset) {
  // create the info object for the leaf entry
  BlockletInfoColumnar info = new BlockletInfoColumnar();
  // add the agg key block flags
  info.setAggKeyBlock(nodeHolder.getAggBlocks());
  // add total entry count
  info.setNumberOfKeys(nodeHolder.getEntryCount());
  // add the key array lengths
  info.setKeyLengths(nodeHolder.getKeyLengths());
  // add the null measure index bit set
  info.setMeasureNullValueIndex(nodeHolder.getMeasureNullValueIndex());
  // add column min/max data
  info.setColumnMaxData(nodeHolder.getColumnMaxData());
  info.setColumnMinData(nodeHolder.getColumnMinData());
  // add measure lengths
  info.setMeasureLength(nodeHolder.getMeasureLenght());
  info.setIsSortedKeyColumn(nodeHolder.getIsSortedKeyBlock());
  info.setKeyBlockIndexLength(nodeHolder.getKeyBlockIndexLength());
  info.setDataIndexMapLength(nodeHolder.getDataIndexMapLength());
  // set start key
  info.setStartKey(nodeHolder.getStartKey());
  // set end key
  info.setEndKey(nodeHolder.getEndKey());
  info.setCompressionModel(nodeHolder.getCompressionModel());
  // add the column group blocks
  info.setColGrpBlocks(nodeHolder.getColGrpBlocks());
  // return the leaf metadata
  return info;
}
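A minimal usage sketch (hypothetical: dataHandler, currentOffset and blockletInfoList are illustrative names, not taken from the CarbonData sources) of how a writer collects one BlockletInfoColumnar per blocklet until the footer is written:
// runs inside a fact data writer subclass, since getBlockletInfo is protected
NodeHolder nodeHolder = dataHandler.getNodeHolder(); // assumed producer of blocklet data
long offset = currentOffset; // offset at which the blocklet's data was written
BlockletInfoColumnar info = getBlockletInfo(nodeHolder, offset);
blockletInfoList.add(info); // later flushed into the file footer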
use of org.apache.carbondata.core.metadata.BlockletInfoColumnar in project carbondata by apache.
the class CarbonMetadataUtil method convertBlockletInfo.
/**
 * Converts a FileFooter thrift object into a list of BlockletInfoColumnar
 * objects.
 *
 * @param footer FileFooter thrift object read from a carbondata file
 * @return one BlockletInfoColumnar per blocklet in the footer
 * @throws IOException if the footer contents cannot be deserialized
 */
public static List<BlockletInfoColumnar> convertBlockletInfo(FileFooter footer)
    throws IOException {
  List<BlockletInfoColumnar> listOfNodeInfo =
      new ArrayList<BlockletInfoColumnar>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
  for (BlockletInfo blockletInfo : footer.getBlocklet_info_list()) {
    BlockletInfoColumnar blockletInfoColumnar = new BlockletInfoColumnar();
    blockletInfoColumnar.setNumberOfKeys(blockletInfo.getNum_rows());
    List<DataChunk> columnChunks = blockletInfo.getColumn_data_chunks();
    // split the chunks on their first encoding: dictionary-encoded chunks are the
    // key (dimension) columns, the rest are treated as measure columns below
    List<DataChunk> dictChunks = new ArrayList<DataChunk>();
    List<DataChunk> nonDictColChunks = new ArrayList<DataChunk>();
    for (DataChunk dataChunk : columnChunks) {
      if (dataChunk.getEncoders().get(0).equals(Encoding.DICTIONARY)) {
        dictChunks.add(dataChunk);
      } else {
        nonDictColChunks.add(dataChunk);
      }
    }
    int[] keyLengths = new int[dictChunks.size()];
    long[] keyOffSets = new long[dictChunks.size()];
    long[] keyBlockIndexOffsets = new long[dictChunks.size()];
    int[] keyBlockIndexLens = new int[dictChunks.size()];
    long[] indexMapOffsets = new long[dictChunks.size()];
    int[] indexMapLens = new int[dictChunks.size()];
    boolean[] sortState = new boolean[dictChunks.size()];
    int i = 0;
    for (DataChunk dataChunk : dictChunks) {
      keyLengths[i] = dataChunk.getData_page_length();
      keyOffSets[i] = dataChunk.getData_page_offset();
      keyBlockIndexOffsets[i] = dataChunk.getRowid_page_offset();
      keyBlockIndexLens[i] = dataChunk.getRowid_page_length();
      indexMapOffsets[i] = dataChunk.getRle_page_offset();
      indexMapLens[i] = dataChunk.getRle_page_length();
      sortState[i] = dataChunk.getSort_state().equals(SortState.SORT_EXPLICIT);
      i++;
    }
    blockletInfoColumnar.setKeyLengths(keyLengths);
    blockletInfoColumnar.setKeyOffSets(keyOffSets);
    blockletInfoColumnar.setKeyBlockIndexOffSets(keyBlockIndexOffsets);
    blockletInfoColumnar.setKeyBlockIndexLength(keyBlockIndexLens);
    blockletInfoColumnar.setDataIndexMapOffsets(indexMapOffsets);
    blockletInfoColumnar.setDataIndexMapLength(indexMapLens);
    blockletInfoColumnar.setIsSortedKeyColumn(sortState);
    int[] msrLens = new int[nonDictColChunks.size()];
    long[] msrOffsets = new long[nonDictColChunks.size()];
    ValueEncoderMeta[] encoderMetas = new ValueEncoderMeta[nonDictColChunks.size()];
    i = 0;
    for (DataChunk msrChunk : nonDictColChunks) {
      msrLens[i] = msrChunk.getData_page_length();
      msrOffsets[i] = msrChunk.getData_page_offset();
      encoderMetas[i] = deserializeValueEncoderMeta(msrChunk.getEncoder_meta().get(0));
      i++;
    }
    blockletInfoColumnar.setMeasureLength(msrLens);
    blockletInfoColumnar.setMeasureOffset(msrOffsets);
    blockletInfoColumnar.setCompressionModel(getValueCompressionModel(encoderMetas));
    listOfNodeInfo.add(blockletInfoColumnar);
  }
  setBlockletIndex(footer, listOfNodeInfo);
  return listOfNodeInfo;
}
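A minimal read-path sketch, assuming a carbondata file already exists at filePath and footerOffset points at its footer; CarbonFooterReader is used the same way in the test at the end of this page, and the getNumberOfKeys accessor is assumed as the counterpart of the setter used above:
CarbonFooterReader reader = new CarbonFooterReader(filePath, footerOffset);
FileFooter footer = reader.readFooter();
List<BlockletInfoColumnar> blocklets = CarbonMetadataUtil.convertBlockletInfo(footer);
// each entry now carries the per-blocklet offsets and lengths of its key and measure pages
for (BlockletInfoColumnar blocklet : blocklets) {
  System.out.println("rows in blocklet: " + blocklet.getNumberOfKeys()); // assumed getter
}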
use of org.apache.carbondata.core.metadata.BlockletInfoColumnar in project carbondata by apache.
the class CarbonFactDataWriterImplV1 method writeBlockletData.
@Override
public void writeBlockletData(NodeHolder holder) throws CarbonDataWriterException {
  if (holder.getEntryCount() == 0) {
    return;
  }
  // total size of the row id (inverted) indexes and RLE data index maps
  int indexBlockSize = 0;
  for (int i = 0; i < holder.getKeyBlockIndexLength().length; i++) {
    indexBlockSize += holder.getKeyBlockIndexLength()[i] + CarbonCommonConstants.INT_SIZE_IN_BYTE;
  }
  for (int i = 0; i < holder.getDataIndexMapLength().length; i++) {
    indexBlockSize += holder.getDataIndexMapLength()[i];
  }
  long blockletDataSize = holder.getTotalDimensionArrayLength()
      + holder.getTotalMeasureArrayLength() + indexBlockSize;
  // roll over to a new file if this blocklet would push the current file past the size threshold
  updateBlockletFileChannel(blockletDataSize);
  // write data to file and get its offset
  long offset = writeDataToFile(holder, fileChannel);
  // get the blocklet info for the newly added blocklet
  BlockletInfoColumnar blockletInfo = getBlockletInfo(holder, offset);
  // add blocklet info to list
  blockletInfoList.add(blockletInfo);
  LOGGER.info("A new blocklet is added, its data size is: " + blockletDataSize + " Byte");
}
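The index size arithmetic above, restated as a standalone sketch (the helper name is illustrative): each key block index contributes its length plus one int, each data index map only its length.
// illustrative helper, not part of the CarbonData API
static int indexBlockSize(int[] keyBlockIndexLengths, int[] dataIndexMapLengths) {
  int size = 0;
  for (int len : keyBlockIndexLengths) {
    size += len + 4; // CarbonCommonConstants.INT_SIZE_IN_BYTE is 4
  }
  for (int len : dataIndexMapLengths) {
    size += len;
  }
  return size;
}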
use of org.apache.carbondata.core.metadata.BlockletInfoColumnar in project carbondata by apache.
the class CarbonFactDataWriterImplV2 method writeBlockletData.
/**
 * Writes the blocklet data to the carbon data file.
 *
 * @param holder NodeHolder carrying the blocklet data to be written
 * @throws CarbonDataWriterException any problem in the writing operation
 */
@Override
public void writeBlockletData(NodeHolder holder) throws CarbonDataWriterException {
  if (holder.getEntryCount() == 0) {
    return;
  }
  // running total used to compute the blocklet size
  int size = 0;
  // get the blocklet info object; the V2 getBlockletInfo shown above ignores the
  // offset argument, so 0 is passed
  BlockletInfoColumnar blockletInfo = getBlockletInfo(holder, 0);
  List<DataChunk2> datachunks = null;
  try {
    // get all the data chunks
    datachunks = CarbonMetadataUtil.getDatachunk2(blockletInfo, thriftColumnSchemaList,
        dataWriterVo.getSegmentProperties());
  } catch (IOException e) {
    throw new CarbonDataWriterException("Problem while getting the data chunks", e);
  }
  // serialize each data chunk and add its size
  byte[][] dataChunkByteArray = new byte[datachunks.size()][];
  for (int i = 0; i < dataChunkByteArray.length; i++) {
    dataChunkByteArray[i] = CarbonUtil.getByteArray(datachunks.get(i));
    size += dataChunkByteArray[i].length;
  }
  // add row id index lengths
  for (int i = 0; i < holder.getKeyBlockIndexLength().length; i++) {
    size += holder.getKeyBlockIndexLength()[i];
  }
  // add RLE index lengths
  for (int i = 0; i < holder.getDataIndexMapLength().length; i++) {
    size += holder.getDataIndexMapLength()[i];
  }
  // add the dimension and measure column data page sizes
  long blockletDataSize = holder.getTotalDimensionArrayLength()
      + holder.getTotalMeasureArrayLength() + size;
  // if the file has already reached the threshold size, create a new file and get
  // its channel
  updateBlockletFileChannel(blockletDataSize);
  // write the version header to a brand-new file so the carbondata file can be
  // read standalone
  try {
    if (fileChannel.size() == 0) {
      ColumnarFormatVersion version = CarbonProperties.getInstance().getFormatVersion();
      byte[] header = (CarbonCommonConstants.CARBON_DATA_VERSION_HEADER + version).getBytes();
      ByteBuffer buffer = ByteBuffer.allocate(header.length);
      buffer.put(header);
      buffer.rewind();
      fileChannel.write(buffer);
    }
  } catch (IOException e) {
    throw new CarbonDataWriterException("Problem while getting the file channel size", e);
  }
  // write data to file
  writeDataToFile(holder, dataChunkByteArray, fileChannel);
  // add blocklet info to list
  blockletInfoList.add(blockletInfo);
  LOGGER.info("A new blocklet is added, its data size is: " + blockletDataSize + " Byte");
}
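The write-header-once logic above, restated as a self-contained sketch (the class and method names are assumptions, and an explicit charset is used where the original relies on the platform default): only a channel whose size is still 0 receives the header, so blocklets appended to the same file later are not prefixed again.
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
class VersionHeaderWriter {
  // write the version marker only when the file is brand new (size == 0)
  static void writeIfNew(FileChannel channel, String headerText) throws IOException {
    if (channel.size() == 0) {
      ByteBuffer buffer = ByteBuffer.wrap(headerText.getBytes(StandardCharsets.UTF_8));
      channel.write(buffer);
    }
  }
}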
use of org.apache.carbondata.core.metadata.BlockletInfoColumnar in project carbondata by apache.
the class CarbonFooterWriterTest method testWriteFactMetadata.
/**
 * Test writing fact metadata and reading it back.
 */
@Test
public void testWriteFactMetadata() throws IOException {
  deleteFile();
  createFile();
  CarbonFooterWriter writer = new CarbonFooterWriter(filePath);
  List<BlockletInfoColumnar> infoColumnars = getBlockletInfoColumnars();
  int[] cardinalities = new int[] { 2, 4, 5, 7, 9, 10 };
  List<ColumnSchema> columnSchema = Arrays.asList(
      getDimensionColumn("IMEI1"), getDimensionColumn("IMEI2"),
      getDimensionColumn("IMEI3"), getDimensionColumn("IMEI4"),
      getDimensionColumn("IMEI5"), getDimensionColumn("IMEI6"));
  List<org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema> wrapperColumnSchema =
      Arrays.asList(
          getWrapperDimensionColumn("IMEI1"), getWrapperDimensionColumn("IMEI2"),
          getWrapperDimensionColumn("IMEI3"), getWrapperDimensionColumn("IMEI4"),
          getWrapperDimensionColumn("IMEI5"), getWrapperDimensionColumn("IMEI6"));
  int[] colCardinality = CarbonUtil.getFormattedCardinality(cardinalities, wrapperColumnSchema);
  SegmentProperties segmentProperties = new SegmentProperties(wrapperColumnSchema, colCardinality);
  writer.writeFooter(CarbonMetadataUtil.convertFileFooter(
      infoColumnars, 6, cardinalities, columnSchema, segmentProperties), 0);
  CarbonFooterReader metaDataReader = new CarbonFooterReader(filePath, 0);
  assertTrue(metaDataReader.readFooter() != null);
}
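A natural extension of this test (hypothetical; assertEquals is the standard JUnit assertion) would convert the footer back with convertBlockletInfo from above and check that the blocklet count survives the round trip:
FileFooter footer = metaDataReader.readFooter();
List<BlockletInfoColumnar> roundTripped = CarbonMetadataUtil.convertBlockletInfo(footer);
assertEquals(infoColumnars.size(), roundTripped.size());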