Use of org.apache.carbondata.format.BlockletInfo3 in project carbondata by apache.
From the class CarbonFactDataWriterImplV3, method writeDataToFile:
/**
 * Below method will be used to write data in the carbon data file.
 * Data Format:
 * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>>
 * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>>
 * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>>
 * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>>
 * Each page will contain column data, inverted index and RLE index.
 *
 * @param channel        file channel in which the data needs to be written
 * @param dataChunkBytes serialized data chunk metadata, one entry per column
 */
private void writeDataToFile(FileChannel channel, byte[][] dataChunkBytes) {
  long offset = 0;
  // write the header
  try {
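    // fileChannel is the writer's underlying channel field (not the method
    // parameter); size 0 means the thrift FileHeader has not been written yet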
    if (fileChannel.size() == 0) {
      // below code is to write the file header
      byte[] fileHeader = CarbonUtil.getByteArray(CarbonMetadataUtil
          .getFileHeader(true, thriftColumnSchemaList, dataWriterVo.getSchemaUpdatedTimeStamp()));
      ByteBuffer buffer = ByteBuffer.allocate(fileHeader.length);
      buffer.put(fileHeader);
      buffer.flip();
      fileChannel.write(buffer);
    }
    offset = channel.size();
  } catch (IOException e) {
    throw new CarbonDataWriterException("Problem while getting the file channel size", e);
  }
  // to maintain the offset of each data chunk in blocklet
  List<Long> currentDataChunksOffset = new ArrayList<>();
  // to maintain the length of each data chunk in blocklet
  List<Integer> currentDataChunksLength = new ArrayList<>();
  // get the node holder list
  List<NodeHolder> nodeHolderList = dataWriterHolder.getNodeHolder();
  int numberOfDimension = nodeHolderList.get(0).getKeyArray().length;
  int numberOfMeasures = nodeHolderList.get(0).getDataArray().length;
  NodeHolder nodeHolder = null;
  ByteBuffer buffer = null;
  int bufferSize = 0;
  long dimensionOffset = 0;
  long measureOffset = 0;
  int numberOfRows = 0;
  // calculate the total number of rows in this blocklet (sum across all pages)
  for (int j = 0; j < nodeHolderList.size(); j++) {
    numberOfRows += nodeHolderList.get(j).getEntryCount();
  }
  try {
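    // for each dimension column: write its data chunk header first, then the
    // contents of every page of that column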
    for (int i = 0; i < numberOfDimension; i++) {
      currentDataChunksOffset.add(offset);
      currentDataChunksLength.add(dataChunkBytes[i].length);
      buffer = ByteBuffer.allocate(dataChunkBytes[i].length);
      buffer.put(dataChunkBytes[i]);
      buffer.flip();
      fileChannel.write(buffer);
      offset += dataChunkBytes[i].length;
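      // write every page of this column: key data, then the inverted index for
      // unsorted blocks and the compressed data index for agg blocks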
      for (int j = 0; j < nodeHolderList.size(); j++) {
        nodeHolder = nodeHolderList.get(j);
        bufferSize = nodeHolder.getKeyLengths()[i]
            + (!nodeHolder.getIsSortedKeyBlock()[i] ? nodeHolder.getKeyBlockIndexLength()[i] : 0)
            + (dataWriterVo.getAggBlocks()[i] ? nodeHolder.getCompressedDataIndex()[i].length : 0);
        buffer = ByteBuffer.allocate(bufferSize);
        buffer.put(nodeHolder.getKeyArray()[i]);
        if (!nodeHolder.getIsSortedKeyBlock()[i]) {
          buffer.putInt(nodeHolder.getCompressedIndex()[i].length);
          buffer.put(nodeHolder.getCompressedIndex()[i]);
          if (nodeHolder.getCompressedIndexMap()[i].length > 0) {
            buffer.put(nodeHolder.getCompressedIndexMap()[i]);
          }
        }
        if (nodeHolder.getAggBlocks()[i]) {
          buffer.put(nodeHolder.getCompressedDataIndex()[i]);
        }
        buffer.flip();
        fileChannel.write(buffer);
        offset += bufferSize;
      }
    }
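    // all dimension data has been written; record where it ends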
    dimensionOffset = offset;
    int dataChunkStartIndex = nodeHolderList.get(0).getKeyArray().length;
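    // measure chunk headers are stored after the dimension entries in
    // dataChunkBytes, so reading starts at index numberOfDimension; for each
    // measure column: write its chunk header, then every page's data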
    for (int i = 0; i < numberOfMeasures; i++) {
      nodeHolderList = dataWriterHolder.getNodeHolder();
      currentDataChunksOffset.add(offset);
      currentDataChunksLength.add(dataChunkBytes[dataChunkStartIndex].length);
      buffer = ByteBuffer.allocate(dataChunkBytes[dataChunkStartIndex].length);
      buffer.put(dataChunkBytes[dataChunkStartIndex]);
      buffer.flip();
      fileChannel.write(buffer);
      offset += dataChunkBytes[dataChunkStartIndex].length;
      dataChunkStartIndex++;
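      // write each page's measure data for this column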
      for (int j = 0; j < nodeHolderList.size(); j++) {
        nodeHolder = nodeHolderList.get(j);
        bufferSize = nodeHolder.getDataArray()[i].length;
        buffer = ByteBuffer.allocate(bufferSize);
        buffer.put(nodeHolder.getDataArray()[i]);
        buffer.flip();
        fileChannel.write(buffer);
        offset += bufferSize;
      }
    }
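    // all measure data has been written; record where it ends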
    measureOffset = offset;
  } catch (IOException e) {
    throw new CarbonDataWriterException("Problem while writing the data", e);
  }
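  // build the index (min/max values) and the metadata entry for this blocklet;
  // both are kept in memory and written out with the file footer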
  blockletIndex.add(CarbonMetadataUtil
      .getBlockletIndex(nodeHolderList, dataWriterVo.getSegmentProperties().getMeasures()));
  BlockletInfo3 blockletInfo3 =
      new BlockletInfo3(numberOfRows, currentDataChunksOffset, currentDataChunksLength,
          dimensionOffset, measureOffset, dataWriterHolder.getNodeHolder().size());
  blockletMetadata.add(blockletInfo3);
}
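
For reference, the BlockletInfo3 constructor call above passes the row count, the offset and length lists for every column data chunk, the end offsets of the dimension and measure sections, and the number of pages. A minimal sketch of building one directly; the values are purely illustrative and not taken from the project:

import java.util.Arrays;
import java.util.List;

import org.apache.carbondata.format.BlockletInfo3;

// illustrative values only: two column chunks, 10000 rows, 4 pages
List<Long> chunkOffsets = Arrays.asList(64L, 4096L);   // file offset of each data chunk
List<Integer> chunkLengths = Arrays.asList(128, 256);  // length of each data chunk
BlockletInfo3 info = new BlockletInfo3(10000, chunkOffsets, chunkLengths,
    4096L,  // offset where the dimension data ends
    8192L,  // offset where the measure data ends
    4);     // number of pages in the blocklet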