Use of org.apache.carbondata.core.datastore.blocklet.EncodedBlocklet in project carbondata by apache.
The class CarbonFactDataWriterImplV3, method writeBlockletToFile.
/**
 * Write the collected blocklet data (blockletDataHolder) to file.
 */
private void writeBlockletToFile() {
  // get the list of all encoded table pages
  EncodedBlocklet encodedBlocklet = blockletDataHolder.getEncodedBlocklet();
  int numDimensions = encodedBlocklet.getNumberOfDimension();
  int numMeasures = encodedBlocklet.getNumberOfMeasure();
  // get the data chunks for all the columns
  byte[][] dataChunkBytes = new byte[numDimensions + numMeasures][];
  long metadataSize = fillDataChunk(encodedBlocklet, dataChunkBytes);
  // calculate the total size of the data to be written
  long blockletSize = blockletDataHolder.getSize() + metadataSize;
  // if the data size would exceed the block size, roll over to a new file
  createNewFileIfReachThreshold(blockletSize);
  // write data to file
  try {
    if (currentOffsetInFile == 0) {
      // write the header if the file is empty
      writeHeaderToFile();
    }
    writeBlockletToFile(dataChunkBytes);
    if (listener != null
        && model.getDatabaseName().equalsIgnoreCase(listener.getTblIdentifier().getDatabaseName())
        && model.getTableName().equalsIgnoreCase(listener.getTblIdentifier().getTableName())) {
      listener.onBlockletEnd(blockletId++);
    }
    pageId = 0;
  } catch (IOException e) {
    LOGGER.error("Problem while writing file", e);
    throw new CarbonDataWriterException("Problem while writing file", e);
  } finally {
    // clear the data holder
    blockletDataHolder.clear();
  }
}
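The call to createNewFileIfReachThreshold(blockletSize) is what bounds the size of each .carbondata file: if appending this blocklet would push the file past the configured block size, the writer finalizes the current file and opens a fresh one before writing. The snippet above does not show that method, so here is a minimal sketch of the roll-over check, assuming hypothetical field and helper names (blockSizeThreshold, bytesWrittenToCurrentFile, closeCurrentFile, openNewFile); it is an illustration of the idea, not the actual CarbonData implementation.

// Illustrative sketch only; the field and helper names below are assumptions,
// not CarbonData's real members.
abstract class RollOverSketch {
  long blockSizeThreshold;          // configured max size of one .carbondata file
  long bytesWrittenToCurrentFile;   // bytes already committed to the current file

  void createNewFileIfReachThreshold(long blockletSize) {
    if (bytesWrittenToCurrentFile != 0
        && bytesWrittenToCurrentFile + blockletSize >= blockSizeThreshold) {
      closeCurrentFile();  // write the footer and close the finished file
      openNewFile();       // fresh file; currentOffsetInFile starts at 0 again
      bytesWrittenToCurrentFile = 0;
    }
    bytesWrittenToCurrentFile += blockletSize;
  }

  abstract void closeCurrentFile();
  abstract void openNewFile();
}

Note that the size check includes metadataSize from fillDataChunk, so the threshold accounts for the serialized DataChunk3 headers as well as the page data itself.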
Use of org.apache.carbondata.core.datastore.blocklet.EncodedBlocklet in project carbondata by apache.
The class CarbonFactDataWriterImplV3, method writeBlockletToFile.
/**
 * Write one blocklet's data into the file.
 * File format:
 * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>>
 * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>>
 * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>>
 * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>>
 */
private void writeBlockletToFile(byte[][] dataChunkBytes) throws IOException {
  long offset = currentOffsetInFile;
  // to maintain the offset of each data chunk in the blocklet
  List<Long> currentDataChunksOffset = new ArrayList<>();
  // to maintain the length of each data chunk in the blocklet
  List<Integer> currentDataChunksLength = new ArrayList<>();
  EncodedBlocklet encodedBlocklet = blockletDataHolder.getEncodedBlocklet();
  int numberOfDimension = encodedBlocklet.getNumberOfDimension();
  int numberOfMeasures = encodedBlocklet.getNumberOfMeasure();
  ByteBuffer buffer = null;
  long dimensionOffset = 0;
  long measureOffset = 0;
  // write the dimension columns: for each column, first its DataChunk3 bytes,
  // then all of its encoded pages
  for (int i = 0; i < numberOfDimension; i++) {
    currentDataChunksOffset.add(offset);
    currentDataChunksLength.add(dataChunkBytes[i].length);
    buffer = ByteBuffer.wrap(dataChunkBytes[i]);
    currentOffsetInFile += fileChannel.write(buffer);
    offset += dataChunkBytes[i].length;
    BlockletEncodedColumnPage blockletEncodedColumnPage =
        encodedBlocklet.getEncodedDimensionColumnPages().get(i);
    for (EncodedColumnPage dimensionPage : blockletEncodedColumnPage
        .getEncodedColumnPageList()) {
      buffer = dimensionPage.getEncodedData();
      int bufferSize = buffer.limit();
      currentOffsetInFile += fileChannel.write(buffer);
      offset += bufferSize;
    }
  }
  dimensionOffset = offset;
  int dataChunkStartIndex = encodedBlocklet.getNumberOfDimension();
  // write the measure columns in the same layout, after all dimension columns
  for (int i = 0; i < numberOfMeasures; i++) {
    currentDataChunksOffset.add(offset);
    currentDataChunksLength.add(dataChunkBytes[dataChunkStartIndex].length);
    buffer = ByteBuffer.wrap(dataChunkBytes[dataChunkStartIndex]);
    currentOffsetInFile += fileChannel.write(buffer);
    offset += dataChunkBytes[dataChunkStartIndex].length;
    dataChunkStartIndex++;
    BlockletEncodedColumnPage blockletEncodedColumnPage =
        encodedBlocklet.getEncodedMeasureColumnPages().get(i);
    for (EncodedColumnPage measurePage : blockletEncodedColumnPage
        .getEncodedColumnPageList()) {
      buffer = measurePage.getEncodedData();
      int bufferSize = buffer.limit();
      currentOffsetInFile += fileChannel.write(buffer);
      offset += bufferSize;
    }
  }
  measureOffset = offset;
  blockletIndex.add(CarbonMetadataUtil
      .getBlockletIndex(encodedBlocklet, model.getSegmentProperties().getMeasures()));
  BlockletInfo3 blockletInfo3 =
      new BlockletInfo3(encodedBlocklet.getBlockletSize(), currentDataChunksOffset,
          currentDataChunksLength, dimensionOffset, measureOffset,
          encodedBlocklet.getNumberOfPages());
  // avoid storing the row counts as integers in encodedBlocklet, but store them
  // as int in thrift to support a larger number of rows in the future
  List<Integer> rowList = new ArrayList<>(encodedBlocklet.getRowCountInPage().size());
  for (int rows : encodedBlocklet.getRowCountInPage()) {
    rowList.add(rows);
  }
  blockletInfo3.setRow_count_in_page(rowList);
  blockletMetadata.add(blockletInfo3);
}
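The offsets and lengths collected in currentDataChunksOffset and currentDataChunksLength end up in BlockletInfo3 in the file footer, which is what lets a reader seek straight to one column's DataChunk3 without scanning the blocklet. A minimal reader-side sketch under that assumption (illustration only, not CarbonData's actual reader code):

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.List;

public class ChunkSeekExample {
  // Reads the serialized DataChunk3 bytes of one column, using the per-chunk
  // offset/length lists recorded by the writer above.
  static byte[] readDataChunk(FileChannel channel, List<Long> chunkOffsets,
      List<Integer> chunkLengths, int columnIndex) throws IOException {
    ByteBuffer buffer = ByteBuffer.allocate(chunkLengths.get(columnIndex));
    channel.read(buffer, chunkOffsets.get(columnIndex));  // positional read, no seek
    return buffer.array();
  }
}

Because dimensionOffset and measureOffset are also recorded, a reader can treat the dimension region and measure region of the blocklet as two contiguous byte ranges.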