
Example 1 with BlockletInfo3

Use of org.apache.carbondata.format.BlockletInfo3 in project carbondata by apache.

In class CarbonFactDataWriterImplV3, method writeDataToFile:

/**
   * Below method will be used to write data to the carbon data file.
   * Data format:
   * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>>
   * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>>
   * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>>
   * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>>
   * Each page will contain column data, inverted index and RLE index.
   *
   * @param channel        file channel the blocklet data is written to
   * @param dataChunkBytes serialized data chunk bytes, one entry per column
   *                       (dimension chunks first, then measure chunks)
   */
private void writeDataToFile(FileChannel channel, byte[][] dataChunkBytes) {
    long offset = 0;
    try {
        // write the file header once, while the file is still empty
        if (channel.size() == 0) {
            byte[] fileHeader = CarbonUtil.getByteArray(CarbonMetadataUtil.getFileHeader(true, thriftColumnSchemaList, dataWriterVo.getSchemaUpdatedTimeStamp()));
            ByteBuffer buffer = ByteBuffer.allocate(fileHeader.length);
            buffer.put(fileHeader);
            buffer.flip();
            channel.write(buffer);
        }
        offset = channel.size();
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while getting the file channel size", e);
    }
    // to maintain the offset of each data chunk in blocklet
    List<Long> currentDataChunksOffset = new ArrayList<>();
    // to maintain the length of each data chunk in blocklet
    List<Integer> currentDataChunksLength = new ArrayList<>();
    // get the node holder list
    List<NodeHolder> nodeHolderList = dataWriterHolder.getNodeHolder();
    int numberOfDimension = nodeHolderList.get(0).getKeyArray().length;
    int numberOfMeasures = nodeHolderList.get(0).getDataArray().length;
    NodeHolder nodeHolder = null;
    ByteBuffer buffer = null;
    int bufferSize = 0;
    long dimensionOffset = 0;
    long measureOffset = 0;
    int numberOfRows = 0;
    // calculate the total number of rows in this blocklet (sum over all of its pages)
    for (int j = 0; j < nodeHolderList.size(); j++) {
        numberOfRows += nodeHolderList.get(j).getEntryCount();
    }
    try {
        for (int i = 0; i < numberOfDimension; i++) {
            currentDataChunksOffset.add(offset);
            currentDataChunksLength.add(dataChunkBytes[i].length);
            buffer = ByteBuffer.allocate(dataChunkBytes[i].length);
            buffer.put(dataChunkBytes[i]);
            buffer.flip();
            channel.write(buffer);
            offset += dataChunkBytes[i].length;
            for (int j = 0; j < nodeHolderList.size(); j++) {
                nodeHolder = nodeHolderList.get(j);
                // page size = key data, plus the inverted index when the block is
                // not sorted, plus the rle/data index when agg blocks are enabled
                bufferSize = nodeHolder.getKeyLengths()[i]
                        + (!nodeHolder.getIsSortedKeyBlock()[i] ? nodeHolder.getKeyBlockIndexLength()[i] : 0)
                        + (dataWriterVo.getAggBlocks()[i] ? nodeHolder.getCompressedDataIndex()[i].length : 0);
                buffer = ByteBuffer.allocate(bufferSize);
                buffer.put(nodeHolder.getKeyArray()[i]);
                if (!nodeHolder.getIsSortedKeyBlock()[i]) {
                    buffer.putInt(nodeHolder.getCompressedIndex()[i].length);
                    buffer.put(nodeHolder.getCompressedIndex()[i]);
                    if (nodeHolder.getCompressedIndexMap()[i].length > 0) {
                        buffer.put(nodeHolder.getCompressedIndexMap()[i]);
                    }
                }
                if (nodeHolder.getAggBlocks()[i]) {
                    buffer.put(nodeHolder.getCompressedDataIndex()[i]);
                }
                buffer.flip();
                channel.write(buffer);
                offset += bufferSize;
            }
        }
        dimensionOffset = offset;
        // measure data chunks start right after the dimension chunks in dataChunkBytes
        int dataChunkStartIndex = numberOfDimension;
        for (int i = 0; i < numberOfMeasures; i++) {
            currentDataChunksOffset.add(offset);
            currentDataChunksLength.add(dataChunkBytes[dataChunkStartIndex].length);
            buffer = ByteBuffer.allocate(dataChunkBytes[dataChunkStartIndex].length);
            buffer.put(dataChunkBytes[dataChunkStartIndex]);
            buffer.flip();
            channel.write(buffer);
            offset += dataChunkBytes[dataChunkStartIndex].length;
            dataChunkStartIndex++;
            for (int j = 0; j < nodeHolderList.size(); j++) {
                nodeHolder = nodeHolderList.get(j);
                bufferSize = nodeHolder.getDataArray()[i].length;
                buffer = ByteBuffer.allocate(bufferSize);
                buffer.put(nodeHolder.getDataArray()[i]);
                buffer.flip();
                channel.write(buffer);
                offset += bufferSize;
            }
        }
        measureOffset = offset;
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while writing the data", e);
    }
    blockletIndex.add(CarbonMetadataUtil.getBlockletIndex(nodeHolderList, dataWriterVo.getSegmentProperties().getMeasures()));
    BlockletInfo3 blockletInfo3 = new BlockletInfo3(numberOfRows, currentDataChunksOffset, currentDataChunksLength, dimensionOffset, measureOffset, dataWriterHolder.getNodeHolder().size());
    blockletMetadata.add(blockletInfo3);
}
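
The BlockletInfo3 constructed on the last lines records the bookkeeping gathered during the write: the row count, the offset and length of every column data chunk, the end offsets of the dimension and measure regions, and the page count. A minimal sketch of that mapping with made-up values (the class name BlockletInfo3Sketch and all numbers are illustrative; only the argument order, taken from the constructor call above, comes from the source):

import java.util.Arrays;
import java.util.List;
import org.apache.carbondata.format.BlockletInfo3;

public class BlockletInfo3Sketch {
    public static void main(String[] args) {
        List<Long> chunkOffsets = Arrays.asList(64L, 1024L);  // file offset of each column data chunk
        List<Integer> chunkLengths = Arrays.asList(120, 96);  // serialized length of each data chunk
        BlockletInfo3 info = new BlockletInfo3(
            32000,         // numberOfRows: total rows across all pages of the blocklet
            chunkOffsets,  // currentDataChunksOffset
            chunkLengths,  // currentDataChunksLength
            4096L,         // dimensionOffset: file offset where the dimension region ends
            8192L,         // measureOffset: file offset where the measure region ends
            4);            // number of pages (NodeHolders) in the blocklet
        System.out.println(info);
    }
}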
Also used: ArrayList (java.util.ArrayList), IOException (java.io.IOException), ByteBuffer (java.nio.ByteBuffer), CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException), NodeHolder (org.apache.carbondata.core.util.NodeHolder), BlockletInfo3 (org.apache.carbondata.format.BlockletInfo3)

Aggregations

IOException (java.io.IOException): 1
ByteBuffer (java.nio.ByteBuffer): 1
ArrayList (java.util.ArrayList): 1
NodeHolder (org.apache.carbondata.core.util.NodeHolder): 1
BlockletInfo3 (org.apache.carbondata.format.BlockletInfo3): 1
CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException): 1
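
A side note on the I/O pattern: writeDataToFile repeats the same allocate/put/flip/write sequence for the file header, every data chunk, and every page. A hypothetical helper, not part of the CarbonData codebase, that captures the pattern might look like this:

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

final class ChannelWriteSketch {
    // Hypothetical helper: wraps the allocate/put/flip/write sequence repeated in
    // writeDataToFile and returns the byte count so the caller can advance its offset.
    static long writeBytes(FileChannel channel, byte[] bytes) throws IOException {
        ByteBuffer buffer = ByteBuffer.allocate(bytes.length);
        buffer.put(bytes);
        // flip switches the buffer from filling to draining before the write
        buffer.flip();
        // FileChannel.write may write fewer bytes than it is given, so drain fully
        while (buffer.hasRemaining()) {
            channel.write(buffer);
        }
        return bytes.length;
    }
}

With such a helper, each write site in the loops above would reduce to offset += writeBytes(channel, ...). ByteBuffer.wrap(bytes) would avoid the intermediate copy entirely; the allocate/put/flip form is kept here only to mirror the original code.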