Search in sources :

Example 1 with EncodedBlocklet

use of org.apache.carbondata.core.datastore.blocklet.EncodedBlocklet in project carbondata by apache.

the class CarbonFactDataWriterImplV3 method writeBlockletToFile.

/**
 * Flushes the blocklet currently held by blockletDataHolder to the data file.
 * The holder is cleared afterwards, whether or not the write succeeded.
 *
 * @throws CarbonDataWriterException wrapping any IOException raised while writing
 */
private void writeBlockletToFile() {
    final EncodedBlocklet blocklet = blockletDataHolder.getEncodedBlocklet();
    // one data chunk slot per column: dimensions plus measures
    final int columnCount = blocklet.getNumberOfDimension() + blocklet.getNumberOfMeasure();
    final byte[][] columnChunkBytes = new byte[columnCount][];
    final long metadataSize = fillDataChunk(blocklet, columnChunkBytes);
    // total size to be written = collected blocklet data + data chunk metadata;
    // a new file is started first if this would exceed the block size threshold
    createNewFileIfReachThreshold(blockletDataHolder.getSize() + metadataSize);
    try {
        // an empty file needs its header before the first blocklet
        if (currentOffsetInFile == 0) {
            writeHeaderToFile();
        }
        writeBlockletToFile(columnChunkBytes);
        // notify the listener only when it is registered for this writer's database and table
        if (listener != null) {
            final boolean sameDatabase =
                model.getDatabaseName().equalsIgnoreCase(listener.getTblIdentifier().getDatabaseName());
            if (sameDatabase
                    && model.getTableName().equalsIgnoreCase(listener.getTblIdentifier().getTableName())) {
                listener.onBlockletEnd(blockletId++);
            }
        }
        // page numbering restarts for the next blocklet
        pageId = 0;
    } catch (IOException ioe) {
        LOGGER.error("Problem while writing file", ioe);
        throw new CarbonDataWriterException("Problem while writing file", ioe);
    } finally {
        // release the collected pages
        blockletDataHolder.clear();
    }
}
Also used : EncodedBlocklet(org.apache.carbondata.core.datastore.blocklet.EncodedBlocklet) IOException(java.io.IOException) CarbonDataWriterException(org.apache.carbondata.core.datastore.exception.CarbonDataWriterException)

Example 2 with EncodedBlocklet

use of org.apache.carbondata.core.datastore.blocklet.EncodedBlocklet in project carbondata by apache.

the class CarbonFactDataWriterImplV3 method writeBlockletToFile.

/**
 * Writes one blocklet into the file, column by column, and records its metadata.
 * For every column the serialized data chunk is written first, followed by the
 * encoded pages of that column. File format:
 * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>>
 * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>>
 * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>>
 * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>>
 *
 * @param dataChunkBytes serialized data chunk per column; dimension columns come
 *                       first, measure columns follow
 * @throws IOException if a write to fileChannel fails
 */
private void writeBlockletToFile(byte[][] dataChunkBytes) throws IOException {
    final EncodedBlocklet blocklet = blockletDataHolder.getEncodedBlocklet();
    final int dimensionCount = blocklet.getNumberOfDimension();
    final int measureCount = blocklet.getNumberOfMeasure();
    // start offset and length of each column's data chunk within the file
    final List<Long> chunkOffsets = new ArrayList<>();
    final List<Integer> chunkLengths = new ArrayList<>();
    // running position, advanced by the size of everything handed to the channel
    long position = currentOffsetInFile;

    for (int dim = 0; dim < dimensionCount; dim++) {
        chunkOffsets.add(position);
        final byte[] chunk = dataChunkBytes[dim];
        chunkLengths.add(chunk.length);
        currentOffsetInFile += fileChannel.write(ByteBuffer.wrap(chunk));
        position += chunk.length;
        final BlockletEncodedColumnPage column = blocklet.getEncodedDimensionColumnPages().get(dim);
        for (EncodedColumnPage page : column.getEncodedColumnPageList()) {
            final ByteBuffer pageData = page.getEncodedData();
            // capture the size before the write consumes the buffer
            final int pageLength = pageData.limit();
            currentOffsetInFile += fileChannel.write(pageData);
            position += pageLength;
        }
    }
    // position reached once all dimension columns are written
    final long dimensionOffset = position;

    // measure chunks sit after the dimension chunks in dataChunkBytes
    final int firstMeasureChunk = blocklet.getNumberOfDimension();
    for (int msr = 0; msr < measureCount; msr++) {
        chunkOffsets.add(position);
        final byte[] chunk = dataChunkBytes[firstMeasureChunk + msr];
        chunkLengths.add(chunk.length);
        currentOffsetInFile += fileChannel.write(ByteBuffer.wrap(chunk));
        position += chunk.length;
        final BlockletEncodedColumnPage column = blocklet.getEncodedMeasureColumnPages().get(msr);
        for (EncodedColumnPage page : column.getEncodedColumnPageList()) {
            final ByteBuffer pageData = page.getEncodedData();
            final int pageLength = pageData.limit();
            currentOffsetInFile += fileChannel.write(pageData);
            position += pageLength;
        }
    }
    // position reached once all measure columns are written
    final long measureOffset = position;

    blockletIndex.add(CarbonMetadataUtil.getBlockletIndex(blocklet, model.getSegmentProperties().getMeasures()));
    final BlockletInfo3 info = new BlockletInfo3(blocklet.getBlockletSize(), chunkOffsets, chunkLengths, dimensionOffset, measureOffset, blocklet.getNumberOfPages());
    // The encoded blocklet avoids storing the per-page row counts as integers,
    // but thrift stores them as int to support larger row counts in future.
    final List<Integer> rowsPerPage = new ArrayList<>(blocklet.getRowCountInPage().size());
    for (int rowCount : blocklet.getRowCountInPage()) {
        rowsPerPage.add(rowCount);
    }
    info.setRow_count_in_page(rowsPerPage);
    blockletMetadata.add(info);
}
Also used : EncodedColumnPage(org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage) BlockletEncodedColumnPage(org.apache.carbondata.core.datastore.blocklet.BlockletEncodedColumnPage) BlockletInfo3(org.apache.carbondata.format.BlockletInfo3) EncodedBlocklet(org.apache.carbondata.core.datastore.blocklet.EncodedBlocklet) ArrayList(java.util.ArrayList) ByteBuffer(java.nio.ByteBuffer)

Aggregations

EncodedBlocklet (org.apache.carbondata.core.datastore.blocklet.EncodedBlocklet)2 IOException (java.io.IOException)1 ByteBuffer (java.nio.ByteBuffer)1 ArrayList (java.util.ArrayList)1 BlockletEncodedColumnPage (org.apache.carbondata.core.datastore.blocklet.BlockletEncodedColumnPage)1 CarbonDataWriterException (org.apache.carbondata.core.datastore.exception.CarbonDataWriterException)1 EncodedColumnPage (org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage)1 BlockletInfo3 (org.apache.carbondata.format.BlockletInfo3)1