Example 11 with CarbonDataWriterException

use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

the class UnsafeSingleThreadFinalSortFilesMerger method getSortedRecordFromFile.

/**
   * This method will be used to get the sorted record from file
   *
   * @return sorted record
   */
private Object[] getSortedRecordFromFile() throws CarbonDataWriterException {
    Object[] row = null;
    // poll the top object from the heap
    // the heap maintains a binary tree ordered by the comparator that was
    // passed in when the heap was created; poll() always removes the root
    // of the tree and then performs a trickle-down operation, so its
    // complexity is O(log n)
    SortTempChunkHolder poll = this.recordHolderHeapLocal.poll();
    // get the row from chunk
    row = poll.getRow();
    // check if no more entries are present
    if (!poll.hasNext()) {
        // if chunk is empty then close the stream
        poll.close();
        // change the file counter
        --this.fileCounter;
        // return row
        return row;
    }
    // read new row
    try {
        poll.readRow();
    } catch (Exception e) {
        throw new CarbonDataWriterException(e.getMessage(), e);
    }
    // add to heap
    this.recordHolderHeapLocal.add(poll);
    // return row
    return row;
}
Also used : SortTempChunkHolder(org.apache.carbondata.processing.newflow.sort.unsafe.holder.SortTempChunkHolder) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)
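
The poll / readRow / re-add pattern above is the core of a k-way merge. As a hedged illustration of the same idea outside CarbonData, here is a minimal, self-contained sketch using java.util.PriorityQueue; the Holder class and the integer sources are made up for the example and are not CarbonData APIs.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

public class KWayMergeSketch {
    public static void main(String[] args) {
        List<Iterator<Integer>> sortedSources = new ArrayList<>();
        sortedSources.add(List.of(1, 4, 7).iterator());
        sortedSources.add(List.of(2, 5, 8).iterator());
        sortedSources.add(List.of(3, 6, 9).iterator());

        // heap entry: the current head value plus the source it came from
        class Holder implements Comparable<Holder> {
            int row;
            final Iterator<Integer> source;
            Holder(int row, Iterator<Integer> source) {
                this.row = row;
                this.source = source;
            }
            public int compareTo(Holder other) {
                return Integer.compare(row, other.row);
            }
        }

        // initialize the heap with the first record from each source,
        // mirroring the readRow() calls in startSorting
        PriorityQueue<Holder> heap = new PriorityQueue<>();
        for (Iterator<Integer> source : sortedSources) {
            if (source.hasNext()) {
                heap.add(new Holder(source.next(), source));
            }
        }
        while (!heap.isEmpty()) {
            Holder top = heap.poll();        // O(log n) removal of the smallest head
            System.out.println(top.row);     // emit the next row in merged order
            if (top.source.hasNext()) {
                top.row = top.source.next(); // advance the source we just consumed
                heap.add(top);               // re-insert it with its new head
            }
            // an exhausted source is simply not re-added, which mirrors
            // the fileCounter decrement in getSortedRecordFromFile
        }
    }
}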

Example 12 with CarbonDataWriterException

use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

the class UnsafeSingleThreadFinalSortFilesMerger method startSorting.

/**
   * Below method will be used to start the sorting process. This method will
   * get all the temp files present in the sort temp folder, create the
   * record holder heap, read the first record from each file and
   * initialize the heap
   *
   */
private void startSorting(UnsafeCarbonRowPage[] rowPages, List<UnsafeInMemoryIntermediateDataMerger> merges) throws CarbonDataWriterException {
    try {
        File[] filesToMergeSort = getFilesToMergeSort();
        this.fileCounter = rowPages.length + filesToMergeSort.length + merges.size();
        if (fileCounter == 0) {
            LOGGER.info("No files to merge sort");
            return;
        }
        LOGGER.info("Number of row pages: " + this.fileCounter);
        // create record holder heap
        createRecordHolderQueue();
        // iterate over file list and create chunk holder and add to heap
        LOGGER.info("Started adding first record from each page");
        for (final UnsafeCarbonRowPage rowPage : rowPages) {
            SortTempChunkHolder sortTempFileChunkHolder = new UnsafeInmemoryHolder(rowPage, parameters.getDimColCount() + parameters.getComplexDimColCount() + parameters.getMeasureColCount(), parameters.getNumberOfSortColumns());
            // initialize
            sortTempFileChunkHolder.readRow();
            recordHolderHeapLocal.add(sortTempFileChunkHolder);
        }
        for (final UnsafeInMemoryIntermediateDataMerger merger : merges) {
            SortTempChunkHolder sortTempFileChunkHolder = new UnsafeFinalMergePageHolder(merger, parameters.getNoDictionarySortColumn(), parameters.getDimColCount() + parameters.getComplexDimColCount() + parameters.getMeasureColCount());
            // initialize
            sortTempFileChunkHolder.readRow();
            recordHolderHeapLocal.add(sortTempFileChunkHolder);
        }
        for (final File file : filesToMergeSort) {
            SortTempChunkHolder sortTempFileChunkHolder = new UnsafeSortTempFileChunkHolder(file, parameters);
            // initialize
            sortTempFileChunkHolder.readRow();
            recordHolderHeapLocal.add(sortTempFileChunkHolder);
        }
        LOGGER.info("Heap Size" + this.recordHolderHeapLocal.size());
    } catch (Exception e) {
        LOGGER.error(e);
        throw new CarbonDataWriterException(e.getMessage(), e);
    }
}
Also used : UnsafeFinalMergePageHolder(org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeFinalMergePageHolder) SortTempChunkHolder(org.apache.carbondata.processing.newflow.sort.unsafe.holder.SortTempChunkHolder) UnsafeCarbonRowPage(org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage) File(java.io.File) UnsafeInmemoryHolder(org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeInmemoryHolder) UnsafeSortTempFileChunkHolder(org.apache.carbondata.processing.newflow.sort.unsafe.holder.UnsafeSortTempFileChunkHolder) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)
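
startSorting relies on createRecordHolderQueue building a min-heap whose ordering comes from the holders' comparator. The sketch below is a hypothetical stand-in, not the CarbonData implementation: it only shows the general shape of creating a PriorityQueue sized to the number of merge sources with an explicit comparator.

import java.util.Comparator;
import java.util.PriorityQueue;

public class RecordHolderQueueSketch {

    // hypothetical stand-in for createRecordHolderQueue(): a min-heap sized
    // to the number of merge sources, ordered by the first sort column
    static PriorityQueue<int[]> createRecordHolderQueue(int fileCounter) {
        // the capacity hint avoids resizing while the heap is being filled
        return new PriorityQueue<>(fileCounter, Comparator.comparingInt((int[] row) -> row[0]));
    }

    public static void main(String[] args) {
        PriorityQueue<int[]> heap = createRecordHolderQueue(3);
        heap.add(new int[] { 5 });
        heap.add(new int[] { 1 });
        heap.add(new int[] { 3 });
        System.out.println(heap.poll()[0]); // prints 1: the smallest head comes out first
    }
}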

Example 13 with CarbonDataWriterException

use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

the class CarbonFactDataWriterImplV1 method writeBlockletInfoToFile.

/**
   * This method will write the file footer metadata at the end of the file, in thrift format
   */
protected void writeBlockletInfoToFile(FileChannel channel, String filePath) throws CarbonDataWriterException {
    try {
        long currentPosition = channel.size();
        CarbonFooterWriter writer = new CarbonFooterWriter(filePath);
        FileFooter convertFileMeta = CarbonMetadataUtil.convertFileFooter(blockletInfoList, localCardinality.length, localCardinality, thriftColumnSchemaList, dataWriterVo.getSegmentProperties());
        fillBlockIndexInfoDetails(convertFileMeta.getNum_rows(), carbonDataFileName, currentPosition);
        writer.writeFooter(convertFileMeta, currentPosition);
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while writing the carbon file: ", e);
    }
}
Also used : FileFooter(org.apache.carbondata.format.FileFooter) IOException(java.io.IOException) CarbonFooterWriter(org.apache.carbondata.core.writer.CarbonFooterWriter) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)
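
The essential move in writeBlockletInfoToFile is capturing channel.size() before writing the footer, so the footer's start offset is known and can be recorded for readers. A minimal sketch of that pattern with plain NIO follows; the file name, payload, and trailing-offset layout are illustrative assumptions, not the CarbonData file format.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

public class FooterOffsetSketch {
    public static void main(String[] args) throws IOException {
        Path file = Path.of("footer-demo.bin"); // illustrative file name
        try (FileChannel channel = FileChannel.open(file,
                StandardOpenOption.CREATE, StandardOpenOption.WRITE,
                StandardOpenOption.TRUNCATE_EXISTING)) {
            // pretend these bytes are the data blocks written earlier
            channel.write(ByteBuffer.wrap(new byte[] { 1, 2, 3, 4 }));
            // capture the footer's start offset, like channel.size() above
            long footerStart = channel.size();
            channel.write(ByteBuffer.wrap("FOOTER".getBytes()));
            // append the offset so a reader can seek straight to the footer
            ByteBuffer offsetBuf = ByteBuffer.allocate(Long.BYTES);
            offsetBuf.putLong(footerStart);
            offsetBuf.flip();
            channel.write(offsetBuf);
        }
    }
}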

Example 14 with CarbonDataWriterException

use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

the class CarbonFactDataWriterImplV1 method writeDataToFile.

/**
   * This method is responsible for writing blocklet to the data file
   *
   * @return file offset; the offset is the current position of the file
   * @throws CarbonDataWriterException CarbonDataWriterException is thrown when anything
   *                                   goes wrong while writing the leaf file
   */
private long writeDataToFile(NodeHolder nodeHolder, FileChannel channel) throws CarbonDataWriterException {
    // create byte buffer
    byte[][] compressedIndex = nodeHolder.getCompressedIndex();
    byte[][] compressedIndexMap = nodeHolder.getCompressedIndexMap();
    byte[][] compressedDataIndex = nodeHolder.getCompressedDataIndex();
    int indexBlockSize = 0;
    for (int i = 0; i < nodeHolder.getKeyBlockIndexLength().length; i++) {
        indexBlockSize += nodeHolder.getKeyBlockIndexLength()[i] + CarbonCommonConstants.INT_SIZE_IN_BYTE;
    }
    for (int i = 0; i < nodeHolder.getDataIndexMapLength().length; i++) {
        indexBlockSize += nodeHolder.getDataIndexMapLength()[i];
    }
    ByteBuffer byteBuffer = ByteBuffer.allocate(nodeHolder.getTotalDimensionArrayLength() + nodeHolder.getTotalMeasureArrayLength() + indexBlockSize);
    long offset = 0;
    try {
        // get the current offset
        offset = channel.size();
        // add key array to byte buffer
        for (int i = 0; i < nodeHolder.getKeyArray().length; i++) {
            byteBuffer.put(nodeHolder.getKeyArray()[i]);
        }
        // add measure data array to byte buffer
        for (int i = 0; i < nodeHolder.getDataArray().length; i++) {
            byteBuffer.put(nodeHolder.getDataArray()[i]);
        }
        // add each key block index (length, data and optional index map) to byte buffer
        ByteBuffer buffer1 = null;
        for (int i = 0; i < compressedIndex.length; i++) {
            buffer1 = ByteBuffer.allocate(nodeHolder.getKeyBlockIndexLength()[i]);
            buffer1.putInt(compressedIndex[i].length);
            buffer1.put(compressedIndex[i]);
            if (compressedIndexMap[i].length > 0) {
                buffer1.put(compressedIndexMap[i]);
            }
            buffer1.rewind();
            byteBuffer.put(buffer1.array());
        }
        for (int i = 0; i < compressedDataIndex.length; i++) {
            byteBuffer.put(compressedDataIndex[i]);
        }
        byteBuffer.flip();
        // write data to file
        channel.write(byteBuffer);
    } catch (IOException exception) {
        throw new CarbonDataWriterException("Problem in writing carbon file: ", exception);
    }
    // return the offset; the reader side uses it to know from which position to start reading the file
    return offset;
}
Also used : IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)
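
writeDataToFile pre-computes the exact buffer size, fills the buffer block by block, and only then flips it before handing it to the channel; skipping flip() would make the channel write the wrong region. The following self-contained sketch shows that allocate/put/flip/write sequence; the chunk contents and file name are invented for the example.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

public class BufferAssemblySketch {
    public static void main(String[] args) throws IOException {
        // stand-ins for the key, data and index byte arrays
        byte[][] chunks = { { 10, 11 }, { 12 }, { 13, 14, 15 } };
        int total = 0;
        for (byte[] chunk : chunks) {
            total += chunk.length; // size the buffer up front, as writeDataToFile does
        }
        ByteBuffer buffer = ByteBuffer.allocate(total);
        for (byte[] chunk : chunks) {
            buffer.put(chunk); // append each block back to back
        }
        buffer.flip(); // switch the buffer from filling mode to draining mode
        try (FileChannel channel = FileChannel.open(Path.of("buffer-demo.bin"),
                StandardOpenOption.CREATE, StandardOpenOption.WRITE,
                StandardOpenOption.TRUNCATE_EXISTING)) {
            long offset = channel.size(); // record where this write starts
            channel.write(buffer);
            System.out.println("wrote " + total + " bytes at offset " + offset);
        }
    }
}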

Example 15 with CarbonDataWriterException

use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

the class CarbonFactDataWriterImplV2 method writeBlockletData.

/**
   * Below method will be used to write the data to carbon data file
   *
   * @param holder node holder that contains the blocklet data to be written
   * @throws CarbonDataWriterException any problem in writing operation
   */
@Override
public void writeBlockletData(NodeHolder holder) throws CarbonDataWriterException {
    if (holder.getEntryCount() == 0) {
        return;
    }
    // running total used to calculate the size of the blocklet
    int size = 0;
    // get the blocklet info object
    BlockletInfoColumnar blockletInfo = getBlockletInfo(holder, 0);
    List<DataChunk2> datachunks = null;
    try {
        // get all the data chunks
        datachunks = CarbonMetadataUtil.getDatachunk2(blockletInfo, thriftColumnSchemaList, dataWriterVo.getSegmentProperties());
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while getting the data chunks", e);
    }
    // data chunk byte array
    byte[][] dataChunkByteArray = new byte[datachunks.size()][];
    for (int i = 0; i < dataChunkByteArray.length; i++) {
        dataChunkByteArray[i] = CarbonUtil.getByteArray(datachunks.get(i));
        // add the data chunk size
        size += dataChunkByteArray[i].length;
    }
    // add row id index length
    for (int i = 0; i < holder.getKeyBlockIndexLength().length; i++) {
        size += holder.getKeyBlockIndexLength()[i];
    }
    // add rle index length
    for (int i = 0; i < holder.getDataIndexMapLength().length; i++) {
        size += holder.getDataIndexMapLength()[i];
    }
    // add dimension column data page and measure column data page size
    long blockletDataSize = holder.getTotalDimensionArrayLength() + holder.getTotalMeasureArrayLength() + size;
    // if size of the file already reached threshold size then create a new file and get the file
    // channel object
    updateBlockletFileChannel(blockletDataSize);
    // write the version header when the file is new, so the carbondata file can be read separately
    try {
        if (fileChannel.size() == 0) {
            ColumnarFormatVersion version = CarbonProperties.getInstance().getFormatVersion();
            byte[] header = (CarbonCommonConstants.CARBON_DATA_VERSION_HEADER + version).getBytes();
            ByteBuffer buffer = ByteBuffer.allocate(header.length);
            buffer.put(header);
            buffer.rewind();
            fileChannel.write(buffer);
        }
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while getting the file channel size", e);
    }
    // write data to file and get its offset
    writeDataToFile(holder, dataChunkByteArray, fileChannel);
    // add blocklet info to list
    blockletInfoList.add(blockletInfo);
    LOGGER.info("A new blocklet is added, its data size is: " + blockletDataSize + " Byte");
}
Also used : BlockletInfoColumnar(org.apache.carbondata.core.metadata.BlockletInfoColumnar) DataChunk2(org.apache.carbondata.format.DataChunk2) IOException(java.io.IOException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) ByteBuffer(java.nio.ByteBuffer) ColumnarFormatVersion(org.apache.carbondata.core.metadata.ColumnarFormatVersion)
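
The fileChannel.size() == 0 check above is a write-once guard: only a freshly created file receives the version header, which makes each carbondata file self-describing. Below is a hedged sketch of the same guard plus the matching read-back; the header string and file name are assumptions for illustration, not the real CarbonCommonConstants value.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

public class VersionHeaderSketch {
    // made-up constant; the real value is CarbonCommonConstants.CARBON_DATA_VERSION_HEADER
    private static final String VERSION_HEADER = "DEMOFORMAT#2";

    public static void main(String[] args) throws IOException {
        Path file = Path.of("header-demo.bin");
        try (FileChannel channel = FileChannel.open(file,
                StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
            if (channel.size() == 0) { // only a brand-new file gets the header
                channel.write(ByteBuffer.wrap(VERSION_HEADER.getBytes()));
            }
        }
        try (FileChannel channel = FileChannel.open(file, StandardOpenOption.READ)) {
            ByteBuffer header = ByteBuffer.allocate(VERSION_HEADER.length());
            channel.read(header, 0); // a reader can sniff the version before parsing
            System.out.println(new String(header.array()));
        }
    }
}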

Aggregations

CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException): 22
IOException (java.io.IOException): 14
ByteBuffer (java.nio.ByteBuffer): 7
Iterator (java.util.Iterator): 5
CarbonDataLoadingException (org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException): 5
CarbonSortKeyAndGroupByException (org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException): 5
NodeHolder (org.apache.carbondata.core.util.NodeHolder): 4
File (java.io.File): 3
CarbonIterator (org.apache.carbondata.common.CarbonIterator): 3
CarbonRowBatch (org.apache.carbondata.processing.newflow.row.CarbonRowBatch): 3
ArrayList (java.util.ArrayList): 2
ExecutorService (java.util.concurrent.ExecutorService): 2
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 2
CarbonFooterWriter (org.apache.carbondata.core.writer.CarbonFooterWriter): 2
FileFooter (org.apache.carbondata.format.FileFooter): 2
CarbonRow (org.apache.carbondata.processing.newflow.row.CarbonRow): 2
UnsafeCarbonRowPage (org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage): 2
SortTempChunkHolder (org.apache.carbondata.processing.newflow.sort.unsafe.holder.SortTempChunkHolder): 2
SortDataRows (org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows): 2
SortTempFileChunkHolder (org.apache.carbondata.processing.sortandgroupby.sortdata.SortTempFileChunkHolder): 2