
Example 1 with CarbonDataWriterException

use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

the class AbstractFactDataWriter method closeWriter.

/**
   * Method will be used to close the open file channel
   *
   * @throws CarbonDataWriterException
   */
public void closeWriter() throws CarbonDataWriterException {
    CarbonUtil.closeStreams(this.fileOutputStream, this.fileChannel);
    if (this.blockletInfoList.size() > 0) {
        renameCarbonDataFile();
        copyCarbonDataFileToCarbonStorePath(this.carbonDataFileTempPath.substring(0, this.carbonDataFileTempPath.lastIndexOf('.')));
        try {
            writeIndexFile();
        } catch (IOException e) {
            throw new CarbonDataWriterException("Problem while writing the index file", e);
        }
    }
    closeExecutorService();
}
Also used : IOException(java.io.IOException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)
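
A caller-side sketch may help here. This is not from the carbondata sources: the CarbonFactDataWriter parameter type and the (String, Throwable) constructor of CarbonDataLoadingException are assumptions; the point is only that closeWriter() can fail while flushing the index file, so a caller should surface the failure rather than swallow it.

import org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException;
import org.apache.carbondata.processing.store.writer.CarbonFactDataWriter;
import org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException;

// Hypothetical caller: close the writer and translate a failed close into a
// failed load instead of ignoring it.
static void finishWriter(CarbonFactDataWriter<?> writer) {
    try {
        writer.closeWriter();
    } catch (CarbonDataWriterException e) {
        // assumption: CarbonDataLoadingException offers a (String, Throwable) constructor
        throw new CarbonDataLoadingException("Failed to close fact data writer", e);
    }
}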

Example 2 with CarbonDataWriterException

use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

the class SingleThreadFinalSortFilesMerger method getSortedRecordFromFile.

/**
   * This method will be used to get the sorted record from file
   *
   * @return the next sorted record
   * @throws CarbonDataWriterException
   */
private Object[] getSortedRecordFromFile() throws CarbonDataWriterException {
    Object[] row = null;
    // poll the top object from the heap
    // the heap maintains a binary tree that satisfies the heap condition,
    // ordered by the comparator we pass to the heap
    // poll always removes the root of the tree and then performs a
    // trickle-down operation, which has complexity log(n)
    SortTempFileChunkHolder poll = this.recordHolderHeapLocal.poll();
    // get the row from chunk
    row = poll.getRow();
    // check if no entry is left in this chunk
    if (!poll.hasNext()) {
        // if chunk is empty then close the stream
        poll.closeStream();
        // change the file counter
        --this.fileCounter;
        // return row
        return row;
    }
    // read new row
    try {
        poll.readRow();
    } catch (CarbonSortKeyAndGroupByException e) {
        throw new CarbonDataWriterException(e.getMessage(), e);
    }
    // add to heap
    this.recordHolderHeapLocal.add(poll);
    // return row
    return row;
}
Also used : CarbonSortKeyAndGroupByException(org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException) SortTempFileChunkHolder(org.apache.carbondata.processing.sortandgroupby.sortdata.SortTempFileChunkHolder) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)
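
The comments above describe a classic k-way merge over a min-heap. The following is a minimal, self-contained sketch of the same poll / advance / re-offer loop using java.util.PriorityQueue and plain sorted lists; it is illustrative only and mirrors the getRow()/hasNext()/readRow() contract of SortTempFileChunkHolder, not the carbondata implementation itself.

import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

final class KWayMergeSketch {

    // pairs an iterator with its current head element, like a chunk holder
    private static final class Holder {
        final Iterator<Integer> it;
        Integer head;
        Holder(Iterator<Integer> it) { this.it = it; this.head = it.next(); }
    }

    static void merge(List<List<Integer>> sortedChunks) {
        PriorityQueue<Holder> heap =
            new PriorityQueue<>(Comparator.comparing((Holder h) -> h.head));
        for (List<Integer> chunk : sortedChunks) {
            if (!chunk.isEmpty()) {
                heap.add(new Holder(chunk.iterator()));
            }
        }
        while (!heap.isEmpty()) {
            // poll removes the root and trickles down in log(n)
            Holder top = heap.poll();
            System.out.println(top.head); // emit the globally smallest row
            if (top.it.hasNext()) {
                top.head = top.it.next(); // advance, like readRow()
                heap.add(top);            // re-offer the holder to the heap
            }
            // else: this chunk is exhausted, like closeStream() + --fileCounter
        }
    }
}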

Example 3 with CarbonDataWriterException

use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

the class CarbonFactDataWriterImplV3 method writeBlockletInfoToFile.

@Override
protected void writeBlockletInfoToFile(FileChannel channel, String filePath) throws CarbonDataWriterException {
    try {
        // get the current file position
        long currentPosition = channel.size();
        // get thrift file footer instance
        FileFooter3 convertFileMeta = CarbonMetadataUtil.convertFileFooterVersion3(blockletMetadata, blockletIndex, localCardinality, thriftColumnSchemaList.size(), dataWriterVo.getSegmentProperties());
        // fill the carbon index details
        fillBlockIndexInfoDetails(convertFileMeta.getNum_rows(), carbonDataFileName, currentPosition);
        // write the footer
        byte[] byteArray = CarbonUtil.getByteArray(convertFileMeta);
        ByteBuffer buffer = ByteBuffer.allocate(byteArray.length + CarbonCommonConstants.LONG_SIZE_IN_BYTE);
        buffer.put(byteArray);
        buffer.putLong(currentPosition);
        buffer.flip();
        channel.write(buffer);
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while writing the carbon file: ", e);
    }
}
Also used : FileFooter3(org.apache.carbondata.format.FileFooter3) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)
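
Because the writer appends the thrift footer followed by an 8-byte long holding the footer's start offset, a reader can locate the footer by reading that trailing long first. The sketch below is an illustrative reader, not carbondata's actual reader code; the method name is hypothetical.

import java.io.IOException;
import java.io.RandomAccessFile;

// Illustrative: recover the footer bytes written by writeBlockletInfoToFile.
static byte[] readFooterBytes(String filePath) throws IOException {
    try (RandomAccessFile file = new RandomAccessFile(filePath, "r")) {
        long fileLength = file.length();
        file.seek(fileLength - 8);          // the trailing long written by putLong
        long footerStart = file.readLong(); // big-endian, matching ByteBuffer's default
        int footerLength = (int) (fileLength - 8 - footerStart);
        byte[] footer = new byte[footerLength];
        file.seek(footerStart);
        file.readFully(footer);
        return footer;                      // thrift-encoded FileFooter3 bytes
    }
}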

Example 4 with CarbonDataWriterException

use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

the class CarbonFactDataWriterImplV3 method writeDataToFile.

/**
   * Below method will be used to write data to the carbon data file
   * Data Format
   * <Column1 Data ChunkV3><Column1<Page1><Page2><Page3><Page4>>
   * <Column2 Data ChunkV3><Column2<Page1><Page2><Page3><Page4>>
   * <Column3 Data ChunkV3><Column3<Page1><Page2><Page3><Page4>>
   * <Column4 Data ChunkV3><Column4<Page1><Page2><Page3><Page4>>
   * Each page will contain column data, inverted index and RLE index
   *
   * @param channel
   * @param dataChunkBytes
   */
private void writeDataToFile(FileChannel channel, byte[][] dataChunkBytes) {
    long offset = 0;
    // write the header
    try {
        if (fileChannel.size() == 0) {
            // below code is to write the file header
            byte[] fileHeader = CarbonUtil.getByteArray(CarbonMetadataUtil.getFileHeader(true, thriftColumnSchemaList, dataWriterVo.getSchemaUpdatedTimeStamp()));
            ByteBuffer buffer = ByteBuffer.allocate(fileHeader.length);
            buffer.put(fileHeader);
            buffer.flip();
            fileChannel.write(buffer);
        }
        offset = channel.size();
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while getting the file channel size");
    }
    // to maintain the offset of each data chunk in blocklet
    List<Long> currentDataChunksOffset = new ArrayList<>();
    // to maintain the length of each data chunk in blocklet
    List<Integer> currentDataChunksLength = new ArrayList<>();
    // get the node holder list
    List<NodeHolder> nodeHolderList = dataWriterHolder.getNodeHolder();
    int numberOfDimension = nodeHolderList.get(0).getKeyArray().length;
    int numberOfMeasures = nodeHolderList.get(0).getDataArray().length;
    NodeHolder nodeHolder = null;
    ByteBuffer buffer = null;
    int bufferSize = 0;
    long dimensionOffset = 0;
    long measureOffset = 0;
    int numberOfRows = 0;
    // calculate the number of rows in each blocklet
    for (int j = 0; j < nodeHolderList.size(); j++) {
        numberOfRows += nodeHolderList.get(j).getEntryCount();
    }
    try {
        for (int i = 0; i < numberOfDimension; i++) {
            currentDataChunksOffset.add(offset);
            currentDataChunksLength.add(dataChunkBytes[i].length);
            buffer = ByteBuffer.allocate(dataChunkBytes[i].length);
            buffer.put(dataChunkBytes[i]);
            buffer.flip();
            fileChannel.write(buffer);
            offset += dataChunkBytes[i].length;
            for (int j = 0; j < nodeHolderList.size(); j++) {
                nodeHolder = nodeHolderList.get(j);
                bufferSize = nodeHolder.getKeyLengths()[i] + (!nodeHolder.getIsSortedKeyBlock()[i] ? nodeHolder.getKeyBlockIndexLength()[i] : 0) + (dataWriterVo.getAggBlocks()[i] ? nodeHolder.getCompressedDataIndex()[i].length : 0);
                buffer = ByteBuffer.allocate(bufferSize);
                buffer.put(nodeHolder.getKeyArray()[i]);
                if (!nodeHolder.getIsSortedKeyBlock()[i]) {
                    buffer.putInt(nodeHolder.getCompressedIndex()[i].length);
                    buffer.put(nodeHolder.getCompressedIndex()[i]);
                    if (nodeHolder.getCompressedIndexMap()[i].length > 0) {
                        buffer.put(nodeHolder.getCompressedIndexMap()[i]);
                    }
                }
                if (nodeHolder.getAggBlocks()[i]) {
                    buffer.put(nodeHolder.getCompressedDataIndex()[i]);
                }
                buffer.flip();
                fileChannel.write(buffer);
                offset += bufferSize;
            }
        }
        dimensionOffset = offset;
        int dataChunkStartIndex = nodeHolderList.get(0).getKeyArray().length;
        for (int i = 0; i < numberOfMeasures; i++) {
            nodeHolderList = dataWriterHolder.getNodeHolder();
            currentDataChunksOffset.add(offset);
            currentDataChunksLength.add(dataChunkBytes[dataChunkStartIndex].length);
            buffer = ByteBuffer.allocate(dataChunkBytes[dataChunkStartIndex].length);
            buffer.put(dataChunkBytes[dataChunkStartIndex]);
            buffer.flip();
            fileChannel.write(buffer);
            offset += dataChunkBytes[dataChunkStartIndex].length;
            dataChunkStartIndex++;
            for (int j = 0; j < nodeHolderList.size(); j++) {
                nodeHolder = nodeHolderList.get(j);
                bufferSize = nodeHolder.getDataArray()[i].length;
                buffer = ByteBuffer.allocate(bufferSize);
                buffer.put(nodeHolder.getDataArray()[i]);
                buffer.flip();
                fileChannel.write(buffer);
                offset += bufferSize;
            }
        }
        measureOffset = offset;
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while writing the data", e);
    }
    blockletIndex.add(CarbonMetadataUtil.getBlockletIndex(nodeHolderList, dataWriterVo.getSegmentProperties().getMeasures()));
    BlockletInfo3 blockletInfo3 = new BlockletInfo3(numberOfRows, currentDataChunksOffset, currentDataChunksLength, dimensionOffset, measureOffset, dataWriterHolder.getNodeHolder().size());
    blockletMetadata.add(blockletInfo3);
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) NodeHolder(org.apache.carbondata.core.util.NodeHolder) BlockletInfo3(org.apache.carbondata.format.BlockletInfo3)
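
The core bookkeeping pattern in writeDataToFile is: record the running offset before each chunk is written, record the chunk length, write the chunk, and advance the offset. The sketch below isolates that pattern with hypothetical names; unlike the original it also loops on FileChannel.write, which is allowed to write fewer bytes than requested.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.List;

// Illustrative: write chunks sequentially while tracking offset/length pairs.
static void writeChunks(FileChannel channel, byte[][] chunks,
        List<Long> chunkOffsets, List<Integer> chunkLengths) throws IOException {
    long offset = channel.size();      // resume after any existing header bytes
    for (byte[] chunk : chunks) {
        chunkOffsets.add(offset);      // where this chunk starts in the file
        chunkLengths.add(chunk.length);
        ByteBuffer buffer = ByteBuffer.wrap(chunk);
        while (buffer.hasRemaining()) {
            channel.write(buffer);     // may be partial, hence the loop
        }
        offset += chunk.length;
    }
}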

Example 5 with CarbonDataWriterException

use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

the class CarbonFactDataWriterImplV1 method buildDataNodeHolder.

@Override
public NodeHolder buildDataNodeHolder(IndexStorage<int[]>[] keyStorageArray, byte[][] measureArray, int entryCount, byte[] startKey, byte[] endKey, WriterCompressModel compressionModel, byte[] noDictionaryStartKey, byte[] noDictionaryEndKey, BitSet[] nullValueIndexBitSet) throws CarbonDataWriterException {
    // set the empty byte array
    if (null == noDictionaryEndKey) {
        noDictionaryEndKey = new byte[0];
    }
    if (null == noDictionaryStartKey) {
        noDictionaryStartKey = new byte[0];
    }
    // total measure length
    int totalMsrArrySize = 0;
    // current measure length
    int currentMsrLenght = 0;
    int totalKeySize = 0;
    int keyBlockSize = 0;
    boolean[] isSortedData = new boolean[keyStorageArray.length];
    int[] keyLengths = new int[keyStorageArray.length];
    // below will calculate the min and max value for each column;
    // in the two 2d arrays below, the first index is the column and the
    // second holds the min/max value for that column
    byte[][] allMinValue = new byte[keyStorageArray.length][];
    byte[][] allMaxValue = new byte[keyStorageArray.length][];
    byte[][] keyBlockData = fillAndCompressedKeyBlockData(keyStorageArray, entryCount);
    boolean[] colGrpBlock = new boolean[keyStorageArray.length];
    for (int i = 0; i < keyLengths.length; i++) {
        keyLengths[i] = keyBlockData[i].length;
        isSortedData[i] = keyStorageArray[i].isAlreadySorted();
        if (!isSortedData[i]) {
            keyBlockSize++;
        }
        totalKeySize += keyLengths[i];
        if (dataWriterVo.getIsComplexType()[i] || dataWriterVo.getIsDictionaryColumn()[i]) {
            allMinValue[i] = keyStorageArray[i].getMin();
            allMaxValue[i] = keyStorageArray[i].getMax();
        } else {
            allMinValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMin());
            allMaxValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMax());
        }
        // if keyStorageArray is an instance of ColGroupBlockStorage then it is a colGroup chunk
        if (keyStorageArray[i] instanceof ColGroupBlockStorage) {
            colGrpBlock[i] = true;
        }
    }
    int[] keyBlockIdxLengths = new int[keyBlockSize];
    byte[][] dataAfterCompression = new byte[keyBlockSize][];
    byte[][] indexMap = new byte[keyBlockSize][];
    int idx = 0;
    for (int i = 0; i < isSortedData.length; i++) {
        if (!isSortedData[i]) {
            dataAfterCompression[idx] = numberCompressor.compress(keyStorageArray[i].getDataAfterComp());
            if (null != keyStorageArray[i].getIndexMap() && keyStorageArray[i].getIndexMap().length > 0) {
                indexMap[idx] = numberCompressor.compress(keyStorageArray[i].getIndexMap());
            } else {
                indexMap[idx] = new byte[0];
            }
            keyBlockIdxLengths[idx] = (dataAfterCompression[idx].length + indexMap[idx].length) + CarbonCommonConstants.INT_SIZE_IN_BYTE;
            idx++;
        }
    }
    int compressDataBlockSize = 0;
    for (int i = 0; i < dataWriterVo.getAggBlocks().length; i++) {
        if (dataWriterVo.getAggBlocks()[i]) {
            compressDataBlockSize++;
        }
    }
    byte[][] compressedDataIndex = new byte[compressDataBlockSize][];
    int[] dataIndexMapLength = new int[compressDataBlockSize];
    idx = 0;
    for (int i = 0; i < dataWriterVo.getAggBlocks().length; i++) {
        if (dataWriterVo.getAggBlocks()[i]) {
            try {
                compressedDataIndex[idx] = numberCompressor.compress(keyStorageArray[i].getDataIndexMap());
                dataIndexMapLength[idx] = compressedDataIndex[idx].length;
                idx++;
            } catch (Exception e) {
                throw new CarbonDataWriterException(e.getMessage(), e);
            }
        }
    }
    int[] msrLength = new int[dataWriterVo.getMeasureCount()];
    // each measure size
    for (int i = 0; i < measureArray.length; i++) {
        currentMsrLenght = measureArray[i].length;
        totalMsrArrySize += currentMsrLenght;
        msrLength[i] = currentMsrLenght;
    }
    NodeHolder holder = new NodeHolder();
    holder.setDataArray(measureArray);
    holder.setKeyArray(keyBlockData);
    holder.setMeasureNullValueIndex(nullValueIndexBitSet);
    // end key format will be <length of dictionary key><length of no
    // dictionary key><DictionaryKey><No Dictionary key>
    byte[] updatedNoDictionaryEndKey = updateNoDictionaryStartAndEndKey(noDictionaryEndKey);
    ByteBuffer buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + CarbonCommonConstants.INT_SIZE_IN_BYTE + endKey.length + updatedNoDictionaryEndKey.length);
    buffer.putInt(endKey.length);
    buffer.putInt(updatedNoDictionaryEndKey.length);
    buffer.put(endKey);
    buffer.put(updatedNoDictionaryEndKey);
    buffer.rewind();
    holder.setEndKey(buffer.array());
    holder.setMeasureLenght(msrLength);
    byte[] updatedNoDictionaryStartKey = updateNoDictionaryStartAndEndKey(noDictionaryStartKey);
    // start key format will be <length of dictionary key><length of no
    // dictionary key><DictionaryKey><No Dictionary key>
    buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + CarbonCommonConstants.INT_SIZE_IN_BYTE + startKey.length + updatedNoDictionaryStartKey.length);
    buffer.putInt(startKey.length);
    buffer.putInt(updatedNoDictionaryStartKey.length);
    buffer.put(startKey);
    buffer.put(updatedNoDictionaryStartKey);
    buffer.rewind();
    holder.setStartKey(buffer.array());
    holder.setEntryCount(entryCount);
    holder.setKeyLengths(keyLengths);
    holder.setKeyBlockIndexLength(keyBlockIdxLengths);
    holder.setIsSortedKeyBlock(isSortedData);
    holder.setCompressedIndex(dataAfterCompression);
    holder.setCompressedIndexMap(indexMap);
    holder.setDataIndexMapLength(dataIndexMapLength);
    holder.setCompressedDataIndex(compressedDataIndex);
    holder.setCompressionModel(compressionModel);
    holder.setTotalDimensionArrayLength(totalKeySize);
    holder.setTotalMeasureArrayLength(totalMsrArrySize);
    //setting column min max value
    holder.setColumnMaxData(allMaxValue);
    holder.setColumnMinData(allMinValue);
    holder.setAggBlocks(dataWriterVo.getAggBlocks());
    holder.setColGrpBlocks(colGrpBlock);
    return holder;
}
Also used : ColGroupBlockStorage(org.apache.carbondata.processing.store.colgroup.ColGroupBlockStorage) NodeHolder(org.apache.carbondata.core.util.NodeHolder) ByteBuffer(java.nio.ByteBuffer) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) IOException(java.io.IOException)
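
The start/end key layout used above is <int: dictionary key length><int: no-dictionary key length><dictionary key bytes><no-dictionary key bytes>. A minimal encode/decode sketch of that layout follows, with hypothetical method names and the literal 4 standing in for CarbonCommonConstants.INT_SIZE_IN_BYTE.

import java.nio.ByteBuffer;

// Illustrative: pack and unpack a start/end key in the format above.
static byte[] encodeKey(byte[] dictKey, byte[] noDictKey) {
    ByteBuffer buffer = ByteBuffer.allocate(4 + 4 + dictKey.length + noDictKey.length);
    buffer.putInt(dictKey.length);
    buffer.putInt(noDictKey.length);
    buffer.put(dictKey);
    buffer.put(noDictKey);
    return buffer.array();
}

static byte[][] decodeKey(byte[] encoded) {
    ByteBuffer buffer = ByteBuffer.wrap(encoded);
    byte[] dictKey = new byte[buffer.getInt()];
    byte[] noDictKey = new byte[buffer.getInt()];
    buffer.get(dictKey);
    buffer.get(noDictKey);
    return new byte[][] { dictKey, noDictKey };
}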

Aggregations

CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException): 22 usages
IOException (java.io.IOException): 14 usages
ByteBuffer (java.nio.ByteBuffer): 7 usages
Iterator (java.util.Iterator): 5 usages
CarbonDataLoadingException (org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException): 5 usages
CarbonSortKeyAndGroupByException (org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException): 5 usages
NodeHolder (org.apache.carbondata.core.util.NodeHolder): 4 usages
File (java.io.File): 3 usages
CarbonIterator (org.apache.carbondata.common.CarbonIterator): 3 usages
CarbonRowBatch (org.apache.carbondata.processing.newflow.row.CarbonRowBatch): 3 usages
ArrayList (java.util.ArrayList): 2 usages
ExecutorService (java.util.concurrent.ExecutorService): 2 usages
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 2 usages
CarbonFooterWriter (org.apache.carbondata.core.writer.CarbonFooterWriter): 2 usages
FileFooter (org.apache.carbondata.format.FileFooter): 2 usages
CarbonRow (org.apache.carbondata.processing.newflow.row.CarbonRow): 2 usages
UnsafeCarbonRowPage (org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage): 2 usages
SortTempChunkHolder (org.apache.carbondata.processing.newflow.sort.unsafe.holder.SortTempChunkHolder): 2 usages
SortDataRows (org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows): 2 usages
SortTempFileChunkHolder (org.apache.carbondata.processing.sortandgroupby.sortdata.SortTempFileChunkHolder): 2 usages