
Example 16 with CarbonDataWriterException

Use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

The class CarbonFactDataWriterImplV2, method writeDataToFile.

/**
   * Below method will be used to write the data to file
   * Data Format
   * <DColumn1DataChunk><DColumn1DataPage><DColumn1Rle>
   * <DColumn2DataChunk><DColumn2DataPage><DColumn2RowIds><DColumn2Rle>
   * <DColumn3DataChunk><DColumn3DataPage><DColumn3RowIds>
   * <MColumn1DataChunk><MColumn1DataPage>
   * <MColumn2DataChunk><MColumn2DataPage>
   * <MColumn3DataChunk><MColumn3DataPage>
   *
   * @param nodeHolder      holder containing the dimension and measure data for the blocklet
   * @param dataChunksBytes serialized data chunk metadata, one entry per column
   * @param channel         file channel of the carbon data file being written
   * @throws CarbonDataWriterException if reading the channel size or writing the data fails
   */
private void writeDataToFile(NodeHolder nodeHolder, byte[][] dataChunksBytes, FileChannel channel) throws CarbonDataWriterException {
    long offset = 0;
    try {
        offset = channel.size();
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while getting the file channel size");
    }
    List<Long> currentDataChunksOffset = new ArrayList<>();
    List<Short> currentDataChunksLength = new ArrayList<>();
    dataChunksLength.add(currentDataChunksLength);
    dataChunksOffsets.add(currentDataChunksOffset);
    int bufferSize = 0;
    int rowIdIndex = 0;
    int rleIndex = 0;
    for (int i = 0; i < nodeHolder.getIsSortedKeyBlock().length; i++) {
        currentDataChunksOffset.add(offset);
        currentDataChunksLength.add((short) dataChunksBytes[i].length);
        bufferSize += dataChunksBytes[i].length + nodeHolder.getKeyLengths()[i]
                + (!nodeHolder.getIsSortedKeyBlock()[i] ? nodeHolder.getKeyBlockIndexLength()[rowIdIndex] : 0)
                + (dataWriterVo.getAggBlocks()[i] ? nodeHolder.getCompressedDataIndex()[rleIndex].length : 0);
        offset += dataChunksBytes[i].length;
        offset += nodeHolder.getKeyLengths()[i];
        if (!nodeHolder.getIsSortedKeyBlock()[i]) {
            offset += nodeHolder.getKeyBlockIndexLength()[rowIdIndex];
            rowIdIndex++;
        }
        if (dataWriterVo.getAggBlocks()[i]) {
            offset += nodeHolder.getDataIndexMapLength()[rleIndex];
            rleIndex++;
        }
    }
    ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
    rleIndex = 0;
    rowIdIndex = 0;
    for (int i = 0; i < nodeHolder.getIsSortedKeyBlock().length; i++) {
        buffer.put(dataChunksBytes[i]);
        buffer.put(nodeHolder.getKeyArray()[i]);
        if (!nodeHolder.getIsSortedKeyBlock()[i]) {
            buffer.putInt(nodeHolder.getCompressedIndex()[rowIdIndex].length);
            buffer.put(nodeHolder.getCompressedIndex()[rowIdIndex]);
            if (nodeHolder.getCompressedIndexMap()[rowIdIndex].length > 0) {
                buffer.put(nodeHolder.getCompressedIndexMap()[rowIdIndex]);
            }
            rowIdIndex++;
        }
        if (dataWriterVo.getAggBlocks()[i]) {
            buffer.put(nodeHolder.getCompressedDataIndex()[rleIndex]);
            rleIndex++;
        }
    }
    try {
        buffer.flip();
        channel.write(buffer);
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while writing the dimension data in carbon data file", e);
    }
    int dataChunkIndex = nodeHolder.getKeyArray().length;
    int totalLength = 0;
    for (int i = 0; i < nodeHolder.getDataArray().length; i++) {
        currentDataChunksOffset.add(offset);
        currentDataChunksLength.add((short) dataChunksBytes[dataChunkIndex].length);
        offset += dataChunksBytes[dataChunkIndex].length;
        offset += nodeHolder.getDataArray()[i].length;
        totalLength += dataChunksBytes[dataChunkIndex].length;
        totalLength += nodeHolder.getDataArray()[i].length;
        dataChunkIndex++;
    }
    buffer = ByteBuffer.allocate(totalLength);
    dataChunkIndex = nodeHolder.getKeyArray().length;
    for (int i = 0; i < nodeHolder.getDataArray().length; i++) {
        buffer.put(dataChunksBytes[dataChunkIndex++]);
        buffer.put(nodeHolder.getDataArray()[i]);
    }
    try {
        buffer.flip();
        channel.write(buffer);
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while writing the measure data in carbon data file", e);
    }
}
Also used: ArrayList (java.util.ArrayList), IOException (java.io.IOException), CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException), ByteBuffer (java.nio.ByteBuffer)
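
The V2 writer above follows a fixed pattern: compute the exact buffer size up front, fill a ByteBuffer, flip it, and drain it into the FileChannel. Below is a minimal, self-contained sketch of that pattern; the file name and the chunk payloads are invented for illustration and are not part of CarbonData.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

public class ChannelWriteSketch {
    public static void main(String[] args) throws IOException {
        // made-up chunks standing in for dataChunksBytes / key arrays
        byte[][] chunks = { "header".getBytes(), "payload".getBytes() };
        // pre-compute the exact buffer size, as writeDataToFile does
        int bufferSize = 0;
        for (byte[] chunk : chunks) {
            bufferSize += chunk.length;
        }
        try (FileChannel channel = FileChannel.open(Paths.get("sketch.bin"),
                StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
            // record the starting offset, like the channel.size() call above
            long offset = channel.size();
            ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
            for (byte[] chunk : chunks) {
                buffer.put(chunk);
            }
            // flip switches the buffer from filling to draining before the write
            buffer.flip();
            channel.write(buffer);
            System.out.println("wrote " + bufferSize + " bytes starting at offset " + offset);
        }
    }
}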

Example 17 with CarbonDataWriterException

Use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

The class CarbonFactDataWriterImplV3, method writeDataToFile.

private void writeDataToFile(FileChannel channel) {
    // get the list of node holders
    List<NodeHolder> nodeHolderList = dataWriterHolder.getNodeHolder();
    long blockletDataSize = 0;
    // get data chunks for all the columns
    byte[][] dataChunkBytes = new byte[nodeHolderList.get(0).getKeyArray().length + nodeHolderList.get(0).getDataArray().length][];
    int measureStartIndex = nodeHolderList.get(0).getKeyArray().length;
    // calculate the size of data chunks
    try {
        for (int i = 0; i < nodeHolderList.get(0).getKeyArray().length; i++) {
            dataChunkBytes[i] = CarbonUtil.getByteArray(CarbonMetadataUtil.getDataChunk3(nodeHolderList, thriftColumnSchemaList, dataWriterVo.getSegmentProperties(), i, true));
            blockletDataSize += dataChunkBytes[i].length;
        }
        for (int i = 0; i < nodeHolderList.get(0).getDataArray().length; i++) {
            dataChunkBytes[measureStartIndex] = CarbonUtil.getByteArray(CarbonMetadataUtil.getDataChunk3(nodeHolderList, thriftColumnSchemaList, dataWriterVo.getSegmentProperties(), i, false));
            blockletDataSize += dataChunkBytes[measureStartIndex].length;
            measureStartIndex++;
        }
    } catch (IOException e) {
        throw new CarbonDataWriterException("Problem while getting the data chunks", e);
    }
    // calculate the total size of data to be written
    blockletDataSize += dataWriterHolder.getSize();
    // if the data size would exceed the block size, create a new file
    updateBlockletFileChannel(blockletDataSize);
    // write data to file
    writeDataToFile(fileChannel, dataChunkBytes);
    // clear the data holder
    dataWriterHolder.clear();
}
Also used: IOException (java.io.IOException), NodeHolder (org.apache.carbondata.core.util.NodeHolder), CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)
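
updateBlockletFileChannel(blockletDataSize) is what decides whether the projected blocklet still fits in the current block file. Its body is not shown on this page, so the following is only a sketch of the size-based roll-over it implies; the class, field, and method names here are assumptions, not CarbonData APIs.

/** Hypothetical sketch of a size-based file roll-over decision. */
public class BlockRollOverSketch {
    private final long maxFileSize;
    private long bytesWritten;
    private int fileCount;

    public BlockRollOverSketch(long maxFileSize) {
        this.maxFileSize = maxFileSize;
    }

    public void accept(long blockletSize) {
        // if the next blocklet would push the current file past the limit,
        // roll over to a new file before accounting for it
        if (bytesWritten > 0 && bytesWritten + blockletSize > maxFileSize) {
            fileCount++;
            bytesWritten = 0;
        }
        bytesWritten += blockletSize;
    }

    public static void main(String[] args) {
        BlockRollOverSketch sketch = new BlockRollOverSketch(100);
        for (long size : new long[] { 40, 40, 40 }) {
            sketch.accept(size);
            System.out.println("file #" + sketch.fileCount + ", bytes: " + sketch.bytesWritten);
        }
    }
}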

Example 18 with CarbonDataWriterException

Use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

The class CarbonFactDataWriterImplV3, method buildDataNodeHolder.

/**
   * Below method will be used to build the node holder object.
   * This node holder object will be used to persist data which will
   * be written to the carbon data file.
   */
@Override
public NodeHolder buildDataNodeHolder(IndexStorage<short[]>[] keyStorageArray, byte[][] measureArray, int entryCount, byte[] startKey, byte[] endKey, WriterCompressModel compressionModel, byte[] noDictionaryStartKey, byte[] noDictionaryEndKey, BitSet[] nullValueIndexBitSet) throws CarbonDataWriterException {
    // set the empty byte array
    if (null == noDictionaryEndKey) {
        noDictionaryEndKey = new byte[0];
    }
    if (null == noDictionaryStartKey) {
        noDictionaryStartKey = new byte[0];
    }
    // total measure length
    int totalMsrArrySize = 0;
    // current measure length
    int currentMsrLenght = 0;
    int totalKeySize = 0;
    int keyBlockSize = 0;
    boolean[] isSortedData = new boolean[keyStorageArray.length];
    int[] keyLengths = new int[keyStorageArray.length];
    // calculate the min and max value for each column;
    // in the 2d arrays below, the first index is the column and the second
    // holds the min and max values for that column
    byte[][] dimensionMinValue = new byte[keyStorageArray.length][];
    byte[][] dimensionMaxValue = new byte[keyStorageArray.length][];
    byte[][] measureMinValue = new byte[measureArray.length][];
    byte[][] measureMaxValue = new byte[measureArray.length][];
    byte[][] keyBlockData = fillAndCompressedKeyBlockData(keyStorageArray, entryCount);
    boolean[] colGrpBlock = new boolean[keyStorageArray.length];
    for (int i = 0; i < keyLengths.length; i++) {
        keyLengths[i] = keyBlockData[i].length;
        isSortedData[i] = keyStorageArray[i].isAlreadySorted();
        keyBlockSize++;
        totalKeySize += keyLengths[i];
        if (dataWriterVo.getIsComplexType()[i] || dataWriterVo.getIsDictionaryColumn()[i]) {
            dimensionMinValue[i] = keyStorageArray[i].getMin();
            dimensionMaxValue[i] = keyStorageArray[i].getMax();
        } else {
            dimensionMinValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMin());
            dimensionMaxValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMax());
        }
        // colGroup chunk
        if (keyStorageArray[i] instanceof ColGroupBlockStorage) {
            colGrpBlock[i] = true;
        }
    }
    for (int i = 0; i < measureArray.length; i++) {
        measureMaxValue[i] = CarbonMetadataUtil.getByteValueForMeasure(compressionModel.getMaxValue()[i], dataWriterVo.getSegmentProperties().getMeasures().get(i).getDataType());
        measureMinValue[i] = CarbonMetadataUtil.getByteValueForMeasure(compressionModel.getMinValue()[i], dataWriterVo.getSegmentProperties().getMeasures().get(i).getDataType());
    }
    int[] keyBlockIdxLengths = new int[keyBlockSize];
    byte[][] dataAfterCompression = new byte[keyBlockSize][];
    byte[][] indexMap = new byte[keyBlockSize][];
    for (int i = 0; i < isSortedData.length; i++) {
        if (!isSortedData[i]) {
            dataAfterCompression[i] = getByteArray(keyStorageArray[i].getDataAfterComp());
            if (null != keyStorageArray[i].getIndexMap() && keyStorageArray[i].getIndexMap().length > 0) {
                indexMap[i] = getByteArray(keyStorageArray[i].getIndexMap());
            } else {
                indexMap[i] = new byte[0];
            }
            keyBlockIdxLengths[i] = (dataAfterCompression[i].length + indexMap[i].length) + CarbonCommonConstants.INT_SIZE_IN_BYTE;
        }
    }
    byte[][] compressedDataIndex = new byte[keyBlockSize][];
    int[] dataIndexMapLength = new int[keyBlockSize];
    for (int i = 0; i < dataWriterVo.getAggBlocks().length; i++) {
        if (dataWriterVo.getAggBlocks()[i]) {
            try {
                compressedDataIndex[i] = getByteArray(keyStorageArray[i].getDataIndexMap());
                dataIndexMapLength[i] = compressedDataIndex[i].length;
            } catch (Exception e) {
                throw new CarbonDataWriterException(e.getMessage(), e);
            }
        }
    }
    int[] msrLength = new int[dataWriterVo.getMeasureCount()];
    // each measure size
    for (int i = 0; i < measureArray.length; i++) {
        currentMsrLenght = measureArray[i].length;
        totalMsrArrySize += currentMsrLenght;
        msrLength[i] = currentMsrLenght;
    }
    NodeHolder holder = new NodeHolder();
    holder.setDataArray(measureArray);
    holder.setKeyArray(keyBlockData);
    holder.setMeasureNullValueIndex(nullValueIndexBitSet);
    // end key format will be <length of dictionary key><length of no
    // dictionary key><DictionaryKey><No Dictionary key>
    byte[] updatedNoDictionaryEndKey = updateNoDictionaryStartAndEndKey(noDictionaryEndKey);
    ByteBuffer buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + CarbonCommonConstants.INT_SIZE_IN_BYTE + endKey.length + updatedNoDictionaryEndKey.length);
    buffer.putInt(endKey.length);
    buffer.putInt(updatedNoDictionaryEndKey.length);
    buffer.put(endKey);
    buffer.put(updatedNoDictionaryEndKey);
    buffer.rewind();
    holder.setEndKey(buffer.array());
    holder.setMeasureLenght(msrLength);
    byte[] updatedNoDictionaryStartKey = updateNoDictionaryStartAndEndKey(noDictionaryStartKey);
    // start key format will be <length of dictionary key><length of no
    // dictionary key><DictionaryKey><No Dictionary key>
    buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + CarbonCommonConstants.INT_SIZE_IN_BYTE + startKey.length + updatedNoDictionaryStartKey.length);
    buffer.putInt(startKey.length);
    buffer.putInt(updatedNoDictionaryStartKey.length);
    buffer.put(startKey);
    buffer.put(updatedNoDictionaryStartKey);
    buffer.rewind();
    holder.setStartKey(buffer.array());
    holder.setEntryCount(entryCount);
    holder.setKeyLengths(keyLengths);
    holder.setKeyBlockIndexLength(keyBlockIdxLengths);
    holder.setIsSortedKeyBlock(isSortedData);
    holder.setCompressedIndex(dataAfterCompression);
    holder.setCompressedIndexMap(indexMap);
    holder.setDataIndexMapLength(dataIndexMapLength);
    holder.setCompressedDataIndex(compressedDataIndex);
    holder.setCompressionModel(compressionModel);
    holder.setTotalDimensionArrayLength(totalKeySize);
    holder.setTotalMeasureArrayLength(totalMsrArrySize);
    holder.setMeasureColumnMaxData(measureMaxValue);
    holder.setMeasureColumnMinData(measureMinValue);
    // setting column min max value
    holder.setColumnMaxData(dimensionMaxValue);
    holder.setColumnMinData(dimensionMinValue);
    holder.setAggBlocks(dataWriterVo.getAggBlocks());
    holder.setColGrpBlocks(colGrpBlock);
    List<byte[]> dimensionDataChunk2 = null;
    List<byte[]> measureDataChunk2 = null;
    try {
        dimensionDataChunk2 = CarbonMetadataUtil.getDataChunk2(holder, thriftColumnSchemaList, dataWriterVo.getSegmentProperties(), true);
        measureDataChunk2 = CarbonMetadataUtil.getDataChunk2(holder, thriftColumnSchemaList, dataWriterVo.getSegmentProperties(), false);
    } catch (IOException e) {
        throw new CarbonDataWriterException(e.getMessage(), e);
    }
    holder.setHolderSize(calculateSize(holder, dimensionDataChunk2, measureDataChunk2));
    return holder;
}
Also used: ColGroupBlockStorage (org.apache.carbondata.processing.store.colgroup.ColGroupBlockStorage), IOException (java.io.IOException), CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException), NodeHolder (org.apache.carbondata.core.util.NodeHolder), ByteBuffer (java.nio.ByteBuffer)
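
The start and end keys above are laid out as <length of dictionary key><length of no dictionary key><DictionaryKey><No Dictionary key>. A minimal sketch of encoding and decoding that layout with a plain ByteBuffer, using made-up key bytes (the literal 4 stands in for CarbonCommonConstants.INT_SIZE_IN_BYTE):

import java.nio.ByteBuffer;

/** Round-trips the <int length><int length><dict key><no-dict key> layout. */
public class KeyLayoutSketch {
    static byte[] encode(byte[] dictKey, byte[] noDictKey) {
        ByteBuffer buffer = ByteBuffer.allocate(4 + 4 + dictKey.length + noDictKey.length);
        buffer.putInt(dictKey.length);
        buffer.putInt(noDictKey.length);
        buffer.put(dictKey);
        buffer.put(noDictKey);
        return buffer.array();
    }

    static byte[][] decode(byte[] encoded) {
        ByteBuffer buffer = ByteBuffer.wrap(encoded);
        byte[] dictKey = new byte[buffer.getInt()];
        byte[] noDictKey = new byte[buffer.getInt()];
        buffer.get(dictKey);
        buffer.get(noDictKey);
        return new byte[][] { dictKey, noDictKey };
    }

    public static void main(String[] args) {
        byte[] encoded = encode(new byte[] { 1, 2, 3 }, new byte[] { 9 });
        byte[][] parts = decode(encoded);
        System.out.println(parts[0].length + " dictionary bytes, "
            + parts[1].length + " no-dictionary bytes");
    }
}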

Example 19 with CarbonDataWriterException

Use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

The class SingleThreadFinalSortFilesMerger, method startSorting.

/**
   * Below method will be used to start the sorting process. This method will
   * get all the temp files present in the sort temp folder, create the
   * record holder heap, and then read the first record from each file to
   * initialize the heap.
   *
   * @throws CarbonDataWriterException
   */
private void startSorting(File[] files) throws CarbonDataWriterException {
    this.fileCounter = files.length;
    if (fileCounter == 0) {
        LOGGER.info("No files to merge sort");
        return;
    }
    this.fileBufferSize = CarbonDataProcessorUtil.getFileBufferSize(this.fileCounter, CarbonProperties.getInstance(), CarbonCommonConstants.CONSTANT_SIZE_TEN);
    LOGGER.info("Number of temp file: " + this.fileCounter);
    LOGGER.info("File Buffer Size: " + this.fileBufferSize);
    // create record holder heap
    createRecordHolderQueue(files);
    // iterate over file list and create chunk holder and add to heap
    LOGGER.info("Started adding first record from each file");
    int maxThreadForSorting = 0;
    try {
        maxThreadForSorting = Integer.parseInt(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_MERGE_SORT_READER_THREAD, CarbonCommonConstants.CARBON_MERGE_SORT_READER_THREAD_DEFAULTVALUE));
    } catch (NumberFormatException e) {
        maxThreadForSorting = Integer.parseInt(CarbonCommonConstants.CARBON_MERGE_SORT_READER_THREAD_DEFAULTVALUE);
    }
    ExecutorService service = Executors.newFixedThreadPool(maxThreadForSorting);
    for (final File tempFile : files) {
        Callable<Void> runnable = new Callable<Void>() {

            @Override
            public Void call() throws CarbonSortKeyAndGroupByException {
                // create chunk holder
                SortTempFileChunkHolder sortTempFileChunkHolder = new SortTempFileChunkHolder(tempFile, dimensionCount, complexDimensionCount, measureCount, fileBufferSize, noDictionaryCount, measureDataType, isNoDictionaryColumn, isNoDictionarySortColumn);
                // initialize
                sortTempFileChunkHolder.initialize();
                sortTempFileChunkHolder.readRow();
                // add to heap
                synchronized (LOCKOBJECT) {
                    recordHolderHeapLocal.add(sortTempFileChunkHolder);
                }
                return null;
            }
        };
        service.submit(runnable);
    }
    service.shutdown();
    try {
        service.awaitTermination(2, TimeUnit.HOURS);
    } catch (Exception e) {
        throw new CarbonDataWriterException(e.getMessage(), e);
    }
    LOGGER.info("Heap Size" + this.recordHolderHeapLocal.size());
}
Also used: ExecutorService (java.util.concurrent.ExecutorService), SortTempFileChunkHolder (org.apache.carbondata.processing.sortandgroupby.sortdata.SortTempFileChunkHolder), File (java.io.File), CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException), Callable (java.util.concurrent.Callable), CarbonSortKeyAndGroupByException (org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException)
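
startSorting fans the chunk-holder initialization out over a fixed thread pool, shuts the pool down, and bounds the wait with awaitTermination. Below is a standalone sketch of the same idiom with invented inputs and pool size; unlike the original, it also checks awaitTermination's boolean result so a timeout is not silently ignored.

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ParallelInitSketch {
    public static void main(String[] args) throws InterruptedException {
        ExecutorService service = Executors.newFixedThreadPool(3);
        for (final String input : new String[] { "a", "b", "c" }) {
            // one task per input, mirroring one task per temp file above
            service.submit(new Callable<Void>() {
                @Override
                public Void call() {
                    // stand-in for initializing a SortTempFileChunkHolder
                    System.out.println("initialized " + input);
                    return null;
                }
            });
        }
        service.shutdown(); // stop accepting new tasks
        if (!service.awaitTermination(1, TimeUnit.MINUTES)) {
            throw new IllegalStateException("initialization timed out");
        }
    }
}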

Example 20 with CarbonDataWriterException

Use of org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException in project carbondata by apache.

The class AbstractFactDataWriter, method renameCarbonDataFile.

/**
   * This method will rename the carbon data file from in-progress status to normal
   *
   * @throws CarbonDataWriterException
   */
protected void renameCarbonDataFile() throws CarbonDataWriterException {
    File origFile = new File(this.carbonDataFileTempPath.substring(0, this.carbonDataFileTempPath.lastIndexOf('.')));
    File curFile = new File(this.carbonDataFileTempPath);
    if (!curFile.renameTo(origFile)) {
        throw new CarbonDataWriterException("Problem while renaming the file");
    }
}
Also used: CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile), File (java.io.File), CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)
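
The rename above strips everything from the last '.' onward, turning the in-progress temp path back into the final file name. A minimal sketch of that idiom with a hypothetical file name (the ".inprogress" suffix is an assumption for illustration):

import java.io.File;
import java.io.IOException;

public class RenameSketch {
    public static void main(String[] args) throws IOException {
        File tempFile = new File("part-0.carbondata.inprogress");
        tempFile.createNewFile();
        String tempPath = tempFile.getPath();
        // drop everything from the last '.' onward, as renameCarbonDataFile does
        File finalFile = new File(tempPath.substring(0, tempPath.lastIndexOf('.')));
        if (!tempFile.renameTo(finalFile)) {
            throw new IOException("Problem while renaming " + tempPath);
        }
        System.out.println("renamed to " + finalFile.getPath());
    }
}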

Aggregations

CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException): 22
IOException (java.io.IOException): 14
ByteBuffer (java.nio.ByteBuffer): 7
Iterator (java.util.Iterator): 5
CarbonDataLoadingException (org.apache.carbondata.processing.newflow.exception.CarbonDataLoadingException): 5
CarbonSortKeyAndGroupByException (org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException): 5
NodeHolder (org.apache.carbondata.core.util.NodeHolder): 4
File (java.io.File): 3
CarbonIterator (org.apache.carbondata.common.CarbonIterator): 3
CarbonRowBatch (org.apache.carbondata.processing.newflow.row.CarbonRowBatch): 3
ArrayList (java.util.ArrayList): 2
ExecutorService (java.util.concurrent.ExecutorService): 2
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 2
CarbonFooterWriter (org.apache.carbondata.core.writer.CarbonFooterWriter): 2
FileFooter (org.apache.carbondata.format.FileFooter): 2
CarbonRow (org.apache.carbondata.processing.newflow.row.CarbonRow): 2
UnsafeCarbonRowPage (org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage): 2
SortTempChunkHolder (org.apache.carbondata.processing.newflow.sort.unsafe.holder.SortTempChunkHolder): 2
SortDataRows (org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows): 2
SortTempFileChunkHolder (org.apache.carbondata.processing.sortandgroupby.sortdata.SortTempFileChunkHolder): 2