Search in sources :

Example 1 with ColGroupBlockStorage

use of org.apache.carbondata.processing.store.colgroup.ColGroupBlockStorage in project carbondata by apache.

the class CarbonFactDataWriterImplV1 method buildDataNodeHolder.

/**
 * Builds the {@link NodeHolder} that carries the key (dimension) data, measure data and the
 * associated lengths, indexes and min/max values for one call of the writer.
 *
 * <p>For every key column the compressed key block length, the already-sorted flag and the
 * min/max values are recorded. For every column that is not already sorted, the values returned
 * by {@code getDataAfterComp()} and {@code getIndexMap()} are compressed with
 * {@code numberCompressor} and stored densely (only unsorted columns get a slot). For every
 * block flagged in {@code dataWriterVo.getAggBlocks()} the data index map is compressed in the
 * same dense fashion. Start and end keys are packed as
 * {@code <dictionary key length><no-dictionary key length><dictionary key><no-dictionary key>}.
 *
 * @param keyStorageArray      storage of each key column, one entry per column
 * @param measureArray         byte data of each measure, one entry per measure
 * @param entryCount           number of entries, stored on the holder and used when filling the
 *                             key block data
 * @param startKey             dictionary part of the start key
 * @param endKey               dictionary part of the end key
 * @param compressionModel     compression model, stored on the holder as-is
 * @param noDictionaryStartKey no-dictionary part of the start key; {@code null} is treated as
 *                             an empty array
 * @param noDictionaryEndKey   no-dictionary part of the end key; {@code null} is treated as an
 *                             empty array
 * @param nullValueIndexBitSet null-value bit sets of the measures, stored on the holder as-is
 * @return the populated node holder
 * @throws CarbonDataWriterException if compressing a data index map fails; the original
 *                                   exception is attached as the cause
 */
@Override
public NodeHolder buildDataNodeHolder(IndexStorage<int[]>[] keyStorageArray, byte[][] measureArray, int entryCount, byte[] startKey, byte[] endKey, WriterCompressModel compressionModel, byte[] noDictionaryStartKey, byte[] noDictionaryEndKey, BitSet[] nullValueIndexBitSet) throws CarbonDataWriterException {
    // a missing no-dictionary key is handled as an empty key
    if (null == noDictionaryEndKey) {
        noDictionaryEndKey = new byte[0];
    }
    if (null == noDictionaryStartKey) {
        noDictionaryStartKey = new byte[0];
    }
    // total length of all measure data
    int totalMsrArrySize = 0;
    // total length of all compressed key blocks
    int totalKeySize = 0;
    // number of key columns that are NOT already sorted
    int keyBlockSize = 0;
    boolean[] isSortedData = new boolean[keyStorageArray.length];
    int[] keyLengths = new int[keyStorageArray.length];
    // min and max value of each key column, indexed by column
    byte[][] allMinValue = new byte[keyStorageArray.length][];
    byte[][] allMaxValue = new byte[keyStorageArray.length][];
    byte[][] keyBlockData = fillAndCompressedKeyBlockData(keyStorageArray, entryCount);
    boolean[] colGrpBlock = new boolean[keyStorageArray.length];
    for (int i = 0; i < keyLengths.length; i++) {
        keyLengths[i] = keyBlockData[i].length;
        isSortedData[i] = keyStorageArray[i].isAlreadySorted();
        if (!isSortedData[i]) {
            keyBlockSize++;
        }
        totalKeySize += keyLengths[i];
        if (dataWriterVo.getIsComplexType()[i] || dataWriterVo.getIsDictionaryColumn()[i]) {
            allMinValue[i] = keyStorageArray[i].getMin();
            allMaxValue[i] = keyStorageArray[i].getMax();
        } else {
            // no-dictionary columns need their min/max passed through the update helper
            allMinValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMin());
            allMaxValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMax());
        }
        // a ColGroupBlockStorage instance marks the column as a column-group chunk
        colGrpBlock[i] = keyStorageArray[i] instanceof ColGroupBlockStorage;
    }
    // the three arrays below are dense: one slot per unsorted column, in column order
    int[] keyBlockIdxLengths = new int[keyBlockSize];
    byte[][] dataAfterCompression = new byte[keyBlockSize][];
    byte[][] indexMap = new byte[keyBlockSize][];
    int idx = 0;
    for (int i = 0; i < isSortedData.length; i++) {
        if (!isSortedData[i]) {
            dataAfterCompression[idx] = numberCompressor.compress(keyStorageArray[i].getDataAfterComp());
            if (null != keyStorageArray[i].getIndexMap() && keyStorageArray[i].getIndexMap().length > 0) {
                indexMap[idx] = numberCompressor.compress(keyStorageArray[i].getIndexMap());
            } else {
                indexMap[idx] = new byte[0];
            }
            keyBlockIdxLengths[idx] = (dataAfterCompression[idx].length + indexMap[idx].length) + CarbonCommonConstants.INT_SIZE_IN_BYTE;
            idx++;
        }
    }
    // count the aggregated blocks so the data index arrays can be sized densely
    int compressDataBlockSize = 0;
    for (int i = 0; i < dataWriterVo.getAggBlocks().length; i++) {
        if (dataWriterVo.getAggBlocks()[i]) {
            compressDataBlockSize++;
        }
    }
    byte[][] compressedDataIndex = new byte[compressDataBlockSize][];
    int[] dataIndexMapLength = new int[compressDataBlockSize];
    idx = 0;
    for (int i = 0; i < dataWriterVo.getAggBlocks().length; i++) {
        if (dataWriterVo.getAggBlocks()[i]) {
            try {
                compressedDataIndex[idx] = numberCompressor.compress(keyStorageArray[i].getDataIndexMap());
                dataIndexMapLength[idx] = compressedDataIndex[idx].length;
                idx++;
            } catch (Exception e) {
                // keep the original exception as the cause so the stack trace is not lost
                throw new CarbonDataWriterException(e.getMessage(), e);
            }
        }
    }
    int[] msrLength = new int[dataWriterVo.getMeasureCount()];
    // record each measure size and accumulate the total
    for (int i = 0; i < measureArray.length; i++) {
        msrLength[i] = measureArray[i].length;
        totalMsrArrySize += msrLength[i];
    }
    NodeHolder holder = new NodeHolder();
    holder.setDataArray(measureArray);
    holder.setKeyArray(keyBlockData);
    holder.setMeasureNullValueIndex(nullValueIndexBitSet);
    // end key format will be <length of dictionary key><length of no
    // dictionary key><DictionaryKey><No Dictionary key>
    byte[] updatedNoDictionaryEndKey = updateNoDictionaryStartAndEndKey(noDictionaryEndKey);
    ByteBuffer buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + CarbonCommonConstants.INT_SIZE_IN_BYTE + endKey.length + updatedNoDictionaryEndKey.length);
    buffer.putInt(endKey.length);
    buffer.putInt(updatedNoDictionaryEndKey.length);
    buffer.put(endKey);
    buffer.put(updatedNoDictionaryEndKey);
    // array() exposes the whole backing array regardless of the buffer position
    holder.setEndKey(buffer.array());
    holder.setMeasureLenght(msrLength);
    byte[] updatedNoDictionaryStartKey = updateNoDictionaryStartAndEndKey(noDictionaryStartKey);
    // start key format will be <length of dictionary key><length of no
    // dictionary key><DictionaryKey><No Dictionary key>
    buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + CarbonCommonConstants.INT_SIZE_IN_BYTE + startKey.length + updatedNoDictionaryStartKey.length);
    buffer.putInt(startKey.length);
    buffer.putInt(updatedNoDictionaryStartKey.length);
    buffer.put(startKey);
    buffer.put(updatedNoDictionaryStartKey);
    holder.setStartKey(buffer.array());
    holder.setEntryCount(entryCount);
    holder.setKeyLengths(keyLengths);
    holder.setKeyBlockIndexLength(keyBlockIdxLengths);
    holder.setIsSortedKeyBlock(isSortedData);
    holder.setCompressedIndex(dataAfterCompression);
    holder.setCompressedIndexMap(indexMap);
    holder.setDataIndexMapLength(dataIndexMapLength);
    holder.setCompressedDataIndex(compressedDataIndex);
    holder.setCompressionModel(compressionModel);
    holder.setTotalDimensionArrayLength(totalKeySize);
    holder.setTotalMeasureArrayLength(totalMsrArrySize);
    // setting column min max value
    holder.setColumnMaxData(allMaxValue);
    holder.setColumnMinData(allMinValue);
    holder.setAggBlocks(dataWriterVo.getAggBlocks());
    holder.setColGrpBlocks(colGrpBlock);
    return holder;
}
Also used : ColGroupBlockStorage(org.apache.carbondata.processing.store.colgroup.ColGroupBlockStorage) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) NodeHolder(org.apache.carbondata.core.util.NodeHolder) ByteBuffer(java.nio.ByteBuffer) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) IOException(java.io.IOException)

Example 2 with ColGroupBlockStorage

use of org.apache.carbondata.processing.store.colgroup.ColGroupBlockStorage in project carbondata by apache.

the class CarbonFactDataWriterImplV3 method buildDataNodeHolder.

/**
 * Below method will be used to build the node holder object.
 * This node holder object will be used to persist data which will
 * be written in carbon data file.
 *
 * <p>For every key column the compressed key block length, the already-sorted flag and the
 * min/max values are recorded; measure min/max values are taken from the compression model.
 * Unlike a dense layout, the index arrays built here have one slot per key column and are
 * addressed by column position: slots of already-sorted columns (and of blocks not flagged in
 * {@code dataWriterVo.getAggBlocks()}) are left unset. Start and end keys are packed as
 * {@code <dictionary key length><no-dictionary key length><dictionary key><no-dictionary key>}.
 * Finally the dimension and measure data chunks are generated to compute the holder size.
 *
 * @param keyStorageArray      storage of each key column, one entry per column
 * @param measureArray         byte data of each measure, one entry per measure
 * @param entryCount           number of entries, stored on the holder and used when filling the
 *                             key block data
 * @param startKey             dictionary part of the start key
 * @param endKey               dictionary part of the end key
 * @param compressionModel     compression model supplying measure min/max values; also stored
 *                             on the holder
 * @param noDictionaryStartKey no-dictionary part of the start key; {@code null} is treated as
 *                             an empty array
 * @param noDictionaryEndKey   no-dictionary part of the end key; {@code null} is treated as an
 *                             empty array
 * @param nullValueIndexBitSet null-value bit sets of the measures, stored on the holder as-is
 * @return the populated node holder, including its calculated size
 * @throws CarbonDataWriterException if converting a data index map or generating the data
 *                                   chunks fails; the original exception is attached as the
 *                                   cause
 */
@Override
public NodeHolder buildDataNodeHolder(IndexStorage<short[]>[] keyStorageArray, byte[][] measureArray, int entryCount, byte[] startKey, byte[] endKey, WriterCompressModel compressionModel, byte[] noDictionaryStartKey, byte[] noDictionaryEndKey, BitSet[] nullValueIndexBitSet) throws CarbonDataWriterException {
    // a missing no-dictionary key is handled as an empty key
    if (null == noDictionaryEndKey) {
        noDictionaryEndKey = new byte[0];
    }
    if (null == noDictionaryStartKey) {
        noDictionaryStartKey = new byte[0];
    }
    // total length of all measure data
    int totalMsrArrySize = 0;
    // total length of all compressed key blocks
    int totalKeySize = 0;
    // every key column gets a slot in the index arrays, sorted or not
    int keyBlockSize = keyStorageArray.length;
    boolean[] isSortedData = new boolean[keyStorageArray.length];
    int[] keyLengths = new int[keyStorageArray.length];
    // min and max value of each dimension / measure column, indexed by column
    byte[][] dimensionMinValue = new byte[keyStorageArray.length][];
    byte[][] dimensionMaxValue = new byte[keyStorageArray.length][];
    byte[][] measureMinValue = new byte[measureArray.length][];
    byte[][] measureMaxValue = new byte[measureArray.length][];
    byte[][] keyBlockData = fillAndCompressedKeyBlockData(keyStorageArray, entryCount);
    boolean[] colGrpBlock = new boolean[keyStorageArray.length];
    for (int i = 0; i < keyLengths.length; i++) {
        keyLengths[i] = keyBlockData[i].length;
        isSortedData[i] = keyStorageArray[i].isAlreadySorted();
        totalKeySize += keyLengths[i];
        if (dataWriterVo.getIsComplexType()[i] || dataWriterVo.getIsDictionaryColumn()[i]) {
            dimensionMinValue[i] = keyStorageArray[i].getMin();
            dimensionMaxValue[i] = keyStorageArray[i].getMax();
        } else {
            // no-dictionary columns need their min/max passed through the update helper
            dimensionMinValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMin());
            dimensionMaxValue[i] = updateMinMaxForNoDictionary(keyStorageArray[i].getMax());
        }
        // a ColGroupBlockStorage instance marks the column as a column-group chunk
        colGrpBlock[i] = keyStorageArray[i] instanceof ColGroupBlockStorage;
    }
    for (int i = 0; i < measureArray.length; i++) {
        measureMaxValue[i] = CarbonMetadataUtil.getByteValueForMeasure(compressionModel.getMaxValue()[i], dataWriterVo.getSegmentProperties().getMeasures().get(i).getDataType());
        measureMinValue[i] = CarbonMetadataUtil.getByteValueForMeasure(compressionModel.getMinValue()[i], dataWriterVo.getSegmentProperties().getMeasures().get(i).getDataType());
    }
    int[] keyBlockIdxLengths = new int[keyBlockSize];
    byte[][] dataAfterCompression = new byte[keyBlockSize][];
    byte[][] indexMap = new byte[keyBlockSize][];
    // only unsorted columns are filled; slots of sorted columns stay at their defaults
    for (int i = 0; i < isSortedData.length; i++) {
        if (!isSortedData[i]) {
            dataAfterCompression[i] = getByteArray(keyStorageArray[i].getDataAfterComp());
            if (null != keyStorageArray[i].getIndexMap() && keyStorageArray[i].getIndexMap().length > 0) {
                indexMap[i] = getByteArray(keyStorageArray[i].getIndexMap());
            } else {
                indexMap[i] = new byte[0];
            }
            keyBlockIdxLengths[i] = (dataAfterCompression[i].length + indexMap[i].length) + CarbonCommonConstants.INT_SIZE_IN_BYTE;
        }
    }
    byte[][] compressedDataIndex = new byte[keyBlockSize][];
    int[] dataIndexMapLength = new int[keyBlockSize];
    for (int i = 0; i < dataWriterVo.getAggBlocks().length; i++) {
        if (dataWriterVo.getAggBlocks()[i]) {
            try {
                compressedDataIndex[i] = getByteArray(keyStorageArray[i].getDataIndexMap());
                dataIndexMapLength[i] = compressedDataIndex[i].length;
            } catch (Exception e) {
                throw new CarbonDataWriterException(e.getMessage(), e);
            }
        }
    }
    int[] msrLength = new int[dataWriterVo.getMeasureCount()];
    // record each measure size and accumulate the total
    for (int i = 0; i < measureArray.length; i++) {
        msrLength[i] = measureArray[i].length;
        totalMsrArrySize += msrLength[i];
    }
    NodeHolder holder = new NodeHolder();
    holder.setDataArray(measureArray);
    holder.setKeyArray(keyBlockData);
    holder.setMeasureNullValueIndex(nullValueIndexBitSet);
    // end key format will be <length of dictionary key><length of no
    // dictionary key><DictionaryKey><No Dictionary key>
    byte[] updatedNoDictionaryEndKey = updateNoDictionaryStartAndEndKey(noDictionaryEndKey);
    ByteBuffer buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + CarbonCommonConstants.INT_SIZE_IN_BYTE + endKey.length + updatedNoDictionaryEndKey.length);
    buffer.putInt(endKey.length);
    buffer.putInt(updatedNoDictionaryEndKey.length);
    buffer.put(endKey);
    buffer.put(updatedNoDictionaryEndKey);
    // array() exposes the whole backing array regardless of the buffer position
    holder.setEndKey(buffer.array());
    holder.setMeasureLenght(msrLength);
    byte[] updatedNoDictionaryStartKey = updateNoDictionaryStartAndEndKey(noDictionaryStartKey);
    // start key format will be <length of dictionary key><length of no
    // dictionary key><DictionaryKey><No Dictionary key>
    buffer = ByteBuffer.allocate(CarbonCommonConstants.INT_SIZE_IN_BYTE + CarbonCommonConstants.INT_SIZE_IN_BYTE + startKey.length + updatedNoDictionaryStartKey.length);
    buffer.putInt(startKey.length);
    buffer.putInt(updatedNoDictionaryStartKey.length);
    buffer.put(startKey);
    buffer.put(updatedNoDictionaryStartKey);
    holder.setStartKey(buffer.array());
    holder.setEntryCount(entryCount);
    holder.setKeyLengths(keyLengths);
    holder.setKeyBlockIndexLength(keyBlockIdxLengths);
    holder.setIsSortedKeyBlock(isSortedData);
    holder.setCompressedIndex(dataAfterCompression);
    holder.setCompressedIndexMap(indexMap);
    holder.setDataIndexMapLength(dataIndexMapLength);
    holder.setCompressedDataIndex(compressedDataIndex);
    holder.setCompressionModel(compressionModel);
    holder.setTotalDimensionArrayLength(totalKeySize);
    holder.setTotalMeasureArrayLength(totalMsrArrySize);
    holder.setMeasureColumnMaxData(measureMaxValue);
    holder.setMeasureColumnMinData(measureMinValue);
    // setting column min max value
    holder.setColumnMaxData(dimensionMaxValue);
    holder.setColumnMinData(dimensionMinValue);
    holder.setAggBlocks(dataWriterVo.getAggBlocks());
    holder.setColGrpBlocks(colGrpBlock);
    // the data chunks are generated here only to size the holder
    List<byte[]> dimensionDataChunk2 = null;
    List<byte[]> measureDataChunk2 = null;
    try {
        dimensionDataChunk2 = CarbonMetadataUtil.getDataChunk2(holder, thriftColumnSchemaList, dataWriterVo.getSegmentProperties(), true);
        measureDataChunk2 = CarbonMetadataUtil.getDataChunk2(holder, thriftColumnSchemaList, dataWriterVo.getSegmentProperties(), false);
    } catch (IOException e) {
        // keep the IOException as the cause so the stack trace is not lost
        throw new CarbonDataWriterException(e.getMessage(), e);
    }
    holder.setHolderSize(calculateSize(holder, dimensionDataChunk2, measureDataChunk2));
    return holder;
}
Also used : ColGroupBlockStorage(org.apache.carbondata.processing.store.colgroup.ColGroupBlockStorage) IOException(java.io.IOException) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) NodeHolder(org.apache.carbondata.core.util.NodeHolder) ByteBuffer(java.nio.ByteBuffer) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) IOException(java.io.IOException)

Aggregations

IOException (java.io.IOException)2 ByteBuffer (java.nio.ByteBuffer)2 NodeHolder (org.apache.carbondata.core.util.NodeHolder)2 ColGroupBlockStorage (org.apache.carbondata.processing.store.colgroup.ColGroupBlockStorage)2 CarbonDataWriterException (org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException)2