Search in sources :

Example 1 with BlockletMinMaxIndex

use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.

the class CarbonMetadataUtil method getBlockletIndex.

/**
 * Builds the thrift {@link BlockletIndex} summarising a list of node holders.
 *
 * <p>Column min/max values are folded across every node holder using
 * {@code ByteUtil.UnsafeComparer}, measure min/max values using {@code compareMeasureData}.
 * Both are appended to a single {@link BlockletMinMaxIndex}: column values first, measure
 * values after them. The b-tree index takes the start key of the first node holder and the
 * end key of the last one.
 *
 * @param nodeHolderList    node holders to summarise; element 0 seeds the min/max arrays
 * @param carbonMeasureList measures; entry {@code j} supplies the data type used to compare
 *                          measure column {@code j}
 * @return blocklet index holding the combined min/max index and the b-tree index
 */
public static BlockletIndex getBlockletIndex(List<NodeHolder> nodeHolderList, List<CarbonMeasure> carbonMeasureList) {
    BlockletMinMaxIndex minMaxIndex = new BlockletMinMaxIndex();

    // Running column min/max, seeded from the first node holder.
    byte[][] columnMins = nodeHolderList.get(0).getColumnMinData().clone();
    byte[][] columnMaxes = nodeHolderList.get(0).getColumnMaxData().clone();
    for (NodeHolder holder : nodeHolderList) {
        byte[][] holderMaxes = holder.getColumnMaxData();
        byte[][] holderMins = holder.getColumnMinData();
        int column = 0;
        while (column < columnMaxes.length) {
            if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(holderMaxes[column], columnMaxes[column]) > 0) {
                columnMaxes[column] = holderMaxes[column];
            }
            if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(holderMins[column], columnMins[column]) < 0) {
                columnMins[column] = holderMins[column];
            }
            column++;
        }
    }

    // Column values go into the thrift structure first: every max, then every min.
    for (int column = 0; column < columnMaxes.length; column++) {
        minMaxIndex.addToMax_values(ByteBuffer.wrap(columnMaxes[column]));
    }
    for (int column = 0; column < columnMins.length; column++) {
        minMaxIndex.addToMin_values(ByteBuffer.wrap(columnMins[column]));
    }

    // Running measure min/max, again seeded from the first node holder; the remaining
    // holders (index 1 onwards) are compared against it.
    byte[][] measureMaxes = nodeHolderList.get(0).getMeasureColumnMaxData().clone();
    byte[][] measureMins = nodeHolderList.get(0).getMeasureColumnMinData().clone();
    for (int holderIdx = 1; holderIdx < nodeHolderList.size(); holderIdx++) {
        NodeHolder holder = nodeHolderList.get(holderIdx);
        for (int measure = 0; measure < measureMins.length; measure++) {
            byte[] candidateMin = holder.getMeasureColumnMinData()[measure];
            byte[] candidateMax = holder.getMeasureColumnMaxData()[measure];
            if (compareMeasureData(measureMaxes[measure], candidateMax, carbonMeasureList.get(measure).getDataType()) < 0) {
                measureMaxes[measure] = candidateMax.clone();
            }
            if (compareMeasureData(measureMins[measure], candidateMin, carbonMeasureList.get(measure).getDataType()) > 0) {
                measureMins[measure] = candidateMin.clone();
            }
        }
    }

    // Measure values are appended after the column values.
    for (int measure = 0; measure < measureMaxes.length; measure++) {
        minMaxIndex.addToMax_values(ByteBuffer.wrap(measureMaxes[measure]));
    }
    for (int measure = 0; measure < measureMins.length; measure++) {
        minMaxIndex.addToMin_values(ByteBuffer.wrap(measureMins[measure]));
    }

    // Start key comes from the first node holder, end key from the last one.
    BlockletBTreeIndex bTreeIndex = new BlockletBTreeIndex();
    bTreeIndex.setStart_key(nodeHolderList.get(0).getStartKey());
    int lastHolderIdx = nodeHolderList.size() - 1;
    bTreeIndex.setEnd_key(nodeHolderList.get(lastHolderIdx).getEndKey());

    BlockletIndex result = new BlockletIndex();
    result.setMin_max_index(minMaxIndex);
    result.setB_tree_index(bTreeIndex);
    return result;
}
Also used : BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) BlockletIndex(org.apache.carbondata.format.BlockletIndex) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex)

Example 2 with BlockletMinMaxIndex

use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.

the class CarbonMetadataUtil method getBlockletIndex.

/**
 * Converts a core-metadata blocklet index into its thrift {@link BlockletIndex} form.
 *
 * <p>For every position of the source max-values array, the max value and the min value at
 * that position are wrapped in a {@link ByteBuffer} and appended to the thrift min/max index.
 * The start and end keys of the source b-tree index are copied over as they are.
 *
 * @param info source blocklet index
 * @return thrift blocklet index with the min/max and b-tree parts populated
 */
private static BlockletIndex getBlockletIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex info) {
    BlockletMinMaxIndex thriftMinMax = new BlockletMinMaxIndex();
    int position = 0;
    // The max-values array drives the iteration; min values are read at the same position.
    while (position < info.getMinMaxIndex().getMaxValues().length) {
        byte[] maxValue = info.getMinMaxIndex().getMaxValues()[position];
        thriftMinMax.addToMax_values(ByteBuffer.wrap(maxValue));
        byte[] minValue = info.getMinMaxIndex().getMinValues()[position];
        thriftMinMax.addToMin_values(ByteBuffer.wrap(minValue));
        position++;
    }

    BlockletBTreeIndex thriftBTree = new BlockletBTreeIndex();
    thriftBTree.setStart_key(info.getBtreeIndex().getStartKey());
    thriftBTree.setEnd_key(info.getBtreeIndex().getEndKey());

    BlockletIndex converted = new BlockletIndex();
    converted.setMin_max_index(thriftMinMax);
    converted.setB_tree_index(thriftBTree);
    return converted;
}
Also used : BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) BlockletIndex(org.apache.carbondata.format.BlockletIndex) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex)

Example 3 with BlockletMinMaxIndex

use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.

the class CarbonMetadataUtilTest method testGetBlockIndexInfo.

/**
 * Checks that {@code getBlockIndexInfo} carries the file name of a {@code BlockIndexInfo}
 * over to the first resulting {@code BlockIndex}.
 *
 * <p>The fully-qualified {@code org.apache.carbondata.core.metadata.blocklet.index} types are
 * spelled out on purpose: the thrift class of the same simple name
 * ({@code org.apache.carbondata.format.BlockletMinMaxIndex}) is also in use by this test class.
 */
@Test
public void testGetBlockIndexInfo() throws Exception {
    byte[] startKey = { 1, 2, 3, 4, 5 };
    byte[] endKey = { 9, 3, 5, 5, 5 };

    // One min value and one max value are enough for this check.
    byte[] minBytes = { 1, 2, 3, 4, 5 };
    List<ByteBuffer> minList = new ArrayList<>();
    minList.add(ByteBuffer.wrap(minBytes));
    byte[] maxBytes = { 9, 9, 8, 6, 7 };
    List<ByteBuffer> maxList = new ArrayList<>();
    maxList.add(ByteBuffer.wrap(maxBytes));

    org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex blockletMinMaxIndex = new org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex(minList, maxList);
    org.apache.carbondata.core.metadata.blocklet.index.BlockletBTreeIndex blockletBTreeIndex = new org.apache.carbondata.core.metadata.blocklet.index.BlockletBTreeIndex(startKey, endKey);
    org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex blockletIndex = new org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex(blockletBTreeIndex, blockletMinMaxIndex);

    BlockIndexInfo blockIndexInfo = new BlockIndexInfo(1, "file", 1, blockletIndex);
    List<BlockIndexInfo> blockIndexInfoList = new ArrayList<>();
    blockIndexInfoList.add(blockIndexInfo);

    List<BlockIndex> result = getBlockIndexInfo(blockIndexInfoList);

    String expected = "file";
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes a failure
    // message report the two values the right way round.
    assertEquals(expected, result.get(0).file_name);
}
Also used : ArrayList(java.util.ArrayList) ByteBuffer(java.nio.ByteBuffer) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) CarbonMetadataUtil.getBlockIndexInfo(org.apache.carbondata.core.util.CarbonMetadataUtil.getBlockIndexInfo) BlockIndexInfo(org.apache.carbondata.core.metadata.index.BlockIndexInfo) Test(org.junit.Test)

Example 4 with BlockletMinMaxIndex

use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.

the class CarbonMetadataUtil method getDataChunk2.

/**
 * Below method will be used to get the data chunk2 serialize object list.
 *
 * <p>When {@code isDimensionColumn} is set, one chunk is produced per entry of
 * {@code nodeHolder.getKeyArray()}; otherwise one chunk is produced per entry of
 * {@code nodeHolder.getDataArray()}. Each chunk is converted to bytes with
 * {@code CarbonUtil.getByteArray} before it is added to the result.
 *
 * @param nodeHolder        node holder
 * @param columnSchema      table columns
 * @param segmentProperties segment properties
 * @param isDimensionColumn to get the list of dimension column or measure column
 * @return list of data chunk2
 * @throws IOException declared for the serialization helpers called here; propagated unchanged
 */
public static List<byte[]> getDataChunk2(NodeHolder nodeHolder, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, boolean isDimensionColumn) throws IOException {
    List<byte[]> dataChunkBuffer = new ArrayList<>();
    if (isDimensionColumn) {
        for (int i = 0; i < nodeHolder.getKeyArray().length; i++) {
            DataChunk2 dataChunk = buildDimensionDataChunk2(nodeHolder, i, columnSchema, segmentProperties);
            dataChunkBuffer.add(CarbonUtil.getByteArray(dataChunk));
        }
    } else {
        for (int i = 0; i < nodeHolder.getDataArray().length; i++) {
            DataChunk2 dataChunk = buildMeasureDataChunk2(nodeHolder, i);
            dataChunkBuffer.add(CarbonUtil.getByteArray(dataChunk));
        }
    }
    return dataChunkBuffer;
}

/**
 * Builds the chunk metadata for the dimension column at {@code index}: page lengths, sort
 * state, the list of applied encodings and the column's min/max values.
 */
private static DataChunk2 buildDimensionDataChunk2(NodeHolder nodeHolder, int index, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties) throws IOException {
    DataChunk2 dataChunk = new DataChunk2();
    dataChunk.min_max = new BlockletMinMaxIndex();
    dataChunk.setChunk_meta(getChunkCompressionMeta());
    dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount());
    List<Encoding> encodings = new ArrayList<>();
    dataChunk.setData_page_length(nodeHolder.getKeyLengths()[index]);
    if (containsEncoding(index, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
        encodings.add(Encoding.DICTIONARY);
    }
    if (containsEncoding(index, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
        encodings.add(Encoding.DIRECT_DICTIONARY);
    }
    dataChunk.setRowMajor(nodeHolder.getColGrpBlocks()[index]);
    if (nodeHolder.getAggBlocks()[index]) {
        // An RLE page length is recorded only for blocks flagged by getAggBlocks().
        dataChunk.setRle_page_length(nodeHolder.getDataIndexMapLength()[index]);
        encodings.add(Encoding.RLE);
    }
    boolean sortedKeyBlock = nodeHolder.getIsSortedKeyBlock()[index];
    dataChunk.setSort_state(sortedKeyBlock ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
    if (!sortedKeyBlock) {
        // Blocks not flagged as sorted additionally record a row-id page length.
        dataChunk.setRowid_page_length(nodeHolder.getKeyBlockIndexLength()[index]);
        encodings.add(Encoding.INVERTED_INDEX);
    }
    dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getColumnMaxData()[index]));
    dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getColumnMinData()[index]));
    dataChunk.setEncoders(encodings);
    return dataChunk;
}

/**
 * Builds the chunk metadata for the measure column at {@code index}: page length, the DELTA
 * encoding, presence meta, encoder meta and the measure's min/max values.
 */
private static DataChunk2 buildMeasureDataChunk2(NodeHolder nodeHolder, int index) throws IOException {
    DataChunk2 dataChunk = new DataChunk2();
    dataChunk.min_max = new BlockletMinMaxIndex();
    dataChunk.setChunk_meta(getChunkCompressionMeta());
    dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount());
    dataChunk.setData_page_length(nodeHolder.getDataArray()[index].length);
    dataChunk.setRowMajor(false);
    // TODO : Right now the encodings are happening at runtime. change as
    // per this encoders.
    List<Encoding> encodings = new ArrayList<>();
    encodings.add(Encoding.DELTA);
    // Set once, after the list is complete (the earlier call on the empty list was redundant).
    dataChunk.setEncoders(encodings);
    // TODO writing dummy presence meta need to set actual presence
    // meta
    PresenceMeta presenceMeta = new PresenceMeta();
    presenceMeta.setPresent_bit_streamIsSet(true);
    presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor().compressByte(nodeHolder.getMeasureNullValueIndex()[index].toByteArray()));
    dataChunk.setPresence(presenceMeta);
    List<ByteBuffer> encoderMetaList = new ArrayList<>();
    encoderMetaList.add(ByteBuffer.wrap(serializeEncodeMetaUsingByteBuffer(createValueEncoderMeta(nodeHolder.getCompressionModel(), index))));
    dataChunk.setEncoder_meta(encoderMetaList);
    dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMaxData()[index]));
    dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMinData()[index]));
    return dataChunk;
}
Also used : DataChunk2(org.apache.carbondata.format.DataChunk2) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) ArrayList(java.util.ArrayList) Encoding(org.apache.carbondata.format.Encoding) PresenceMeta(org.apache.carbondata.format.PresenceMeta) ByteBuffer(java.nio.ByteBuffer)

Example 5 with BlockletMinMaxIndex

use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.

the class CarbonMetadataUtil method getBlockletIndex.

/**
 * Converts a {@code BlockletInfoColumnar} into a thrift {@link BlockletIndex}.
 *
 * <p>Every column max value, then every column min value, is wrapped in a {@link ByteBuffer}
 * and appended to the thrift min/max index; the start and end keys are copied into the
 * thrift b-tree index.
 *
 * @param info blocklet info supplying the column min/max data and the start/end keys
 * @return thrift blocklet index with the min/max and b-tree parts populated
 */
private static BlockletIndex getBlockletIndex(BlockletInfoColumnar info) {
    BlockletMinMaxIndex thriftMinMax = new BlockletMinMaxIndex();
    for (byte[] columnMax : info.getColumnMaxData()) {
        ByteBuffer wrappedMax = ByteBuffer.wrap(columnMax);
        thriftMinMax.addToMax_values(wrappedMax);
    }
    for (byte[] columnMin : info.getColumnMinData()) {
        ByteBuffer wrappedMin = ByteBuffer.wrap(columnMin);
        thriftMinMax.addToMin_values(wrappedMin);
    }

    BlockletBTreeIndex thriftBTree = new BlockletBTreeIndex();
    thriftBTree.setStart_key(info.getStartKey());
    thriftBTree.setEnd_key(info.getEndKey());

    BlockletIndex converted = new BlockletIndex();
    converted.setMin_max_index(thriftMinMax);
    converted.setB_tree_index(thriftBTree);
    return converted;
}
Also used : BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) BlockletIndex(org.apache.carbondata.format.BlockletIndex) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex)

Aggregations

BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex): 8, ArrayList (java.util.ArrayList): 4, BlockletBTreeIndex (org.apache.carbondata.format.BlockletBTreeIndex): 4, BlockletIndex (org.apache.carbondata.format.BlockletIndex): 4, ByteBuffer (java.nio.ByteBuffer): 3, DataChunk2 (org.apache.carbondata.format.DataChunk2): 2, Encoding (org.apache.carbondata.format.Encoding): 2, PresenceMeta (org.apache.carbondata.format.PresenceMeta): 2, Test (org.junit.Test): 2, BitSet (java.util.BitSet): 1, HashSet (java.util.HashSet): 1, MockUp (mockit.MockUp): 1, SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties): 1, WriterCompressModel (org.apache.carbondata.core.datastore.compression.WriterCompressModel): 1, BlockletInfoColumnar (org.apache.carbondata.core.metadata.BlockletInfoColumnar): 1, BlockIndexInfo (org.apache.carbondata.core.metadata.index.BlockIndexInfo): 1, CarbonMetadataUtil.convertFileFooter (org.apache.carbondata.core.util.CarbonMetadataUtil.convertFileFooter): 1, CarbonMetadataUtil.getBlockIndexInfo (org.apache.carbondata.core.util.CarbonMetadataUtil.getBlockIndexInfo): 1, ColumnSchema (org.apache.carbondata.format.ColumnSchema): 1, DataType (org.apache.carbondata.format.DataType): 1