Example 6 with BlockletMinMaxIndex

Use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.

the class CarbonMetadataUtil method setBlockletIndex.

private static void setBlockletIndex(FileFooter footer, List<BlockletInfoColumnar> listOfNodeInfo) {
    List<BlockletIndex> blockletIndexList = footer.getBlocklet_index_list();
    for (int i = 0; i < blockletIndexList.size(); i++) {
        BlockletBTreeIndex bTreeIndexList = blockletIndexList.get(i).getB_tree_index();
        BlockletMinMaxIndex minMaxIndexList = blockletIndexList.get(i).getMin_max_index();
        listOfNodeInfo.get(i).setStartKey(bTreeIndexList.getStart_key());
        listOfNodeInfo.get(i).setEndKey(bTreeIndexList.getEnd_key());
        byte[][] min = new byte[minMaxIndexList.getMin_values().size()][];
        byte[][] max = new byte[minMaxIndexList.getMax_values().size()][];
        for (int j = 0; j < minMaxIndexList.getMax_valuesSize(); j++) {
            min[j] = minMaxIndexList.getMin_values().get(j).array();
            max[j] = minMaxIndexList.getMax_values().get(j).array();
        }
        // set both min and max so the populated min array is not discarded
        listOfNodeInfo.get(i).setColumnMinData(min);
        listOfNodeInfo.get(i).setColumnMaxData(max);
    }
}
Also used : BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) BlockletIndex(org.apache.carbondata.format.BlockletIndex) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex)
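
For contrast with the read path above, here is a minimal sketch of the write path that populates a BlockletMinMaxIndex before it lands in the footer. It uses only the thrift-generated calls that already appear in these examples (addToMin_values, addToMax_values, ByteBuffer.wrap); the helper name buildMinMaxIndex and its byte[][] inputs are illustrative assumptions, not part of CarbonMetadataUtil.

private static BlockletMinMaxIndex buildMinMaxIndex(byte[][] minValues, byte[][] maxValues) {
    // Illustrative helper: wrap each column's min/max byte array into the
    // thrift BlockletMinMaxIndex structure that setBlockletIndex reads back.
    BlockletMinMaxIndex minMaxIndex = new BlockletMinMaxIndex();
    for (int i = 0; i < maxValues.length; i++) {
        minMaxIndex.addToMin_values(ByteBuffer.wrap(minValues[i]));
        minMaxIndex.addToMax_values(ByteBuffer.wrap(maxValues[i]));
    }
    return minMaxIndex;
}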

Example 7 with BlockletMinMaxIndex

Use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.

the class CarbonMetadataUtil method getDatachunk2.

/**
   * Below method will be used to get the data chunk object for all the columns
   *
   * @param nodeHolderList       blocklet info
   * @param columnSchema         list of columns
   * @param segmentProperties    segment properties
   * @param index                column index within the blocklet
   * @param isDimensionColumn    true if the column is a dimension, false if it is a measure
   * @return list of data chunks
   * @throws IOException
   */
private static List<DataChunk2> getDatachunk2(List<NodeHolder> nodeHolderList, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, int index, boolean isDimensionColumn) throws IOException {
    List<DataChunk2> colDataChunks = new ArrayList<DataChunk2>();
    DataChunk2 dataChunk = null;
    NodeHolder nodeHolder = null;
    for (int i = 0; i < nodeHolderList.size(); i++) {
        nodeHolder = nodeHolderList.get(i);
        dataChunk = new DataChunk2();
        dataChunk.min_max = new BlockletMinMaxIndex();
        dataChunk.setChunk_meta(getChunkCompressionMeta());
        dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount());
        List<Encoding> encodings = new ArrayList<Encoding>();
        if (isDimensionColumn) {
            dataChunk.setData_page_length(nodeHolder.getKeyLengths()[index]);
            if (containsEncoding(index, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
                encodings.add(Encoding.DICTIONARY);
            }
            if (containsEncoding(index, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
                encodings.add(Encoding.DIRECT_DICTIONARY);
            }
            dataChunk.setRowMajor(nodeHolder.getColGrpBlocks()[index]);
            // RLE page length and RLE encoding are recorded only when this column's key block is aggregated.
            if (nodeHolder.getAggBlocks()[index]) {
                dataChunk.setRle_page_length(nodeHolder.getDataIndexMapLength()[index]);
                encodings.add(Encoding.RLE);
            }
            dataChunk.setSort_state(nodeHolder.getIsSortedKeyBlock()[index] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
            if (!nodeHolder.getIsSortedKeyBlock()[index]) {
                dataChunk.setRowid_page_length(nodeHolder.getKeyBlockIndexLength()[index]);
                encodings.add(Encoding.INVERTED_INDEX);
            }
            dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getColumnMaxData()[index]));
            dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getColumnMinData()[index]));
        } else {
            dataChunk.setData_page_length(nodeHolder.getDataArray()[index].length);
            // TODO : Right now the encodings are happening at runtime. change as
            // per this encoders.
            dataChunk.setRowMajor(false);
            encodings.add(Encoding.DELTA);
            // TODO writing dummy presence meta need to set actual presence
            // meta
            PresenceMeta presenceMeta = new PresenceMeta();
            presenceMeta.setPresent_bit_streamIsSet(true);
            presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor().compressByte(nodeHolder.getMeasureNullValueIndex()[index].toByteArray()));
            dataChunk.setPresence(presenceMeta);
            List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
            encoderMetaList.add(ByteBuffer.wrap(serializeEncodeMetaUsingByteBuffer(createValueEncoderMeta(nodeHolder.getCompressionModel(), index))));
            dataChunk.setEncoder_meta(encoderMetaList);
            dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMaxData()[index]));
            dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMinData()[index]));
        }
        dataChunk.setEncoders(encodings);
        colDataChunks.add(dataChunk);
    }
    return colDataChunks;
}
Also used : DataChunk2(org.apache.carbondata.format.DataChunk2) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) ArrayList(java.util.ArrayList) Encoding(org.apache.carbondata.format.Encoding) PresenceMeta(org.apache.carbondata.format.PresenceMeta) ByteBuffer(java.nio.ByteBuffer)
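
A hedged sketch of reading back the per-page min/max that getDatachunk2 stores in DataChunk2.min_max. It uses only accessors that already appear in these examples (the min_max field, getMax_values(), ByteBuffer.array()); the helper name getMaxValues is an illustrative assumption, not an existing CarbonMetadataUtil method.

private static byte[][] getMaxValues(DataChunk2 dataChunk) {
    // Illustrative helper: unwrap the ByteBuffers that getDatachunk2 added via
    // addToMax_values back into one byte[] per entry.
    BlockletMinMaxIndex minMaxIndex = dataChunk.min_max;
    byte[][] max = new byte[minMaxIndex.getMax_values().size()][];
    for (int i = 0; i < max.length; i++) {
        max[i] = minMaxIndex.getMax_values().get(i).array();
    }
    return max;
}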

Example 8 with BlockletMinMaxIndex

Use of org.apache.carbondata.format.BlockletMinMaxIndex in project carbondata by apache.

the class CarbonMetadataUtilTest method testConvertFileFooter.

@Test
public void testConvertFileFooter() throws Exception {
    int[] intArr = { 1, 2, 3, 4, 5 };
    boolean[] boolArr = { true, true, true, true, true };
    long[] longArr = { 1, 2, 3, 4, 5 };
    byte[][] maxByteArr = { { 1, 2 }, { 3, 4 }, { 5, 6 }, { 2, 4 }, { 1, 2 } };
    int[] cardinality = { 1, 2, 3, 4, 5 };
    org.apache.carbondata.core.metadata.datatype.DataType[] dataType = {
        org.apache.carbondata.core.metadata.datatype.DataType.INT,
        org.apache.carbondata.core.metadata.datatype.DataType.INT,
        org.apache.carbondata.core.metadata.datatype.DataType.INT,
        org.apache.carbondata.core.metadata.datatype.DataType.INT,
        org.apache.carbondata.core.metadata.datatype.DataType.INT };
    org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema = new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
    org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema colSchema1 = new org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema();
    List<org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema> columnSchemaList = new ArrayList<>();
    columnSchemaList.add(colSchema);
    columnSchemaList.add(colSchema1);
    SegmentProperties segmentProperties = new SegmentProperties(columnSchemaList, cardinality);
    final List<Integer> integerList = new ArrayList<>();
    integerList.add(new Integer("1"));
    integerList.add(new Integer("2"));
    WriterCompressModel writerCompressModel = new WriterCompressModel();
    // objMaxArr, objMinArr and byteArr are class-level test fixtures declared elsewhere in CarbonMetadataUtilTest.
    writerCompressModel.setMaxValue(objMaxArr);
    writerCompressModel.setMinValue(objMinArr);
    writerCompressModel.setDataTypeSelected(byteArr);
    writerCompressModel.setMantissa(intArr);
    writerCompressModel.setType(dataType);
    writerCompressModel.setUniqueValue(objMinArr);
    BlockletInfoColumnar blockletInfoColumnar = new BlockletInfoColumnar();
    BitSet[] bitSetArr = new BitSet[6];
    bitSetArr[0] = new BitSet();
    bitSetArr[1] = new BitSet();
    bitSetArr[2] = new BitSet();
    bitSetArr[3] = new BitSet();
    bitSetArr[4] = new BitSet();
    bitSetArr[5] = new BitSet();
    blockletInfoColumnar.setColumnMaxData(maxByteArr);
    blockletInfoColumnar.setColumnMinData(maxByteArr);
    blockletInfoColumnar.setKeyLengths(intArr);
    blockletInfoColumnar.setColGrpBlocks(boolArr);
    blockletInfoColumnar.setKeyOffSets(longArr);
    blockletInfoColumnar.setDataIndexMapOffsets(longArr);
    blockletInfoColumnar.setAggKeyBlock(boolArr);
    blockletInfoColumnar.setDataIndexMapLength(intArr);
    blockletInfoColumnar.setIsSortedKeyColumn(boolArr);
    blockletInfoColumnar.setKeyOffSets(longArr);
    blockletInfoColumnar.setMeasureLength(intArr);
    blockletInfoColumnar.setMeasureOffset(longArr);
    blockletInfoColumnar.setMeasureNullValueIndex(bitSetArr);
    blockletInfoColumnar.setCompressionModel(writerCompressModel);
    BlockletInfoColumnar blockletInfoColumnar1 = new BlockletInfoColumnar();
    blockletInfoColumnar1.setColumnMaxData(maxByteArr);
    blockletInfoColumnar1.setColumnMinData(maxByteArr);
    blockletInfoColumnar1.setKeyLengths(intArr);
    blockletInfoColumnar1.setKeyOffSets(longArr);
    blockletInfoColumnar1.setDataIndexMapOffsets(longArr);
    blockletInfoColumnar1.setAggKeyBlock(boolArr);
    blockletInfoColumnar1.setDataIndexMapLength(intArr);
    blockletInfoColumnar1.setIsSortedKeyColumn(boolArr);
    blockletInfoColumnar1.setColGrpBlocks(boolArr);
    blockletInfoColumnar1.setKeyOffSets(longArr);
    blockletInfoColumnar1.setMeasureLength(intArr);
    blockletInfoColumnar1.setMeasureOffset(longArr);
    blockletInfoColumnar1.setMeasureNullValueIndex(bitSetArr);
    blockletInfoColumnar1.setCompressionModel(writerCompressModel);
    blockletInfoColumnar1.setColGrpBlocks(boolArr);
    List<BlockletInfoColumnar> blockletInfoColumnarList = new ArrayList<>();
    blockletInfoColumnarList.add(blockletInfoColumnar);
    blockletInfoColumnarList.add(blockletInfoColumnar1);
    new MockUp<CarbonUtil>() {

        @SuppressWarnings("unused")
        @Mock
        public List<Integer> convertToIntegerList(int[] array) {
            return integerList;
        }
    };
    final Set<Integer> integerSet = new HashSet<>();
    integerSet.add(new Integer("1"));
    integerSet.add(new Integer("2"));
    new MockUp<SegmentProperties>() {

        @SuppressWarnings("unused")
        @Mock
        public Set<Integer> getDimensionOrdinalForBlock(int blockIndex) {
            return integerSet;
        }
    };
    SegmentInfo segmentInfo = new SegmentInfo();
    segmentInfo.setNum_cols(4);
    segmentInfo.setColumn_cardinalities(integerList);
    FileFooter fileFooter = new FileFooter();
    fileFooter.setNum_rows(4);
    fileFooter.setSegment_info(segmentInfo);
    byte[] byteMaxArr = "1".getBytes();
    byte[] byteMinArr = "2".getBytes();
    BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
    blockletMinMaxIndex.addToMax_values(ByteBuffer.wrap(byteMaxArr));
    blockletMinMaxIndex.addToMin_values(ByteBuffer.wrap(byteMinArr));
    // columnSchemas is a class-level fixture (a List of org.apache.carbondata.format.ColumnSchema) declared elsewhere in CarbonMetadataUtilTest.
    FileFooter result = convertFileFooter(blockletInfoColumnarList, 4, cardinality, columnSchemas, segmentProperties);
    assertEquals(result.getTable_columns(), columnSchemas);
}
Also used : BlockletInfoColumnar(org.apache.carbondata.core.metadata.BlockletInfoColumnar) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.format.ColumnSchema) MockUp(mockit.MockUp) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) DataType(org.apache.carbondata.format.DataType) HashSet(java.util.HashSet) WriterCompressModel(org.apache.carbondata.core.datastore.compression.WriterCompressModel) BitSet(java.util.BitSet) CarbonMetadataUtil.convertFileFooter(org.apache.carbondata.core.util.CarbonMetadataUtil.convertFileFooter) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) Test(org.junit.Test)
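
As a hedged follow-up to the test above, the snippet below sketches one additional check on the converted footer's blocklet-level min/max. It assumes the FileFooter returned by convertFileFooter exposes its blocklet indexes via getBlocklet_index_list(), the same accessor read in Example 6, and it is illustrative rather than part of CarbonMetadataUtilTest.

    // Illustrative extra check: min and max values are always added in pairs,
    // so every blocklet index should carry equally many of each.
    for (BlockletIndex blockletIndex : result.getBlocklet_index_list()) {
        BlockletMinMaxIndex minMax = blockletIndex.getMin_max_index();
        assertEquals(minMax.getMin_values().size(), minMax.getMax_values().size());
    }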

Aggregations

BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex) 8
ArrayList (java.util.ArrayList) 4
BlockletBTreeIndex (org.apache.carbondata.format.BlockletBTreeIndex) 4
BlockletIndex (org.apache.carbondata.format.BlockletIndex) 4
ByteBuffer (java.nio.ByteBuffer) 3
DataChunk2 (org.apache.carbondata.format.DataChunk2) 2
Encoding (org.apache.carbondata.format.Encoding) 2
PresenceMeta (org.apache.carbondata.format.PresenceMeta) 2
Test (org.junit.Test) 2
BitSet (java.util.BitSet) 1
HashSet (java.util.HashSet) 1
MockUp (mockit.MockUp) 1
SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties) 1
WriterCompressModel (org.apache.carbondata.core.datastore.compression.WriterCompressModel) 1
BlockletInfoColumnar (org.apache.carbondata.core.metadata.BlockletInfoColumnar) 1
BlockIndexInfo (org.apache.carbondata.core.metadata.index.BlockIndexInfo) 1
CarbonMetadataUtil.convertFileFooter (org.apache.carbondata.core.util.CarbonMetadataUtil.convertFileFooter) 1
CarbonMetadataUtil.getBlockIndexInfo (org.apache.carbondata.core.util.CarbonMetadataUtil.getBlockIndexInfo) 1
ColumnSchema (org.apache.carbondata.format.ColumnSchema) 1
DataType (org.apache.carbondata.format.DataType) 1