Search in sources:

Example 1 with DataChunk

Use of org.apache.carbondata.format.DataChunk in project carbondata by apache.

The class CarbonMetadataUtil, method getBlockletInfo.

private static BlockletInfo getBlockletInfo(BlockletInfoColumnar blockletInfoColumnar, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties) throws IOException {
    BlockletInfo blockletInfo = new BlockletInfo();
    blockletInfo.setNum_rows(blockletInfoColumnar.getNumberOfKeys());
    List<DataChunk> colDataChunks = new ArrayList<DataChunk>();
    // j indexes the row-id (inverted index) arrays; it advances only for unsorted key columns.
    int j = 0;
    // aggregateIndex indexes the RLE (data index map) arrays; it advances only for aggregated key blocks.
    int aggregateIndex = 0;
    boolean[] isSortedKeyColumn = blockletInfoColumnar.getIsSortedKeyColumn();
    boolean[] aggKeyBlock = blockletInfoColumnar.getAggKeyBlock();
    boolean[] colGrpblock = blockletInfoColumnar.getColGrpBlocks();
    for (int i = 0; i < blockletInfoColumnar.getKeyLengths().length; i++) {
        DataChunk dataChunk = new DataChunk();
        dataChunk.setChunk_meta(getChunkCompressionMeta());
        List<Encoding> encodings = new ArrayList<Encoding>();
        if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
            encodings.add(Encoding.DICTIONARY);
        }
        if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
            encodings.add(Encoding.DIRECT_DICTIONARY);
        }
        dataChunk.setRowMajor(colGrpblock[i]);
        // TODO: once the schema PR is merged, the real column ids need to be passed here.
        dataChunk.setColumn_ids(new ArrayList<Integer>());
        dataChunk.setData_page_length(blockletInfoColumnar.getKeyLengths()[i]);
        dataChunk.setData_page_offset(blockletInfoColumnar.getKeyOffSets()[i]);
        if (aggKeyBlock[i]) {
            dataChunk.setRle_page_offset(blockletInfoColumnar.getDataIndexMapOffsets()[aggregateIndex]);
            dataChunk.setRle_page_length(blockletInfoColumnar.getDataIndexMapLength()[aggregateIndex]);
            encodings.add(Encoding.RLE);
            aggregateIndex++;
        }
        dataChunk.setSort_state(isSortedKeyColumn[i] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
        if (!isSortedKeyColumn[i]) {
            dataChunk.setRowid_page_offset(blockletInfoColumnar.getKeyBlockIndexOffSets()[j]);
            dataChunk.setRowid_page_length(blockletInfoColumnar.getKeyBlockIndexLength()[j]);
            if (!encodings.contains(Encoding.INVERTED_INDEX)) {
                encodings.add(Encoding.INVERTED_INDEX);
            }
            j++;
        }
        // TODO: the encodings are currently decided at runtime; change this to use these encoders.
        dataChunk.setEncoders(encodings);
        colDataChunks.add(dataChunk);
    }
    for (int i = 0; i < blockletInfoColumnar.getMeasureLength().length; i++) {
        DataChunk dataChunk = new DataChunk();
        dataChunk.setChunk_meta(getChunkCompressionMeta());
        dataChunk.setRowMajor(false);
        // TODO: once the schema PR is merged, the real column ids need to be passed here.
        dataChunk.setColumn_ids(new ArrayList<Integer>());
        dataChunk.setData_page_length(blockletInfoColumnar.getMeasureLength()[i]);
        dataChunk.setData_page_offset(blockletInfoColumnar.getMeasureOffset()[i]);
        // TODO: the encodings are currently decided at runtime; change this to use these encoders.
        List<Encoding> encodings = new ArrayList<Encoding>();
        encodings.add(Encoding.DELTA);
        dataChunk.setEncoders(encodings);
        // TODO: writing dummy presence meta for now; the actual presence meta needs to be set.
        PresenceMeta presenceMeta = new PresenceMeta();
        presenceMeta.setPresent_bit_streamIsSet(true);
        presenceMeta.setPresent_bit_stream(blockletInfoColumnar.getMeasureNullValueIndex()[i].toByteArray());
        dataChunk.setPresence(presenceMeta);
        // TODO: PresenceMeta needs to be properly implemented and set here.
        // TODO: need to write the ValueCompression meta here.
        List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
        encoderMetaList.add(ByteBuffer.wrap(serializeEncoderMeta(createValueEncoderMeta(blockletInfoColumnar.getCompressionModel(), i))));
        dataChunk.setEncoder_meta(encoderMetaList);
        colDataChunks.add(dataChunk);
    }
    blockletInfo.setColumn_data_chunks(colDataChunks);
    return blockletInfo;
}
Also used: BlockletInfo(org.apache.carbondata.format.BlockletInfo) ArrayList(java.util.ArrayList) Encoding(org.apache.carbondata.format.Encoding) ByteBuffer(java.nio.ByteBuffer) DataChunk(org.apache.carbondata.format.DataChunk) PresenceMeta(org.apache.carbondata.format.PresenceMeta)
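
Side note on Example 1: the sketch below shows, in isolation, what getBlockletInfo produces for a single unsorted dictionary key column. It is a minimal, hypothetical sketch, not carbondata code: the class name DataChunkSketch and every offset/length value are placeholders, and the only thrift setters used are the ones that appear in the method above.

import java.util.ArrayList;
import java.util.List;

import org.apache.carbondata.format.DataChunk;
import org.apache.carbondata.format.Encoding;
import org.apache.carbondata.format.SortState;

public class DataChunkSketch {
    // Builds one thrift DataChunk the way getBlockletInfo does for an
    // unsorted dictionary key column: DICTIONARY encoding, native sort
    // state, and an inverted-index (row id) page.
    public static DataChunk buildUnsortedDictionaryChunk() {
        DataChunk chunk = new DataChunk();
        List<Encoding> encodings = new ArrayList<Encoding>();
        encodings.add(Encoding.DICTIONARY);
        // not a column-group (row-major) block
        chunk.setRowMajor(false);
        chunk.setColumn_ids(new ArrayList<Integer>());
        // placeholder offsets/lengths; real values come from BlockletInfoColumnar
        chunk.setData_page_offset(0L);
        chunk.setData_page_length(1024);
        // unsorted columns get SORT_NATIVE plus an inverted-index page
        chunk.setSort_state(SortState.SORT_NATIVE);
        chunk.setRowid_page_offset(1024L);
        chunk.setRowid_page_length(256);
        encodings.add(Encoding.INVERTED_INDEX);
        chunk.setEncoders(encodings);
        return chunk;
    }
}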

Example 2 with DataChunk

Use of org.apache.carbondata.format.DataChunk in project carbondata by apache.

The class CarbonMetadataUtil, method convertBlockletInfo.

/**
   * Converts a FileFooter thrift object into a list of BlockletInfoColumnar
   * objects.
   *
   * @param footer the thrift FileFooter read from the carbon data file
   * @return the blocklet metadata converted to BlockletInfoColumnar form
   */
public static List<BlockletInfoColumnar> convertBlockletInfo(FileFooter footer) throws IOException {
    List<BlockletInfoColumnar> listOfNodeInfo = new ArrayList<BlockletInfoColumnar>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
    for (BlockletInfo blockletInfo : footer.getBlocklet_info_list()) {
        BlockletInfoColumnar blockletInfoColumnar = new BlockletInfoColumnar();
        blockletInfoColumnar.setNumberOfKeys(blockletInfo.getNum_rows());
        List<DataChunk> columnChunks = blockletInfo.getColumn_data_chunks();
        List<DataChunk> dictChunks = new ArrayList<DataChunk>();
        List<DataChunk> nonDictColChunks = new ArrayList<DataChunk>();
        for (DataChunk dataChunk : columnChunks) {
            // A chunk whose first encoding is DICTIONARY is treated as a key (dimension) chunk.
            if (dataChunk.getEncoders().get(0).equals(Encoding.DICTIONARY)) {
                dictChunks.add(dataChunk);
            } else {
                nonDictColChunks.add(dataChunk);
            }
        }
        int[] keyLengths = new int[dictChunks.size()];
        long[] keyOffSets = new long[dictChunks.size()];
        long[] keyBlockIndexOffsets = new long[dictChunks.size()];
        int[] keyBlockIndexLens = new int[dictChunks.size()];
        long[] indexMapOffsets = new long[dictChunks.size()];
        int[] indexMapLens = new int[dictChunks.size()];
        boolean[] sortState = new boolean[dictChunks.size()];
        int i = 0;
        for (DataChunk dataChunk : dictChunks) {
            keyLengths[i] = dataChunk.getData_page_length();
            keyOffSets[i] = dataChunk.getData_page_offset();
            keyBlockIndexOffsets[i] = dataChunk.getRowid_page_offset();
            keyBlockIndexLens[i] = dataChunk.getRowid_page_length();
            indexMapOffsets[i] = dataChunk.getRle_page_offset();
            indexMapLens[i] = dataChunk.getRle_page_length();
            sortState[i] = dataChunk.getSort_state().equals(SortState.SORT_EXPLICIT);
            i++;
        }
        blockletInfoColumnar.setKeyLengths(keyLengths);
        blockletInfoColumnar.setKeyOffSets(keyOffSets);
        blockletInfoColumnar.setKeyBlockIndexOffSets(keyBlockIndexOffsets);
        blockletInfoColumnar.setKeyBlockIndexLength(keyBlockIndexLens);
        blockletInfoColumnar.setDataIndexMapOffsets(indexMapOffsets);
        blockletInfoColumnar.setDataIndexMapLength(indexMapLens);
        blockletInfoColumnar.setIsSortedKeyColumn(sortState);
        int[] msrLens = new int[nonDictColChunks.size()];
        long[] msrOffsets = new long[nonDictColChunks.size()];
        ValueEncoderMeta[] encoderMetas = new ValueEncoderMeta[nonDictColChunks.size()];
        i = 0;
        for (DataChunk msrChunk : nonDictColChunks) {
            msrLens[i] = msrChunk.getData_page_length();
            msrOffsets[i] = msrChunk.getData_page_offset();
            encoderMetas[i] = deserializeValueEncoderMeta(msrChunk.getEncoder_meta().get(0));
            i++;
        }
        blockletInfoColumnar.setMeasureLength(msrLens);
        blockletInfoColumnar.setMeasureOffset(msrOffsets);
        blockletInfoColumnar.setCompressionModel(getValueCompressionModel(encoderMetas));
        listOfNodeInfo.add(blockletInfoColumnar);
    }
    setBlockletIndex(footer, listOfNodeInfo);
    return listOfNodeInfo;
}
Also used: BlockletInfoColumnar(org.apache.carbondata.core.metadata.BlockletInfoColumnar) ArrayList(java.util.ArrayList) BlockletInfo(org.apache.carbondata.format.BlockletInfo) DataChunk(org.apache.carbondata.format.DataChunk) ValueEncoderMeta(org.apache.carbondata.core.metadata.ValueEncoderMeta)
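
One thing to note in Example 2: convertBlockletInfo classifies each chunk by its first encoding only (getEncoders().get(0)). A more defensive alternative is to test membership instead. The helper below is a hypothetical sketch, not part of CarbonMetadataUtil; it uses only the getters that appear in the method above.

import java.util.List;

import org.apache.carbondata.format.DataChunk;
import org.apache.carbondata.format.Encoding;

public class ChunkClassifier {
    // Hypothetical helper: a chunk counts as a dictionary key chunk if
    // DICTIONARY appears anywhere in its encoder list, not only at index 0.
    public static boolean isDictionaryChunk(DataChunk chunk) {
        List<Encoding> encoders = chunk.getEncoders();
        return encoders != null && encoders.contains(Encoding.DICTIONARY);
    }
}

For well-formed footers this yields the same dictChunks/nonDictColChunks split as the code above, but without depending on the order in which encodings were added.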

Example 3 with DataChunk

Use of org.apache.carbondata.format.DataChunk in project carbondata by apache.

The class CarbonMetadataUtilTest, method setUp.

@BeforeClass
public static void setUp() {
    // array initializers with autoboxed literals replace the deprecated new Long(String) constructor
    objMaxArr = new Long[] { 111111L, 121111L, 131111L, 141111L, 151111L, 161111L };
    objMinArr = new Long[] { 119L, 121L, 131L, 141L, 151L, 161L };
    objDecimal = new int[] { 0, 0, 0, 0, 0, 0 };
    columnSchemaList = new ArrayList<>();
    List<Encoding> encodingList = new ArrayList<>();
    encodingList.add(Encoding.BIT_PACKED);
    encodingList.add(Encoding.DELTA);
    encodingList.add(Encoding.INVERTED_INDEX);
    encodingList.add(Encoding.DIRECT_DICTIONARY);
    byteArr = "412111".getBytes();
    byte[] byteArr1 = "321".getBytes();
    byte[] byteArr2 = "356".getBytes();
    byteBufferList = new ArrayList<>();
    // Note: these buffers are filled but never flipped; see the note after this example.
    ByteBuffer bb = ByteBuffer.allocate(byteArr.length);
    bb.put(byteArr);
    ByteBuffer bb1 = ByteBuffer.allocate(byteArr1.length);
    bb1.put(byteArr1);
    ByteBuffer bb2 = ByteBuffer.allocate(byteArr2.length);
    bb2.put(byteArr2);
    byteBufferList.add(bb);
    byteBufferList.add(bb1);
    byteBufferList.add(bb2);
    DataChunk dataChunk = new DataChunk();
    dataChunk.setEncoders(encodingList);
    dataChunk.setEncoder_meta(byteBufferList);
    List<DataChunk> dataChunkList = new ArrayList<>();
    dataChunkList.add(dataChunk);
    dataChunkList.add(dataChunk);
    BlockletInfo blockletInfo = new BlockletInfo();
    blockletInfo.setColumn_data_chunks(dataChunkList);
    blockletInfoList = new ArrayList<>();
    blockletInfoList.add(blockletInfo);
    blockletInfoList.add(blockletInfo);
    ValueEncoderMeta meta = CarbonTestUtil.createValueEncoderMeta();
    meta.setDecimal(5);
    meta.setMinValue(objMinArr);
    meta.setMaxValue(objMaxArr);
    meta.setType(org.apache.carbondata.core.metadata.datatype.DataType.DOUBLE_MEASURE_CHAR);
    List<Encoding> encoders = new ArrayList<>();
    encoders.add(Encoding.INVERTED_INDEX);
    encoders.add(Encoding.BIT_PACKED);
    encoders.add(Encoding.DELTA);
    encoders.add(Encoding.DICTIONARY);
    encoders.add(Encoding.DIRECT_DICTIONARY);
    encoders.add(Encoding.RLE);
    ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
    ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
    ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
    ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
    ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
    columnSchemas = new ArrayList<>();
    columnSchemas.add(columnSchema);
    columnSchemas.add(columnSchema1);
    columnSchemas.add(columnSchema2);
    columnSchemas.add(columnSchema3);
    columnSchemas.add(columnSchema4);
    columnSchemas.add(columnSchema5);
    columnSchemas.add(columnSchema6);
    columnSchemas.add(columnSchema7);
}
Also used: ArrayList(java.util.ArrayList) BlockletInfo(org.apache.carbondata.format.BlockletInfo) Encoding(org.apache.carbondata.format.Encoding) DataChunk(org.apache.carbondata.format.DataChunk) ColumnSchema(org.apache.carbondata.format.ColumnSchema) ValueEncoderMeta(org.apache.carbondata.core.metadata.ValueEncoderMeta) ByteBuffer(java.nio.ByteBuffer) BeforeClass(org.junit.BeforeClass)
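
A detail worth noticing in the fixture above: each ByteBuffer is filled with put() but never flipped, so its position is left at the limit. That is harmless for consumers that read through array(), but any reader that respects position and remaining() would see an empty buffer. The sketch below shows the two standard NIO patterns that leave a buffer ready for reading; it is plain java.nio, with no carbondata types involved.

import java.nio.ByteBuffer;

public class BufferSketch {
    public static void main(String[] args) {
        byte[] bytes = "412111".getBytes();

        // Pattern 1: allocate + put + flip, leaving the buffer ready to read.
        ByteBuffer filled = ByteBuffer.allocate(bytes.length);
        filled.put(bytes);
        filled.flip();                           // position 0, limit = length
        System.out.println(filled.remaining());  // prints 6

        // Pattern 2: wrap the array directly; position starts at 0.
        ByteBuffer wrapped = ByteBuffer.wrap(bytes);
        System.out.println(wrapped.remaining()); // prints 6
    }
}

ByteBuffer.wrap is the pattern getBlockletInfo itself uses for the encoder meta in Example 1.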

Aggregations

ArrayList (java.util.ArrayList): 3 usages
BlockletInfo (org.apache.carbondata.format.BlockletInfo): 3 usages
DataChunk (org.apache.carbondata.format.DataChunk): 3 usages
ByteBuffer (java.nio.ByteBuffer): 2 usages
ValueEncoderMeta (org.apache.carbondata.core.metadata.ValueEncoderMeta): 2 usages
Encoding (org.apache.carbondata.format.Encoding): 2 usages
BlockletInfoColumnar (org.apache.carbondata.core.metadata.BlockletInfoColumnar): 1 usage
ColumnSchema (org.apache.carbondata.format.ColumnSchema): 1 usage
PresenceMeta (org.apache.carbondata.format.PresenceMeta): 1 usage
BeforeClass (org.junit.BeforeClass): 1 usage