Search in sources:

Example 1 with PresenceMeta

use of org.apache.carbondata.format.PresenceMeta in project carbondata by apache.

the class CarbonMetadataUtil method getDatachunk2.

/**
 * Builds the thrift {@link DataChunk2} metadata for every column of a
 * blocklet: one chunk per dimension key block first, then one per measure.
 *
 * @param blockletInfoColumnar blocklet level metadata (lengths, sort flags, null bitsets)
 * @param columnSchema         schema of all table columns
 * @param segmentProperties    segment properties used to resolve column encodings
 * @return data chunk metadata, dimension chunks followed by measure chunks
 * @throws IOException if the value-encoder meta cannot be serialized
 */
public static List<DataChunk2> getDatachunk2(BlockletInfoColumnar blockletInfoColumnar, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties) throws IOException {
    List<DataChunk2> chunks = new ArrayList<DataChunk2>();
    boolean[] sortedFlags = blockletInfoColumnar.getIsSortedKeyColumn();
    boolean[] rleFlags = blockletInfoColumnar.getAggKeyBlock();
    boolean[] colGroupFlags = blockletInfoColumnar.getColGrpBlocks();
    int[] keyLengths = blockletInfoColumnar.getKeyLengths();
    // Separate running positions: the RLE and inverted-index arrays only hold
    // entries for the columns that actually use those encodings.
    int rlePos = 0;
    int invertedIndexPos = 0;
    for (int col = 0; col < keyLengths.length; col++) {
        DataChunk2 chunk = new DataChunk2();
        chunk.setChunk_meta(getChunkCompressionMeta());
        List<Encoding> encodingList = new ArrayList<Encoding>();
        if (containsEncoding(col, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
            encodingList.add(Encoding.DICTIONARY);
        }
        if (containsEncoding(col, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
            encodingList.add(Encoding.DIRECT_DICTIONARY);
        }
        chunk.setRowMajor(colGroupFlags[col]);
        // TODO : Once schema PR is merged and information needs to be passed here.
        chunk.setData_page_length(keyLengths[col]);
        if (rleFlags[col]) {
            chunk.setRle_page_length(blockletInfoColumnar.getDataIndexMapLength()[rlePos]);
            encodingList.add(Encoding.RLE);
            rlePos++;
        }
        chunk.setSort_state(sortedFlags[col] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
        if (!sortedFlags[col]) {
            chunk.setRowid_page_length(blockletInfoColumnar.getKeyBlockIndexLength()[invertedIndexPos]);
            encodingList.add(Encoding.INVERTED_INDEX);
            invertedIndexPos++;
        }
        // TODO : Right now the encodings are happening at runtime. change as per
        // this encoders.
        chunk.setEncoders(encodingList);
        chunks.add(chunk);
    }
    int[] measureLengths = blockletInfoColumnar.getMeasureLength();
    for (int msr = 0; msr < measureLengths.length; msr++) {
        DataChunk2 chunk = new DataChunk2();
        chunk.setChunk_meta(getChunkCompressionMeta());
        chunk.setRowMajor(false);
        // TODO : Once schema PR is merged and information needs to be passed here.
        chunk.setData_page_length(measureLengths[msr]);
        // Measures are currently always delta encoded.
        List<Encoding> encodingList = new ArrayList<Encoding>();
        encodingList.add(Encoding.DELTA);
        chunk.setEncoders(encodingList);
        // TODO writing dummy presence meta need to set actual presence meta
        PresenceMeta nullInfo = new PresenceMeta();
        nullInfo.setPresent_bit_streamIsSet(true);
        nullInfo.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor().compressByte(blockletInfoColumnar.getMeasureNullValueIndex()[msr].toByteArray()));
        chunk.setPresence(nullInfo);
        // TODO : Need to write ValueCompression meta here.
        List<ByteBuffer> metaBuffers = new ArrayList<ByteBuffer>();
        metaBuffers.add(ByteBuffer.wrap(serializeEncoderMeta(createValueEncoderMeta(blockletInfoColumnar.getCompressionModel(), msr))));
        chunk.setEncoder_meta(metaBuffers);
        chunks.add(chunk);
    }
    return chunks;
}
Also used : DataChunk2(org.apache.carbondata.format.DataChunk2) ArrayList(java.util.ArrayList) Encoding(org.apache.carbondata.format.Encoding) PresenceMeta(org.apache.carbondata.format.PresenceMeta) ByteBuffer(java.nio.ByteBuffer)

Example 2 with PresenceMeta

use of org.apache.carbondata.format.PresenceMeta in project carbondata by apache.

the class CarbonMetadataUtil method getDataChunk2.

/**
 * Serializes one thrift {@link DataChunk2} per column of the given node
 * holder: either one per dimension key block or one per measure, depending
 * on {@code isDimensionColumn}.
 *
 * @param nodeHolder        node holder carrying the page data and metadata
 * @param columnSchema      table columns, used to resolve dictionary encodings
 * @param segmentProperties segment properties
 * @param isDimensionColumn true to emit dimension chunks, false for measure chunks
 * @return serialized data chunk2 byte arrays, one per column
 * @throws IOException if the encoder meta cannot be serialized
 */
public static List<byte[]> getDataChunk2(NodeHolder nodeHolder, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, boolean isDimensionColumn) throws IOException {
    List<byte[]> dataChunkBuffer = new ArrayList<>();
    if (isDimensionColumn) {
        for (int i = 0; i < nodeHolder.getKeyArray().length; i++) {
            DataChunk2 dataChunk = new DataChunk2();
            dataChunk.min_max = new BlockletMinMaxIndex();
            dataChunk.setChunk_meta(getChunkCompressionMeta());
            dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount());
            List<Encoding> encodings = new ArrayList<Encoding>();
            dataChunk.setData_page_length(nodeHolder.getKeyLengths()[i]);
            if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
                encodings.add(Encoding.DICTIONARY);
            }
            if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
                encodings.add(Encoding.DIRECT_DICTIONARY);
            }
            dataChunk.setRowMajor(nodeHolder.getColGrpBlocks()[i]);
            if (nodeHolder.getAggBlocks()[i]) {
                dataChunk.setRle_page_length(nodeHolder.getDataIndexMapLength()[i]);
                encodings.add(Encoding.RLE);
            }
            dataChunk.setSort_state(nodeHolder.getIsSortedKeyBlock()[i] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
            if (!nodeHolder.getIsSortedKeyBlock()[i]) {
                dataChunk.setRowid_page_length(nodeHolder.getKeyBlockIndexLength()[i]);
                encodings.add(Encoding.INVERTED_INDEX);
            }
            dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getColumnMaxData()[i]));
            dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getColumnMinData()[i]));
            dataChunk.setEncoders(encodings);
            dataChunkBuffer.add(CarbonUtil.getByteArray(dataChunk));
        }
    } else {
        for (int i = 0; i < nodeHolder.getDataArray().length; i++) {
            DataChunk2 dataChunk = new DataChunk2();
            dataChunk.min_max = new BlockletMinMaxIndex();
            dataChunk.setChunk_meta(getChunkCompressionMeta());
            dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount());
            dataChunk.setData_page_length(nodeHolder.getDataArray()[i].length);
            dataChunk.setRowMajor(false);
            // TODO : Right now the encodings are happening at runtime. change as
            // per this encoders. Measures are currently always delta encoded.
            List<Encoding> encodings = new ArrayList<Encoding>();
            encodings.add(Encoding.DELTA);
            // Set once, after the list is fully built (the previous version
            // redundantly called setEncoders twice).
            dataChunk.setEncoders(encodings);
            // TODO writing dummy presence meta need to set actual presence meta
            PresenceMeta presenceMeta = new PresenceMeta();
            presenceMeta.setPresent_bit_streamIsSet(true);
            presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor().compressByte(nodeHolder.getMeasureNullValueIndex()[i].toByteArray()));
            dataChunk.setPresence(presenceMeta);
            List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
            encoderMetaList.add(ByteBuffer.wrap(serializeEncodeMetaUsingByteBuffer(createValueEncoderMeta(nodeHolder.getCompressionModel(), i))));
            dataChunk.setEncoder_meta(encoderMetaList);
            dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMaxData()[i]));
            dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMinData()[i]));
            dataChunkBuffer.add(CarbonUtil.getByteArray(dataChunk));
        }
    }
    return dataChunkBuffer;
}
Also used : DataChunk2(org.apache.carbondata.format.DataChunk2) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) ArrayList(java.util.ArrayList) Encoding(org.apache.carbondata.format.Encoding) PresenceMeta(org.apache.carbondata.format.PresenceMeta) ByteBuffer(java.nio.ByteBuffer)

Example 3 with PresenceMeta

use of org.apache.carbondata.format.PresenceMeta in project carbondata by apache.

the class CarbonMetadataUtil method getBlockletInfo.

/**
 * Builds the thrift {@link BlockletInfo} for a blocklet: one {@link DataChunk}
 * per dimension key block followed by one per measure, populating page
 * offsets/lengths, encodings and (for measures) the null-value presence meta.
 *
 * @param blockletInfoColumnar blocklet level metadata (offsets, lengths, flags)
 * @param columnSchema         table columns, used to resolve dictionary encodings
 * @param segmentProperties    segment properties
 * @return populated blocklet info
 * @throws IOException if the value-encoder meta cannot be serialized
 */
private static BlockletInfo getBlockletInfo(BlockletInfoColumnar blockletInfoColumnar, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties) throws IOException {
    BlockletInfo blockletInfo = new BlockletInfo();
    blockletInfo.setNum_rows(blockletInfoColumnar.getNumberOfKeys());
    List<DataChunk> colDataChunks = new ArrayList<DataChunk>();
    // j advances only for unsorted columns: it indexes the inverted-index
    // offset/length arrays, which have one entry per unsorted column.
    int j = 0;
    // aggregateIndex advances only for RLE (agg) key blocks, for the same reason.
    int aggregateIndex = 0;
    boolean[] isSortedKeyColumn = blockletInfoColumnar.getIsSortedKeyColumn();
    boolean[] aggKeyBlock = blockletInfoColumnar.getAggKeyBlock();
    boolean[] colGrpblock = blockletInfoColumnar.getColGrpBlocks();
    for (int i = 0; i < blockletInfoColumnar.getKeyLengths().length; i++) {
        DataChunk dataChunk = new DataChunk();
        dataChunk.setChunk_meta(getChunkCompressionMeta());
        List<Encoding> encodings = new ArrayList<Encoding>();
        if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
            encodings.add(Encoding.DICTIONARY);
        }
        if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
            encodings.add(Encoding.DIRECT_DICTIONARY);
        }
        dataChunk.setRowMajor(colGrpblock[i]);
        // TODO : Once schema PR is merged and information needs to be passed
        // here.
        dataChunk.setColumn_ids(new ArrayList<Integer>());
        dataChunk.setData_page_length(blockletInfoColumnar.getKeyLengths()[i]);
        dataChunk.setData_page_offset(blockletInfoColumnar.getKeyOffSets()[i]);
        if (aggKeyBlock[i]) {
            dataChunk.setRle_page_offset(blockletInfoColumnar.getDataIndexMapOffsets()[aggregateIndex]);
            dataChunk.setRle_page_length(blockletInfoColumnar.getDataIndexMapLength()[aggregateIndex]);
            encodings.add(Encoding.RLE);
            aggregateIndex++;
        }
        dataChunk.setSort_state(isSortedKeyColumn[i] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
        if (!isSortedKeyColumn[i]) {
            dataChunk.setRowid_page_offset(blockletInfoColumnar.getKeyBlockIndexOffSets()[j]);
            dataChunk.setRowid_page_length(blockletInfoColumnar.getKeyBlockIndexLength()[j]);
            // guard against adding INVERTED_INDEX twice for the same column
            if (!encodings.contains(Encoding.INVERTED_INDEX)) {
                encodings.add(Encoding.INVERTED_INDEX);
            }
            j++;
        }
        // TODO : Right now the encodings are happening at runtime. change as per
        // this encoders.
        dataChunk.setEncoders(encodings);
        colDataChunks.add(dataChunk);
    }
    for (int i = 0; i < blockletInfoColumnar.getMeasureLength().length; i++) {
        DataChunk dataChunk = new DataChunk();
        dataChunk.setChunk_meta(getChunkCompressionMeta());
        dataChunk.setRowMajor(false);
        // TODO : Once schema PR is merged and information needs to be passed
        // here.
        dataChunk.setColumn_ids(new ArrayList<Integer>());
        dataChunk.setData_page_length(blockletInfoColumnar.getMeasureLength()[i]);
        dataChunk.setData_page_offset(blockletInfoColumnar.getMeasureOffset()[i]);
        // TODO : Right now the encodings are happening at runtime. change as per
        // this encoders. Measures are currently always delta encoded.
        List<Encoding> encodings = new ArrayList<Encoding>();
        encodings.add(Encoding.DELTA);
        dataChunk.setEncoders(encodings);
        // TODO writing dummy presence meta need to set actual presence
        // meta
        PresenceMeta presenceMeta = new PresenceMeta();
        presenceMeta.setPresent_bit_streamIsSet(true);
        // NOTE(review): unlike getDatachunk2/getDataChunk2, the bit stream here
        // is stored UNcompressed — confirm the corresponding reader expects this.
        presenceMeta.setPresent_bit_stream(blockletInfoColumnar.getMeasureNullValueIndex()[i].toByteArray());
        dataChunk.setPresence(presenceMeta);
        // TODO : Need to write ValueCompression meta here.
        List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
        encoderMetaList.add(ByteBuffer.wrap(serializeEncoderMeta(createValueEncoderMeta(blockletInfoColumnar.getCompressionModel(), i))));
        dataChunk.setEncoder_meta(encoderMetaList);
        colDataChunks.add(dataChunk);
    }
    blockletInfo.setColumn_data_chunks(colDataChunks);
    return blockletInfo;
}
Also used : BlockletInfo(org.apache.carbondata.format.BlockletInfo) ArrayList(java.util.ArrayList) Encoding(org.apache.carbondata.format.Encoding) ByteBuffer(java.nio.ByteBuffer) DataChunk(org.apache.carbondata.format.DataChunk) PresenceMeta(org.apache.carbondata.format.PresenceMeta)

Example 4 with PresenceMeta

use of org.apache.carbondata.format.PresenceMeta in project carbondata by apache.

the class CarbonMetadataUtil method getDatachunk2.

/**
 * Builds one {@link DataChunk2} per node holder for the single column at
 * {@code index}: a dimension key block when {@code isDimensionColumn} is true,
 * otherwise a measure chunk with presence meta and encoder meta.
 *
 * @param nodeHolderList    node holders (one per page) to read metadata from
 * @param columnSchema      table columns, used to resolve dictionary encodings
 * @param segmentProperties segment properties
 * @param index             column index within the dimension or measure arrays
 * @param isDimensionColumn true for a dimension column, false for a measure
 * @return one data chunk per node holder, in list order
 * @throws IOException if the encoder meta cannot be serialized
 */
private static List<DataChunk2> getDatachunk2(List<NodeHolder> nodeHolderList, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties, int index, boolean isDimensionColumn) throws IOException {
    List<DataChunk2> colDataChunks = new ArrayList<DataChunk2>();
    DataChunk2 dataChunk = null;
    NodeHolder nodeHolder = null;
    for (int i = 0; i < nodeHolderList.size(); i++) {
        nodeHolder = nodeHolderList.get(i);
        dataChunk = new DataChunk2();
        dataChunk.min_max = new BlockletMinMaxIndex();
        dataChunk.setChunk_meta(getChunkCompressionMeta());
        dataChunk.setNumberOfRowsInpage(nodeHolder.getEntryCount());
        List<Encoding> encodings = new ArrayList<Encoding>();
        if (isDimensionColumn) {
            dataChunk.setData_page_length(nodeHolder.getKeyLengths()[index]);
            if (containsEncoding(index, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
                encodings.add(Encoding.DICTIONARY);
            }
            if (containsEncoding(index, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
                encodings.add(Encoding.DIRECT_DICTIONARY);
            }
            dataChunk.setRowMajor(nodeHolder.getColGrpBlocks()[index]);
            if (nodeHolder.getAggBlocks()[index]) {
                dataChunk.setRle_page_length(nodeHolder.getDataIndexMapLength()[index]);
                encodings.add(Encoding.RLE);
            }
            dataChunk.setSort_state(nodeHolder.getIsSortedKeyBlock()[index] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
            if (!nodeHolder.getIsSortedKeyBlock()[index]) {
                dataChunk.setRowid_page_length(nodeHolder.getKeyBlockIndexLength()[index]);
                encodings.add(Encoding.INVERTED_INDEX);
            }
            dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getColumnMaxData()[index]));
            dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getColumnMinData()[index]));
        } else {
            dataChunk.setData_page_length(nodeHolder.getDataArray()[index].length);
            dataChunk.setRowMajor(false);
            // TODO : Right now the encodings are happening at runtime. change as
            // per this encoders. Measures are currently always delta encoded.
            encodings.add(Encoding.DELTA);
            // TODO writing dummy presence meta need to set actual presence
            // meta
            PresenceMeta presenceMeta = new PresenceMeta();
            presenceMeta.setPresent_bit_streamIsSet(true);
            presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor().compressByte(nodeHolder.getMeasureNullValueIndex()[index].toByteArray()));
            dataChunk.setPresence(presenceMeta);
            List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
            encoderMetaList.add(ByteBuffer.wrap(serializeEncodeMetaUsingByteBuffer(createValueEncoderMeta(nodeHolder.getCompressionModel(), index))));
            dataChunk.setEncoder_meta(encoderMetaList);
            dataChunk.min_max.addToMax_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMaxData()[index]));
            dataChunk.min_max.addToMin_values(ByteBuffer.wrap(nodeHolder.getMeasureColumnMinData()[index]));
        }
        // Set the encodings exactly once, after both branches have finished
        // building the list (previously the measure branch called setEncoders
        // three times on the same list, redundantly).
        dataChunk.setEncoders(encodings);
        colDataChunks.add(dataChunk);
    }
    return colDataChunks;
}
Also used : DataChunk2(org.apache.carbondata.format.DataChunk2) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) ArrayList(java.util.ArrayList) Encoding(org.apache.carbondata.format.Encoding) PresenceMeta(org.apache.carbondata.format.PresenceMeta) ByteBuffer(java.nio.ByteBuffer)

Example 5 with PresenceMeta

use of org.apache.carbondata.format.PresenceMeta in project carbondata by apache.

the class ColumnPageEncoder method fillNullBitSet.

/**
 * Populates the presence (null-bitset) metadata of the given data chunk from
 * the page's null bits, compressing the raw bitset bytes before storing them.
 */
private void fillNullBitSet(ColumnPage inputPage, DataChunk2 dataChunk) {
    byte[] rawNullBits = inputPage.getNullBits().toByteArray();
    PresenceMeta meta = new PresenceMeta();
    meta.setPresent_bit_streamIsSet(true);
    meta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor().compressByte(rawNullBits));
    dataChunk.setPresence(meta);
}
Also used : Compressor(org.apache.carbondata.core.datastore.compression.Compressor) PresenceMeta(org.apache.carbondata.format.PresenceMeta)

Aggregations

PresenceMeta (org.apache.carbondata.format.PresenceMeta)5 ByteBuffer (java.nio.ByteBuffer)4 ArrayList (java.util.ArrayList)4 Encoding (org.apache.carbondata.format.Encoding)4 DataChunk2 (org.apache.carbondata.format.DataChunk2)3 BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex)2 Compressor (org.apache.carbondata.core.datastore.compression.Compressor)1 BlockletInfo (org.apache.carbondata.format.BlockletInfo)1 DataChunk (org.apache.carbondata.format.DataChunk)1