Example 1 with Encoding

use of org.apache.carbondata.format.Encoding in project carbondata by apache.

the class CarbonMetadataUtil method getDatachunk2.

/**
   * Builds the DataChunk2 metadata object for each column of the given blocklet.
   *
   * @param blockletInfoColumnar blocklet info
   * @param columnSchema         list of column schemas
   * @param segmentProperties    segment properties
   * @return list of data chunks
   * @throws IOException if the encoder meta cannot be serialized
   */
public static List<DataChunk2> getDatachunk2(BlockletInfoColumnar blockletInfoColumnar, List<ColumnSchema> columnSchema, SegmentProperties segmentProperties) throws IOException {
    List<DataChunk2> colDataChunks = new ArrayList<DataChunk2>();
    int rowIdIndex = 0;
    int aggregateIndex = 0;
    boolean[] isSortedKeyColumn = blockletInfoColumnar.getIsSortedKeyColumn();
    boolean[] aggKeyBlock = blockletInfoColumnar.getAggKeyBlock();
    boolean[] colGrpblock = blockletInfoColumnar.getColGrpBlocks();
    for (int i = 0; i < blockletInfoColumnar.getKeyLengths().length; i++) {
        DataChunk2 dataChunk = new DataChunk2();
        dataChunk.setChunk_meta(getChunkCompressionMeta());
        List<Encoding> encodings = new ArrayList<Encoding>();
        if (containsEncoding(i, Encoding.DICTIONARY, columnSchema, segmentProperties)) {
            encodings.add(Encoding.DICTIONARY);
        }
        if (containsEncoding(i, Encoding.DIRECT_DICTIONARY, columnSchema, segmentProperties)) {
            encodings.add(Encoding.DIRECT_DICTIONARY);
        }
        dataChunk.setRowMajor(colGrpblock[i]);
        // TODO : once the schema PR is merged, the required information needs to be passed here.
        dataChunk.setData_page_length(blockletInfoColumnar.getKeyLengths()[i]);
        if (aggKeyBlock[i]) {
            dataChunk.setRle_page_length(blockletInfoColumnar.getDataIndexMapLength()[aggregateIndex]);
            encodings.add(Encoding.RLE);
            aggregateIndex++;
        }
        dataChunk.setSort_state(isSortedKeyColumn[i] ? SortState.SORT_EXPLICIT : SortState.SORT_NATIVE);
        if (!isSortedKeyColumn[i]) {
            dataChunk.setRowid_page_length(blockletInfoColumnar.getKeyBlockIndexLength()[rowIdIndex]);
            encodings.add(Encoding.INVERTED_INDEX);
            rowIdIndex++;
        }
        // TODO : the encodings are currently decided at runtime; change to use these encoders.
        dataChunk.setEncoders(encodings);
        colDataChunks.add(dataChunk);
    }
    for (int i = 0; i < blockletInfoColumnar.getMeasureLength().length; i++) {
        DataChunk2 dataChunk = new DataChunk2();
        dataChunk.setChunk_meta(getChunkCompressionMeta());
        dataChunk.setRowMajor(false);
        // TODO : once the schema PR is merged, the required information needs to be passed here.
        dataChunk.setData_page_length(blockletInfoColumnar.getMeasureLength()[i]);
        // TODO : the encodings are currently decided at runtime; change to use these encoders.
        List<Encoding> encodings = new ArrayList<Encoding>();
        encodings.add(Encoding.DELTA);
        dataChunk.setEncoders(encodings);
        // TODO : writing a dummy presence meta; the actual presence meta needs to be set here.
        PresenceMeta presenceMeta = new PresenceMeta();
        presenceMeta.setPresent_bit_streamIsSet(true);
        presenceMeta.setPresent_bit_stream(CompressorFactory.getInstance().getCompressor().compressByte(blockletInfoColumnar.getMeasureNullValueIndex()[i].toByteArray()));
        dataChunk.setPresence(presenceMeta);
        // TODO : need to write the ValueCompression meta here.
        List<ByteBuffer> encoderMetaList = new ArrayList<ByteBuffer>();
        encoderMetaList.add(ByteBuffer.wrap(serializeEncoderMeta(createValueEncoderMeta(blockletInfoColumnar.getCompressionModel(), i))));
        dataChunk.setEncoder_meta(encoderMetaList);
        colDataChunks.add(dataChunk);
    }
    return colDataChunks;
}
Also used : DataChunk2 (org.apache.carbondata.format.DataChunk2), ArrayList (java.util.ArrayList), Encoding (org.apache.carbondata.format.Encoding), PresenceMeta (org.apache.carbondata.format.PresenceMeta), ByteBuffer (java.nio.ByteBuffer)
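
As a rough usage sketch, the loop below shows how a caller might inspect the chunks returned by getDatachunk2. The getEncoders() and getData_page_length() getters are assumed here to be the usual Thrift-generated counterparts of the setters used above; they do not appear in this example.

import java.util.List;

import org.apache.carbondata.format.DataChunk2;
import org.apache.carbondata.format.Encoding;

// Hedged sketch, not carbondata code: print which encodings each chunk
// carries. Assumes Thrift-generated getters matching the setters above.
public final class ChunkEncodingPrinter {
    public static void print(List<DataChunk2> chunks) {
        for (DataChunk2 chunk : chunks) {
            List<Encoding> encoders = chunk.getEncoders(); // assumed getter
            System.out.println("page length " + chunk.getData_page_length() // assumed getter
                    + ", encodings " + encoders);
        }
    }
}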

Example 2 with Encoding

use of org.apache.carbondata.format.Encoding in project carbondata by apache.

the class DataFileFooterConverterTest method testReadDataFileFooter.

@Test
public void testReadDataFileFooter() throws Exception {
    DataFileFooterConverter dataFileFooterConverter = new DataFileFooterConverter();
    DataFileFooter dataFileFooter = new DataFileFooter();
    List<Integer> column_cardinalities = new ArrayList<>();
    column_cardinalities.add(new Integer("1"));
    column_cardinalities.add(new Integer("2"));
    column_cardinalities.add(new Integer("3"));
    org.apache.carbondata.format.SegmentInfo segmentInfo1 = new org.apache.carbondata.format.SegmentInfo(3, column_cardinalities);
    List<Encoding> encoders = new ArrayList<>();
    encoders.add(Encoding.INVERTED_INDEX);
    encoders.add(Encoding.BIT_PACKED);
    encoders.add(Encoding.DELTA);
    encoders.add(Encoding.DICTIONARY);
    encoders.add(Encoding.DIRECT_DICTIONARY);
    encoders.add(Encoding.RLE);
    ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
    ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
    ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
    ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
    ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
    final List<ColumnSchema> columnSchemas = new ArrayList<>();
    columnSchemas.add(columnSchema);
    columnSchemas.add(columnSchema1);
    columnSchemas.add(columnSchema2);
    columnSchemas.add(columnSchema3);
    columnSchemas.add(columnSchema4);
    columnSchemas.add(columnSchema5);
    columnSchemas.add(columnSchema6);
    columnSchemas.add(columnSchema7);
    org.apache.carbondata.format.BlockletIndex blockletIndex1 = new org.apache.carbondata.format.BlockletIndex();
    List<org.apache.carbondata.format.BlockletIndex> blockletIndexArrayList = new ArrayList<>();
    blockletIndexArrayList.add(blockletIndex1);
    org.apache.carbondata.format.BlockletInfo blockletInfo = new org.apache.carbondata.format.BlockletInfo();
    List<org.apache.carbondata.format.BlockletInfo> blockletInfoArrayList = new ArrayList<>();
    blockletInfoArrayList.add(blockletInfo);
    final FileFooter fileFooter = new FileFooter(1, 3, columnSchemas, segmentInfo1, blockletIndexArrayList);
    fileFooter.setBlocklet_info_list(blockletInfoArrayList);
    BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
    blockletBTreeIndex.setStart_key("1".getBytes());
    blockletBTreeIndex.setEnd_key("3".getBytes());
    blockletIndex1.setB_tree_index(blockletBTreeIndex);
    BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
    blockletMinMaxIndex.setMax_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 2)));
    blockletMinMaxIndex.setMin_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 1)));
    blockletIndex1.setMin_max_index(blockletMinMaxIndex);
    new MockUp<FileFactory>() {

        @SuppressWarnings("unused")
        @Mock
        public FileFactory.FileType getFileType(String path) {
            return FileFactory.FileType.LOCAL;
        }

        @SuppressWarnings("unused")
        @Mock
        public FileReader getFileHolder(FileFactory.FileType fileType) {
            return new FileReaderImpl();
        }
    };
    new MockUp<FileReaderImpl>() {

        @SuppressWarnings("unused")
        @Mock
        public long readLong(String filePath, long offset) {
            return 1;
        }
    };
    new MockUp<CarbonFooterReader>() {

        @SuppressWarnings("unused")
        @Mock
        public FileFooter readFooter() throws IOException {
            return fileFooter;
        }
    };
    SegmentInfo segmentInfo = new SegmentInfo();
    int[] arr = { 1, 2, 3 };
    segmentInfo.setColumnCardinality(arr);
    dataFileFooter.setNumberOfRows(3);
    dataFileFooter.setSegmentInfo(segmentInfo);
    TableBlockInfo info = new TableBlockInfo("/file.carbondata", 1, "0", new String[0], 1, ColumnarFormatVersion.V1, null);
    DataFileFooter result = dataFileFooterConverter.readDataFileFooter(info);
    assertEquals(result.getNumberOfRows(), 3);
}
Also used : TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo), ArrayList (java.util.ArrayList), ColumnSchema (org.apache.carbondata.format.ColumnSchema), MockUp (mockit.MockUp), FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory), DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter), BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex), BlockletBTreeIndex (org.apache.carbondata.format.BlockletBTreeIndex), FileReaderImpl (org.apache.carbondata.core.datastore.impl.FileReaderImpl), FileFooter (org.apache.carbondata.format.FileFooter), Encoding (org.apache.carbondata.format.Encoding), SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo), Test (org.junit.Test)
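
The test isolates readDataFileFooter from real file I/O with JMockit fakes. A minimal, self-contained sketch of that MockUp/@Mock mechanism, using a made-up Clock class rather than anything from carbondata:

import mockit.Mock;
import mockit.MockUp;

// Minimal JMockit fake: after the MockUp is instantiated, calls to the real
// Clock.now() are redirected to the @Mock method for the rest of the test.
class Clock {
    static long now() {
        return System.currentTimeMillis();
    }
}

class ClockFakeDemo {
    void demo() {
        new MockUp<Clock>() {
            @Mock
            long now() {
                return 42L; // canned value returned instead of the real time
            }
        };
        // From here on, Clock.now() returns 42, just as the mocked
        // FileFactory, FileReaderImpl and CarbonFooterReader above return
        // canned values instead of touching the file system.
    }
}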

Example 3 with Encoding

use of org.apache.carbondata.format.Encoding in project carbondata by apache.

the class AdaptiveDeltaFloatingCodec method createEncoder.

@Override
public ColumnPageEncoder createEncoder(Map<String, String> parameter) {
    final Compressor compressor = CompressorFactory.getInstance().getCompressor();
    return new ColumnPageEncoder() {

        @Override
        protected byte[] encodeData(ColumnPage input) throws MemoryException, IOException {
            if (encodedPage != null) {
                throw new IllegalStateException("already encoded");
            }
            encodedPage = ColumnPage.newPage(input.getColumnSpec(), targetDataType, input.getPageSize());
            input.convertValue(converter);
            byte[] result = encodedPage.compress(compressor);
            encodedPage.freeMemory();
            return result;
        }

        @Override
        protected List<Encoding> getEncodingList() {
            List<Encoding> encodings = new ArrayList<Encoding>();
            encodings.add(Encoding.ADAPTIVE_DELTA_FLOATING);
            return encodings;
        }

        @Override
        protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
            return new ColumnPageEncoderMeta(inputPage.getColumnSpec(), targetDataType, stats, compressor.getName());
        }
    };
}
Also used : ColumnPageEncoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder), LazyColumnPage (org.apache.carbondata.core.datastore.page.LazyColumnPage), ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage), ColumnPageEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta), ArrayList (java.util.ArrayList), Compressor (org.apache.carbondata.core.datastore.compression.Compressor), Encoding (org.apache.carbondata.format.Encoding)

Example 4 with Encoding

use of org.apache.carbondata.format.Encoding in project carbondata by apache.

the class AdaptiveIntegralCodec method createEncoder.

@Override
public ColumnPageEncoder createEncoder(Map<String, String> parameter) {
    final Compressor compressor = CompressorFactory.getInstance().getCompressor();
    return new ColumnPageEncoder() {

        @Override
        protected byte[] encodeData(ColumnPage input) throws MemoryException, IOException {
            if (encodedPage != null) {
                throw new IllegalStateException("already encoded");
            }
            encodedPage = ColumnPage.newPage(input.getColumnSpec(), targetDataType, input.getPageSize());
            input.convertValue(converter);
            byte[] result = encodedPage.compress(compressor);
            encodedPage.freeMemory();
            return result;
        }

        @Override
        protected List<Encoding> getEncodingList() {
            List<Encoding> encodings = new ArrayList<Encoding>();
            encodings.add(Encoding.ADAPTIVE_INTEGRAL);
            return encodings;
        }

        @Override
        protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
            return new ColumnPageEncoderMeta(inputPage.getColumnSpec(), targetDataType, stats, compressor.getName());
        }
    };
}
Also used : ColumnPageEncoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder), LazyColumnPage (org.apache.carbondata.core.datastore.page.LazyColumnPage), ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage), ColumnPageEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta), ArrayList (java.util.ArrayList), Compressor (org.apache.carbondata.core.datastore.compression.Compressor), Encoding (org.apache.carbondata.format.Encoding)
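
Examples 3 and 4 are the same template with a different Encoding constant (ADAPTIVE_DELTA_FLOATING versus ADAPTIVE_INTEGRAL). A hedged distillation of that shared shape, with the carbondata types replaced by placeholders:

import java.util.Collections;
import java.util.List;

// Distilled pattern (placeholders, not carbondata types): each codec returns
// an encoder that encodes a page exactly once and advertises the single
// encoding constant a reader later needs to pick the matching decoder.
abstract class OneShotEncoder {
    private byte[] encodedPage;

    final byte[] encode(byte[] input) {
        if (encodedPage != null) {
            throw new IllegalStateException("already encoded");
        }
        encodedPage = compress(input);
        return encodedPage;
    }

    // Codec-specific conversion and compression step.
    protected abstract byte[] compress(byte[] input);

    // The single encoding this encoder applies, e.g. "ADAPTIVE_INTEGRAL".
    protected List<String> getEncodingList() {
        return Collections.singletonList(name());
    }

    protected abstract String name();
}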

Example 5 with Encoding

use of org.apache.carbondata.format.Encoding in project carbondata by apache.

the class ComplexDimensionIndexCodec method createEncoder.

@Override
public ColumnPageEncoder createEncoder(Map<String, String> parameter) {
    return new IndexStorageEncoder() {

        @Override
        void encodeIndexStorage(ColumnPage inputPage) {
            IndexStorage indexStorage = new BlockIndexerStorageForShort(inputPage.getByteArrayPage(), false, false, false);
            byte[] flattened = ByteUtil.flatten(indexStorage.getDataPage());
            byte[] compressed = compressor.compressByte(flattened);
            super.indexStorage = indexStorage;
            super.compressedDataPage = compressed;
        }

        @Override
        protected List<Encoding> getEncodingList() {
            List<Encoding> encodings = new ArrayList<>();
            encodings.add(Encoding.DICTIONARY);
            encodings.add(Encoding.INVERTED_INDEX);
            return encodings;
        }
    };
}
Also used : ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage), ArrayList (java.util.ArrayList), BlockIndexerStorageForShort (org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort), Encoding (org.apache.carbondata.format.Encoding), IndexStorage (org.apache.carbondata.core.datastore.columnar.IndexStorage)
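
Here ByteUtil.flatten collapses the page's byte[][] rows into one contiguous buffer before compression. A standalone equivalent, written as an assumption about its behavior inferred from the name and usage above:

import java.io.ByteArrayOutputStream;

// Stand-in for ByteUtil.flatten (assumed behavior): concatenate the
// per-entry byte arrays of a data page into a single buffer, ready for
// block compression by compressByte.
final class FlattenSketch {
    static byte[] flatten(byte[][] dataPage) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (byte[] entry : dataPage) {
            out.write(entry, 0, entry.length); // in-memory write, no IOException
        }
        return out.toByteArray();
    }
}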

Aggregations

Encoding (org.apache.carbondata.format.Encoding): 17 usages
ArrayList (java.util.ArrayList): 14 usages
ByteBuffer (java.nio.ByteBuffer): 7 usages
ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage): 7 usages
BlockIndexerStorageForShort (org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort): 4 usages
IndexStorage (org.apache.carbondata.core.datastore.columnar.IndexStorage): 4 usages
BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex): 4 usages
PresenceMeta (org.apache.carbondata.format.PresenceMeta): 4 usages
BlockIndexerStorageForNoInvertedIndexForShort (org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForShort): 3 usages
Compressor (org.apache.carbondata.core.datastore.compression.Compressor): 3 usages
LazyColumnPage (org.apache.carbondata.core.datastore.page.LazyColumnPage): 3 usages
ColumnPageEncoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder): 3 usages
ColumnPageEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta): 3 usages
ColumnSchema (org.apache.carbondata.format.ColumnSchema): 3 usages
DataChunk2 (org.apache.carbondata.format.DataChunk2): 3 usages
ByteArrayInputStream (java.io.ByteArrayInputStream): 2 usages
DataInputStream (java.io.DataInputStream): 2 usages
MockUp (mockit.MockUp): 2 usages
TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 2 usages
ColumnPageDecoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder): 2 usages