Example 11 with Encoding

Use of org.apache.carbondata.format.Encoding in the Apache CarbonData project.

From the class CompressedDimensionChunkFileBasedReaderV3, the method decodeDimensionByMeta:

private ColumnPage decodeDimensionByMeta(DataChunk2 pageMetadata, ByteBuffer pageData, int offset) throws IOException, MemoryException {
    List<Encoding> encodings = pageMetadata.getEncoders();
    List<ByteBuffer> encoderMetas = pageMetadata.getEncoder_meta();
    ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas);
    return decoder.decode(pageData.array(), offset, pageMetadata.data_page_length);
}
Also used: Encoding (org.apache.carbondata.format.Encoding), ColumnPageDecoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder), ByteBuffer (java.nio.ByteBuffer)
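
For orientation, a minimal sketch of the same wiring as a standalone helper. The DefaultEncodingFactory singleton accessor and the caller-supplied pageMetadata/pageData are assumptions for illustration, not the reader's actual call site:

// Hypothetical helper mirroring decodeDimensionByMeta above: build a decoder
// from the page's own metadata, then decode the raw bytes at offset 0.
static ColumnPage decodePage(DataChunk2 pageMetadata, ByteBuffer pageData)
        throws IOException, MemoryException {
    // Assumed singleton accessor; the V3 readers wire their encodingFactory field similarly.
    EncodingFactory encodingFactory = DefaultEncodingFactory.getInstance();
    ColumnPageDecoder decoder =
        encodingFactory.createDecoder(pageMetadata.getEncoders(), pageMetadata.getEncoder_meta());
    return decoder.decode(pageData.array(), 0, pageMetadata.data_page_length);
}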

Example 12 with Encoding

Use of org.apache.carbondata.format.Encoding in the Apache CarbonData project.

From the class DataFileFooterConverterTest, the method testGetIndexInfo:

@Test
public void testGetIndexInfo() throws Exception {
    DataFileFooterConverter dataFileFooterConverter = new DataFileFooterConverter();
    final ThriftReader thriftReader = new ThriftReader("file");
    List<Encoding> encoders = new ArrayList<>();
    encoders.add(Encoding.INVERTED_INDEX);
    encoders.add(Encoding.BIT_PACKED);
    encoders.add(Encoding.DELTA);
    encoders.add(Encoding.DICTIONARY);
    encoders.add(Encoding.DIRECT_DICTIONARY);
    encoders.add(Encoding.RLE);
    ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
    ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
    ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
    ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
    ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
    final List<ColumnSchema> columnSchemas = new ArrayList<>();
    columnSchemas.add(columnSchema);
    columnSchemas.add(columnSchema1);
    columnSchemas.add(columnSchema2);
    columnSchemas.add(columnSchema3);
    columnSchemas.add(columnSchema4);
    columnSchemas.add(columnSchema5);
    columnSchemas.add(columnSchema6);
    columnSchemas.add(columnSchema7);
    final BlockIndex blockIndex = new BlockIndex();
    blockIndex.setBlock_index(new org.apache.carbondata.format.BlockletIndex());
    org.apache.carbondata.format.BlockletIndex blockletIndex1 = new org.apache.carbondata.format.BlockletIndex();
    BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
    blockletBTreeIndex.setStart_key("1".getBytes());
    blockletBTreeIndex.setEnd_key("3".getBytes());
    blockletIndex1.setB_tree_index(blockletBTreeIndex);
    BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
    blockletMinMaxIndex.setMax_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 2)));
    blockletMinMaxIndex.setMin_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 1)));
    blockletIndex1.setMin_max_index(blockletMinMaxIndex);
    blockIndex.setBlock_index(blockletIndex1);
    List<Integer> column_cardinalities = new ArrayList<>();
    column_cardinalities.add(new Integer("1"));
    final org.apache.carbondata.format.SegmentInfo segmentInfo1 = new org.apache.carbondata.format.SegmentInfo(3, column_cardinalities);
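    // Fake out CarbonIndexFileReader with JMockit: the converter will see exactly
    // one block, the mocked index header, and the block index built above.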
    new MockUp<CarbonIndexFileReader>() {

        boolean mockedHasNextStatus = true;

        @SuppressWarnings("unused")
        @Mock
        public boolean hasNext() throws IOException {
            boolean temp = mockedHasNextStatus;
            mockedHasNextStatus = false;
            return temp;
        }

        @SuppressWarnings("unused")
        @Mock
        public void openThriftReader(String filePath) throws IOException {
            thriftReader.open();
        }

        @SuppressWarnings("unused")
        @Mock
        public IndexHeader readIndexHeader() throws IOException {
            return new IndexHeader(1, columnSchemas, segmentInfo1);
        }

        @SuppressWarnings("unused")
        @Mock
        public BlockIndex readBlockIndexInfo() throws IOException {
            return blockIndex;
        }

        @SuppressWarnings("unused")
        @Mock
        public void closeThriftReader() {
            thriftReader.close();
        }
    };
    new MockUp<IndexHeader>() {

        @SuppressWarnings("unused")
        @Mock
        public List<ColumnSchema> getTable_columns() {
            return columnSchemas;
        }
    };
    ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream("1".getBytes());
    final DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream);
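    // Route FileFactory stream requests to the in-memory stream above instead of disk.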
    new MockUp<FileFactory>() {

        @SuppressWarnings("unused")
        @Mock
        public DataInputStream getDataInputStream(String path, FileFactory.FileType fileType, int bufferSize) {
            return dataInputStream;
        }
    };
    String[] arr = { "a", "b", "c" };
    String fileName = "/part-0-0_batchno0-0-1495074251740.carbondata";
    TableBlockInfo tableBlockInfo = new TableBlockInfo(fileName, 3, "id", arr, 3, ColumnarFormatVersion.V1, null);
    tableBlockInfo.getBlockletInfos().setNoOfBlockLets(3);
    List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
    tableBlockInfoList.add(tableBlockInfo);
    String idxFileName = "0_batchno0-0-1495074251740.carbonindex";
    List<DataFileFooter> dataFileFooterList = dataFileFooterConverter.getIndexInfo(idxFileName, tableBlockInfoList);
    byte[] exp = dataFileFooterList.get(0).getBlockletIndex().getBtreeIndex().getStartKey();
    byte[] res = "1".getBytes();
    for (int i = 0; i < exp.length; i++) {
        assertEquals(exp[i], res[i]);
    }
}
Also used: IndexHeader (org.apache.carbondata.format.IndexHeader), TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo), ArrayList (java.util.ArrayList), ColumnSchema (org.apache.carbondata.format.ColumnSchema), MockUp (mockit.MockUp), ThriftReader (org.apache.carbondata.core.reader.ThriftReader), DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter), BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex), BlockletBTreeIndex (org.apache.carbondata.format.BlockletBTreeIndex), Encoding (org.apache.carbondata.format.Encoding), DataInputStream (java.io.DataInputStream), BlockIndex (org.apache.carbondata.format.BlockIndex), ByteArrayInputStream (java.io.ByteArrayInputStream), SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo), Test (org.junit.Test)
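
The test swaps in behavior with JMockit's MockUp API rather than constructing real readers. For readers unfamiliar with the pattern, a minimal, self-contained sketch (the Clock class is hypothetical; JMockit must be on the classpath with its agent active, as it is when these tests run):

import mockit.Mock;
import mockit.MockUp;
import org.junit.Test;
import static org.junit.Assert.assertEquals;

public class MockUpSketchTest {

    static class Clock {
        static long now() {
            return System.currentTimeMillis();
        }
    }

    @Test
    public void mockUpReplacesStaticMethod() {
        // Registering the MockUp redefines Clock.now() for the rest of the
        // test; @Mock methods shadow the real ones by name and signature.
        new MockUp<Clock>() {
            @Mock
            long now() {
                return 42L;
            }
        };
        assertEquals(42L, Clock.now());
    }
}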

Example 13 with Encoding

Use of org.apache.carbondata.format.Encoding in the Apache CarbonData project.

From the class CarbonMetadataUtilTest, the method setUp:

@BeforeClass
public static void setUp() {
    objMaxArr = new Long[6];
    objMaxArr[0] = new Long("111111");
    objMaxArr[1] = new Long("121111");
    objMaxArr[2] = new Long("131111");
    objMaxArr[3] = new Long("141111");
    objMaxArr[4] = new Long("151111");
    objMaxArr[5] = new Long("161111");
    objMinArr = new Long[6];
    objMinArr[0] = new Long("119");
    objMinArr[1] = new Long("121");
    objMinArr[2] = new Long("131");
    objMinArr[3] = new Long("141");
    objMinArr[4] = new Long("151");
    objMinArr[5] = new Long("161");
    objDecimal = new int[] { 0, 0, 0, 0, 0, 0 };
    columnSchemaList = new ArrayList<>();
    List<Encoding> encodingList = new ArrayList<>();
    encodingList.add(Encoding.BIT_PACKED);
    encodingList.add(Encoding.DELTA);
    encodingList.add(Encoding.INVERTED_INDEX);
    encodingList.add(Encoding.DIRECT_DICTIONARY);
    byteArr = "412111".getBytes();
    byte[] byteArr1 = "321".getBytes();
    byte[] byteArr2 = "356".getBytes();
    byteBufferList = new ArrayList<>();
    ByteBuffer bb = ByteBuffer.allocate(byteArr.length);
    bb.put(byteArr);
    ByteBuffer bb1 = ByteBuffer.allocate(byteArr1.length);
    bb1.put(byteArr1);
    ByteBuffer bb2 = ByteBuffer.allocate(byteArr2.length);
    bb2.put(byteArr2);
    byteBufferList.add(bb);
    byteBufferList.add(bb1);
    byteBufferList.add(bb2);
    DataChunk dataChunk = new DataChunk();
    dataChunk.setEncoders(encodingList);
    dataChunk.setEncoder_meta(byteBufferList);
    List<DataChunk> dataChunkList = new ArrayList<>();
    dataChunkList.add(dataChunk);
    dataChunkList.add(dataChunk);
    BlockletInfo blockletInfo = new BlockletInfo();
    blockletInfo.setColumn_data_chunks(dataChunkList);
    blockletInfoList = new ArrayList<>();
    blockletInfoList.add(blockletInfo);
    blockletInfoList.add(blockletInfo);
    ValueEncoderMeta meta = CarbonTestUtil.createValueEncoderMeta();
    meta.setDecimal(5);
    meta.setMinValue(objMinArr);
    meta.setMaxValue(objMaxArr);
    meta.setType(org.apache.carbondata.core.metadata.datatype.DataType.DOUBLE_MEASURE_CHAR);
    List<Encoding> encoders = new ArrayList<>();
    encoders.add(Encoding.INVERTED_INDEX);
    encoders.add(Encoding.BIT_PACKED);
    encoders.add(Encoding.DELTA);
    encoders.add(Encoding.DICTIONARY);
    encoders.add(Encoding.DIRECT_DICTIONARY);
    encoders.add(Encoding.RLE);
    ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
    ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
    ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
    ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
    ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
    columnSchemas = new ArrayList<>();
    columnSchemas.add(columnSchema);
    columnSchemas.add(columnSchema1);
    columnSchemas.add(columnSchema2);
    columnSchemas.add(columnSchema3);
    columnSchemas.add(columnSchema4);
    columnSchemas.add(columnSchema5);
    columnSchemas.add(columnSchema6);
    columnSchemas.add(columnSchema7);
}
Also used: ArrayList (java.util.ArrayList), BlockletInfo (org.apache.carbondata.format.BlockletInfo), Encoding (org.apache.carbondata.format.Encoding), DataChunk (org.apache.carbondata.format.DataChunk), ColumnSchema (org.apache.carbondata.format.ColumnSchema), ValueEncoderMeta (org.apache.carbondata.core.metadata.ValueEncoderMeta), ByteBuffer (java.nio.ByteBuffer), BeforeClass (org.junit.BeforeClass)
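
One subtlety in the buffers built above: ByteBuffer.put(...) advances the position, so bb, bb1 and bb2 finish positioned at their limits. Code that reads them through array(), as the metadata paths in these examples do, is unaffected, but any relative read would first need a flip(). A minimal illustration:

ByteBuffer bb = ByteBuffer.allocate(3);
bb.put("321".getBytes());  // position == 3; a relative get() here would underflow
bb.flip();                 // position = 0, limit = 3: ready for reading
byte first = bb.get();     // reads '3' (0x33)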

Example 14 with Encoding

Use of org.apache.carbondata.format.Encoding in the Apache CarbonData project.

From the class EncodingFactory, the method createDecoder:

/**
 * Return new decoder based on encoder metadata read from file
 */
public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer> encoderMetas) throws IOException {
    assert (encodings.size() == 1);
    assert (encoderMetas.size() == 1);
    Encoding encoding = encodings.get(0);
    byte[] encoderMeta = encoderMetas.get(0).array();
    ByteArrayInputStream stream = new ByteArrayInputStream(encoderMeta);
    DataInputStream in = new DataInputStream(stream);
    if (encoding == DIRECT_COMPRESS) {
        ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
        metadata.readFields(in);
        return new DirectCompressCodec(metadata.getStoreDataType()).createDecoder(metadata);
    } else if (encoding == ADAPTIVE_INTEGRAL) {
        ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
        metadata.readFields(in);
        SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
        return new AdaptiveIntegralCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(), stats).createDecoder(metadata);
    } else if (encoding == ADAPTIVE_DELTA_INTEGRAL) {
        ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
        metadata.readFields(in);
        SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
        return new AdaptiveDeltaIntegralCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(), stats).createDecoder(metadata);
    } else if (encoding == ADAPTIVE_FLOATING) {
        ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
        metadata.readFields(in);
        SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
        return new AdaptiveFloatingCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(), stats).createDecoder(metadata);
    } else if (encoding == ADAPTIVE_DELTA_FLOATING) {
        ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
        metadata.readFields(in);
        SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
        return new AdaptiveDeltaFloatingCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(), stats).createDecoder(metadata);
    } else if (encoding == RLE_INTEGRAL) {
        RLEEncoderMeta metadata = new RLEEncoderMeta();
        metadata.readFields(in);
        return new RLECodec().createDecoder(metadata);
    } else if (encoding == BOOL_BYTE) {
        ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
        metadata.readFields(in);
        return new DirectCompressCodec(metadata.getStoreDataType()).createDecoder(metadata);
    } else {
        // for backward compatibility
        ValueEncoderMeta metadata = CarbonUtil.deserializeEncoderMetaV3(encoderMeta);
        return createDecoderLegacy(metadata);
    }
}
Also used: RLEEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.rle.RLEEncoderMeta), Encoding (org.apache.carbondata.format.Encoding), DataInputStream (java.io.DataInputStream), AdaptiveDeltaIntegralCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaIntegralCodec), AdaptiveFloatingCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveFloatingCodec), SimpleStatsResult (org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult), AdaptiveIntegralCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveIntegralCodec), ByteArrayInputStream (java.io.ByteArrayInputStream), RLECodec (org.apache.carbondata.core.datastore.page.encoding.rle.RLECodec), DirectCompressCodec (org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec), AdaptiveDeltaFloatingCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaFloatingCodec), ValueEncoderMeta (org.apache.carbondata.core.metadata.ValueEncoderMeta)
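
Each readFields(in) call above is the read half of a Hadoop-Writable-style contract. A sketch of the symmetric write side, under the assumption that ColumnPageEncoderMeta also exposes a write(DataOutput) method (only readFields appears in this snippet):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta;

public class MetaRoundTripSketch {

    // Hypothetical write side: produce the ByteBuffer a writer would store
    // next to the page's Encoding entry.
    static ByteBuffer serialize(ColumnPageEncoderMeta meta) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        meta.write(new DataOutputStream(bytes));  // assumed Writable-style write
        return ByteBuffer.wrap(bytes.toByteArray());
    }

    // Read side: mirrors createDecoder above byte for byte.
    static ColumnPageEncoderMeta deserialize(ByteBuffer buffer) throws IOException {
        ColumnPageEncoderMeta meta = new ColumnPageEncoderMeta();
        meta.readFields(new DataInputStream(new ByteArrayInputStream(buffer.array())));
        return meta;
    }
}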

Example 15 with Encoding

Use of org.apache.carbondata.format.Encoding in the Apache CarbonData project.

From the class AdaptiveFloatingCodec, the method createEncoder:

@Override
public ColumnPageEncoder createEncoder(Map<String, String> parameter) {
    final Compressor compressor = CompressorFactory.getInstance().getCompressor();
    return new ColumnPageEncoder() {

        @Override
        protected byte[] encodeData(ColumnPage input) throws MemoryException, IOException {
            if (encodedPage != null) {
                throw new IllegalStateException("already encoded");
            }
            encodedPage = ColumnPage.newPage(input.getColumnSpec(), targetDataType, input.getPageSize());
            input.convertValue(converter);
            byte[] result = encodedPage.compress(compressor);
            encodedPage.freeMemory();
            return result;
        }

        @Override
        protected List<Encoding> getEncodingList() {
            List<Encoding> encodings = new ArrayList<Encoding>();
            encodings.add(Encoding.ADAPTIVE_FLOATING);
            return encodings;
        }

        @Override
        protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
            return new ColumnPageEncoderMeta(inputPage.getColumnSpec(), targetDataType, stats, compressor.getName());
        }
    };
}
Also used: ColumnPageEncoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder), LazyColumnPage (org.apache.carbondata.core.datastore.page.LazyColumnPage), ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage), ColumnPageEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta), ArrayList (java.util.ArrayList), Compressor (org.apache.carbondata.core.datastore.compression.Compressor), Encoding (org.apache.carbondata.format.Encoding)
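
This encoder and Example 14's factory are two halves of one contract: getEncodingList() returns exactly one Encoding, which satisfies the assert (encodings.size() == 1) in createDecoder, and getEncoderMeta() supplies the single serialized meta the decoder reads back. A hedged sketch of the round trip, given a codec, an input page, and a factory; EncodedColumnPage and its accessors are assumed API shapes, not confirmed by the snippets above:

// Hypothetical: encode a page, then hand its own metadata back to the
// factory to build the matching decoder.
ColumnPageEncoder encoder = codec.createEncoder(new HashMap<String, String>());
EncodedColumnPage encoded = encoder.encode(inputPage);  // assumed API shape
ColumnPageDecoder decoder = encodingFactory.createDecoder(
    encoded.getPageMetadata().getEncoders(),            // assumed accessors
    encoded.getPageMetadata().getEncoder_meta());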

Aggregations

Encoding (org.apache.carbondata.format.Encoding): 17 uses
ArrayList (java.util.ArrayList): 14 uses
ByteBuffer (java.nio.ByteBuffer): 7 uses
ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage): 7 uses
BlockIndexerStorageForShort (org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort): 4 uses
IndexStorage (org.apache.carbondata.core.datastore.columnar.IndexStorage): 4 uses
BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex): 4 uses
PresenceMeta (org.apache.carbondata.format.PresenceMeta): 4 uses
BlockIndexerStorageForNoInvertedIndexForShort (org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForShort): 3 uses
Compressor (org.apache.carbondata.core.datastore.compression.Compressor): 3 uses
LazyColumnPage (org.apache.carbondata.core.datastore.page.LazyColumnPage): 3 uses
ColumnPageEncoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder): 3 uses
ColumnPageEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta): 3 uses
ColumnSchema (org.apache.carbondata.format.ColumnSchema): 3 uses
DataChunk2 (org.apache.carbondata.format.DataChunk2): 3 uses
ByteArrayInputStream (java.io.ByteArrayInputStream): 2 uses
DataInputStream (java.io.DataInputStream): 2 uses
MockUp (mockit.MockUp): 2 uses
TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 2 uses
ColumnPageDecoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder): 2 uses