Search in sources :

Example 1 with EncodingFactory

use of org.apache.carbondata.core.datastore.page.encoding.EncodingFactory in project carbondata by apache.

In the class TestPageLevelDictionary, the method testPageLevelDictionaryContainsOnlyUsedDictionaryValuesWhenMultiplePagesUseSameDictionary.

/**
 * Checks that the blocklet-level dictionary chunk of one {@link PageLevelDictionary} contains
 * exactly the values that page requested, when a second page interleaves its own values through
 * the same shared {@link LocalDictionaryGenerator}.
 *
 * <p>Page 1 requests "vishal1".."vishal10" in two batches of five; page 2 requests
 * "vikas1".."vikas10" in two batches placed between and after page 1's batches. Page 1's chunk is
 * then decoded and compared, entry by entry, with the ten values page 1 submitted, and the bit
 * set decompressed from the chunk's dictionary values must have ten bits set.
 */
@Test
public void testPageLevelDictionaryContainsOnlyUsedDictionaryValuesWhenMultiplePagesUseSameDictionary() {
    LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
    String columnName = "column1";
    PageLevelDictionary pageLevelDictionary1 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    // Raw (un-prefixed) bytes of every value handed to page 1, in submission order.
    byte[][] validateData = new byte[10][];
    int index = 0;
    try {
        // Page 1, first batch.
        for (int i = 1; i <= 5; i++) {
            byte[] data = ("vishal" + i).getBytes();
            // Each value is submitted as a 2-byte length followed by the value bytes.
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            validateData[index] = data;
            pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
            index++;
        }
        // Page 2 shares the generator with page 1 and is created only after page 1's first batch.
        PageLevelDictionary pageLevelDictionary2 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
        // Page 2, first batch.
        for (int i = 1; i <= 5; i++) {
            byte[] data = ("vikas" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
        }
        // Page 1, second batch.
        for (int i = 6; i <= 10; i++) {
            byte[] data = ("vishal" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            validateData[index] = data;
            pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
            index++;
        }
        // Page 2, second batch.
        for (int i = 6; i <= 10; i++) {
            byte[] data = ("vikas" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        // Surface the exception in the failure message instead of a bare assertTrue(false).
        Assert.fail("Dictionary threshold reached unexpectedly: " + e);
    }
    try {
        LocalDictionaryChunk localDictionaryChunkForBlocklet = pageLevelDictionary1.getLocalDictionaryChunkForBlocklet();
        List<Encoding> encodings = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoders();
        EncodingFactory encodingFactory = DefaultEncodingFactory.getInstance();
        List<ByteBuffer> encoderMetas = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoder_meta();
        ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas, compressorName);
        ColumnPage decode = decoder.decode(localDictionaryChunkForBlocklet.getDictionary_data(), 0, localDictionaryChunkForBlocklet.getDictionary_data().length);
        BitSet bitSet = BitSet.valueOf(CompressorFactory.getInstance().getCompressor(compressorName).unCompressByte(localDictionaryChunkForBlocklet.getDictionary_values()));
        Assert.assertEquals("Unexpected number of set bits in dictionary values", validateData.length, bitSet.cardinality());
        for (int i = 0; i < validateData.length; i++) {
            Assert.assertTrue("Decoded dictionary value mismatch at index " + i, Arrays.equals(decode.getBytes(i), validateData[i]));
        }
    } catch (IOException e) {
        Assert.fail("Failed to decode local dictionary chunk: " + e);
    }
}
Also used : LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) BitSet(java.util.BitSet) Encoding(org.apache.carbondata.format.Encoding) DefaultEncodingFactory(org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory) EncodingFactory(org.apache.carbondata.core.datastore.page.encoding.EncodingFactory) ColumnPageDecoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder) IOException(java.io.IOException) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ByteBuffer(java.nio.ByteBuffer) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) Test(org.junit.Test)

Example 2 with EncodingFactory

use of org.apache.carbondata.core.datastore.page.encoding.EncodingFactory in project carbondata by apache.

In the class TestPageLevelDictionary, the method testPageLevelDictionaryContainsOnlyUsedDictionaryValues.

/**
 * Checks that the blocklet-level dictionary chunk of one {@link PageLevelDictionary} decodes back
 * to the values that page requested, when a second page has added its own values through the
 * same shared {@link LocalDictionaryGenerator}.
 *
 * <p>Page 1 requests "vishal1".."vishal500", then page 2 requests "vikas1".."vikas500". Page 1's
 * chunk is decoded and each of its first 500 entries is compared with the corresponding value
 * page 1 submitted.
 */
@Test
public void testPageLevelDictionaryContainsOnlyUsedDictionaryValues() {
    LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
    String columnName = "column1";
    PageLevelDictionary pageLevelDictionary1 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    // Raw (un-prefixed) bytes of every value handed to page 1, in submission order.
    byte[][] validateData = new byte[500][];
    try {
        for (int i = 1; i <= 500; i++) {
            byte[] data = ("vishal" + i).getBytes();
            // Each value is submitted as a 2-byte length followed by the value bytes.
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            validateData[i - 1] = data;
            pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
        }
        // Page 2 shares the generator with page 1 and is created only after page 1 is filled.
        PageLevelDictionary pageLevelDictionary2 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
        for (int i = 1; i <= 500; i++) {
            byte[] data = ("vikas" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        // Surface the exception in the failure message instead of a bare assertTrue(false).
        Assert.fail("Dictionary threshold reached unexpectedly: " + e);
    }
    try {
        LocalDictionaryChunk localDictionaryChunkForBlocklet = pageLevelDictionary1.getLocalDictionaryChunkForBlocklet();
        List<Encoding> encodings = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoders();
        EncodingFactory encodingFactory = DefaultEncodingFactory.getInstance();
        List<ByteBuffer> encoderMetas = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoder_meta();
        ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas, compressorName);
        ColumnPage decode = decoder.decode(localDictionaryChunkForBlocklet.getDictionary_data(), 0, localDictionaryChunkForBlocklet.getDictionary_data().length);
        for (int i = 0; i < validateData.length; i++) {
            // The comparison result was previously discarded, so nothing was actually verified.
            Assert.assertTrue("Decoded dictionary value mismatch at index " + i, Arrays.equals(decode.getBytes(i), validateData[i]));
        }
    } catch (IOException e) {
        Assert.fail("Failed to decode local dictionary chunk: " + e);
    }
}
Also used : LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) Encoding(org.apache.carbondata.format.Encoding) DefaultEncodingFactory(org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory) EncodingFactory(org.apache.carbondata.core.datastore.page.encoding.EncodingFactory) ColumnPageDecoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder) IOException(java.io.IOException) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ByteBuffer(java.nio.ByteBuffer) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) Test(org.junit.Test)

Aggregations

IOException (java.io.IOException)2 ByteBuffer (java.nio.ByteBuffer)2 ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage)2 ColumnPageDecoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder)2 DefaultEncodingFactory (org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory)2 EncodingFactory (org.apache.carbondata.core.datastore.page.encoding.EncodingFactory)2 DictionaryThresholdReachedException (org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException)2 ColumnLocalDictionaryGenerator (org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator)2 LocalDictionaryGenerator (org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator)2 Encoding (org.apache.carbondata.format.Encoding)2 LocalDictionaryChunk (org.apache.carbondata.format.LocalDictionaryChunk)2 Test (org.junit.Test)2 BitSet (java.util.BitSet)1