Search in sources :

Example 1 with DictionaryThresholdReachedException

use of org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException in project carbondata by apache.

In class TestDictionaryStore, method testDictionaryStoreWithMoreThanThreshold.

/**
 * Inserts 15 distinct keys into a {@link MapBasedDictionaryStore} created with the
 * argument 10 (presumably the threshold - confirm against the store's constructor) and
 * expects a {@link DictionaryThresholdReachedException} along the way, after which the
 * store must report that its threshold has been reached.
 */
@Test
public void testDictionaryStoreWithMoreThanThreshold() {
    DictionaryStore store = new MapBasedDictionaryStore(10);
    boolean thresholdExceptionSeen = false;
    // Stop inserting as soon as the store rejects a key.
    for (int key = 0; key < 15 && !thresholdExceptionSeen; key++) {
        try {
            store.putIfAbsent(String.valueOf(key).getBytes());
        } catch (DictionaryThresholdReachedException e) {
            thresholdExceptionSeen = true;
        }
    }
    Assert.assertTrue(thresholdExceptionSeen);
    Assert.assertTrue(store.isThresholdReached());
}
Also used : MapBasedDictionaryStore(org.apache.carbondata.core.localdictionary.dictionaryholder.MapBasedDictionaryStore) MapBasedDictionaryStore(org.apache.carbondata.core.localdictionary.dictionaryholder.MapBasedDictionaryStore) DictionaryStore(org.apache.carbondata.core.localdictionary.dictionaryholder.DictionaryStore) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) Test(org.junit.Test)

Example 2 with DictionaryThresholdReachedException

use of org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException in project carbondata by apache.

In class LocalDictColumnPage, method putBytes.

/**
 * Adds one value of column data to this page.
 *
 * <p>The raw bytes are always written to the actual data page. While a page level
 * dictionary is still set, the value is additionally looked up in that dictionary and
 * the generated key is written to the encoded page. When the lookup throws
 * {@link DictionaryThresholdReachedException}, a warning is logged, the page level
 * dictionary reference is cleared and the encoded page is freed and cleared, so later
 * calls only store the raw bytes.
 *
 * @param rowId row number
 * @param bytes actual data
 */
@Override
public void putBytes(int rowId, byte[] bytes) {
    if (null == pageLevelDictionary) {
        // No dictionary in use: only the raw value is stored.
        actualDataColumnPage.putBytes(rowId, bytes);
    } else {
        try {
            // The raw value is written first, so it is kept even if the lookup below fails.
            actualDataColumnPage.putBytes(rowId, bytes);
            DataType storeType = actualDataColumnPage.columnPageEncoderMeta.getStoreDataType();
            byte[] dictionaryInput = bytes;
            if (storeType == DataTypes.STRING) {
                // STRING values are looked up with a 2-byte length prefix.
                dictionaryInput = ByteBuffer.allocate(bytes.length + 2)
                    .putShort((short) bytes.length)
                    .put(bytes)
                    .array();
            } else if (storeType == DataTypes.VARCHAR || storeType == DataTypes.BINARY) {
                // VARCHAR and BINARY values are looked up with a 4-byte length prefix.
                dictionaryInput = ByteBuffer.allocate(bytes.length + 4)
                    .putInt(bytes.length)
                    .put(bytes)
                    .array();
            }
            dummyKey[0] = pageLevelDictionary.getDictionaryValue(dictionaryInput);
            encodedDataColumnPage.putBytes(rowId, keyGenerator.generateKey(dummyKey));
        } catch (DictionaryThresholdReachedException e) {
            LOGGER.warn("Local Dictionary threshold reached for the column: " + actualDataColumnPage.getColumnSpec().getFieldName() + ", " + e.getMessage());
            // Give up dictionary encoding for this page from here on.
            pageLevelDictionary = null;
            encodedDataColumnPage.freeMemory();
            encodedDataColumnPage = null;
        }
    }
    // Grow the recorded page size so that it covers the row just written.
    if (rowId >= pageSize) {
        pageSize = rowId + 1;
    }
}
Also used : DataType(org.apache.carbondata.core.metadata.datatype.DataType) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ByteBuffer(java.nio.ByteBuffer)

Example 3 with DictionaryThresholdReachedException

use of org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException in project carbondata by apache.

In class TestPageLevelDictionary, method testPageLevelDictionaryContainsOnlyUsedDictionaryValuesWhenMultiplePagesUseSameDictionary.

/**
 * Interleaves inserts into two page level dictionaries built on the same generator and
 * then checks the blocklet dictionary chunk of the first one: the decompressed bit set
 * must have as many bits set as values were added through the first dictionary, and the
 * decoded rows must equal those values in insertion order.
 */
@Test
public void testPageLevelDictionaryContainsOnlyUsedDictionaryValuesWhenMultiplePagesUseSameDictionary() {
    LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
    String columnName = "column1";
    PageLevelDictionary firstPageDictionary = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    // Values added through the first dictionary (without the length prefix), in order.
    byte[][] expectedValues = new byte[10][];
    int filled = 0;
    try {
        // First page, first batch: "vishal1" .. "vishal5", each with a 2-byte length prefix.
        for (int n = 1; n <= 5; n++) {
            byte[] value = ("vishal" + n).getBytes();
            ByteBuffer prefixed = ByteBuffer.allocate(value.length + 2);
            prefixed.putShort((short) value.length);
            prefixed.put(value);
            expectedValues[filled] = value;
            firstPageDictionary.getDictionaryValue(prefixed.array());
            filled++;
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.assertTrue(false);
    }
    PageLevelDictionary secondPageDictionary = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    try {
        // Second page, first batch: "vikas1" .. "vikas5".
        for (int n = 1; n <= 5; n++) {
            byte[] value = ("vikas" + n).getBytes();
            ByteBuffer prefixed = ByteBuffer.allocate(value.length + 2);
            prefixed.putShort((short) value.length);
            prefixed.put(value);
            secondPageDictionary.getDictionaryValue(prefixed.array());
        }
        // First page, second batch: "vishal6" .. "vishal10".
        for (int n = 6; n <= 10; n++) {
            byte[] value = ("vishal" + n).getBytes();
            ByteBuffer prefixed = ByteBuffer.allocate(value.length + 2);
            prefixed.putShort((short) value.length);
            prefixed.put(value);
            expectedValues[filled] = value;
            firstPageDictionary.getDictionaryValue(prefixed.array());
            filled++;
        }
        // Second page, second batch: "vikas6" .. "vikas10".
        for (int n = 6; n <= 10; n++) {
            byte[] value = ("vikas" + n).getBytes();
            ByteBuffer prefixed = ByteBuffer.allocate(value.length + 2);
            prefixed.putShort((short) value.length);
            prefixed.put(value);
            secondPageDictionary.getDictionaryValue(prefixed.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.assertTrue(false);
    }
    try {
        LocalDictionaryChunk chunk = firstPageDictionary.getLocalDictionaryChunkForBlocklet();
        List<Encoding> encodings = chunk.getDictionary_meta().getEncoders();
        EncodingFactory factory = DefaultEncodingFactory.getInstance();
        List<ByteBuffer> encoderMetas = chunk.getDictionary_meta().getEncoder_meta();
        ColumnPageDecoder decoder = factory.createDecoder(encodings, encoderMetas, compressorName);
        ColumnPage decodedPage = decoder.decode(chunk.getDictionary_data(), 0, chunk.getDictionary_data().length);
        BitSet usedValues = BitSet.valueOf(CompressorFactory.getInstance().getCompressor(compressorName).unCompressByte(chunk.getDictionary_values()));
        // Exactly the values added through the first dictionary must be marked.
        Assert.assertTrue(usedValues.cardinality() == expectedValues.length);
        for (int row = 0; row < expectedValues.length; row++) {
            Assert.assertTrue(Arrays.equals(decodedPage.getBytes(row), expectedValues[row]));
        }
    } catch (IOException e) {
        Assert.assertTrue(false);
    }
}
Also used : LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) BitSet(java.util.BitSet) Encoding(org.apache.carbondata.format.Encoding) DefaultEncodingFactory(org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory) EncodingFactory(org.apache.carbondata.core.datastore.page.encoding.EncodingFactory) ColumnPageDecoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder) IOException(java.io.IOException) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ByteBuffer(java.nio.ByteBuffer) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) Test(org.junit.Test)

Example 4 with DictionaryThresholdReachedException

use of org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException in project carbondata by apache.

In class TestPageLevelDictionary, method testPageLevelDictionaryGenerateDataIsGenertingProperDictionaryValues.

/**
 * Adds the decimal strings "1" .. "1000" to a fresh page level dictionary and expects
 * the value returned for the i-th string to be {@code i + 1}; a
 * {@link DictionaryThresholdReachedException} at any point fails the test.
 */
@Test
public void testPageLevelDictionaryGenerateDataIsGenertingProperDictionaryValues() {
    LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
    String columnName = "column1";
    PageLevelDictionary dictionary = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    try {
        int number = 1;
        while (number <= 1000) {
            byte[] key = Integer.toString(number).getBytes();
            Assert.assertTrue((number + 1) == dictionary.getDictionaryValue(key));
            number++;
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.assertTrue(false);
    }
}
Also used : ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) Test(org.junit.Test)

Example 5 with DictionaryThresholdReachedException

use of org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException in project carbondata by apache.

In class TestPageLevelDictionary, method testPageLevelDictionaryContainsOnlyUsedDictionaryValues.

/**
 * Adds 500 values ("vishal1" .. "vishal500") through one page level dictionary and 500
 * other values ("vikas1" .. "vikas500") through a second one built on the same
 * generator, then decodes the blocklet dictionary chunk of the first dictionary and
 * asserts that rows 0 .. 499 equal the values added through the first dictionary, in
 * insertion order.
 */
@Test
public void testPageLevelDictionaryContainsOnlyUsedDictionaryValues() {
    LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
    String columnName = "column1";
    PageLevelDictionary pageLevelDictionary1 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    // Values added through the first dictionary (without the length prefix), in order.
    byte[][] validateData = new byte[500][];
    try {
        for (int i = 1; i <= 500; i++) {
            byte[] data = ("vishal" + i).getBytes();
            // The dictionary is fed the value with a 2-byte length prefix.
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            validateData[i - 1] = data;
            pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.fail("Unexpected DictionaryThresholdReachedException for first page: " + e.getMessage());
    }
    PageLevelDictionary pageLevelDictionary2 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    try {
        for (int i = 1; i <= 500; i++) {
            byte[] data = ("vikas" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.fail("Unexpected DictionaryThresholdReachedException for second page: " + e.getMessage());
    }
    try {
        LocalDictionaryChunk localDictionaryChunkForBlocklet = pageLevelDictionary1.getLocalDictionaryChunkForBlocklet();
        List<Encoding> encodings = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoders();
        EncodingFactory encodingFactory = DefaultEncodingFactory.getInstance();
        List<ByteBuffer> encoderMetas = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoder_meta();
        ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas, compressorName);
        ColumnPage decode = decoder.decode(localDictionaryChunkForBlocklet.getDictionary_data(), 0, localDictionaryChunkForBlocklet.getDictionary_data().length);
        for (int i = 0; i < validateData.length; i++) {
            // The comparison result must be asserted; a bare Arrays.equals call checks nothing.
            Assert.assertTrue(Arrays.equals(decode.getBytes(i), validateData[i]));
        }
    } catch (IOException e) {
        Assert.fail("Unexpected IOException while reading back the dictionary chunk: " + e.getMessage());
    }
}
Also used : LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) Encoding(org.apache.carbondata.format.Encoding) DefaultEncodingFactory(org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory) EncodingFactory(org.apache.carbondata.core.datastore.page.encoding.EncodingFactory) ColumnPageDecoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder) IOException(java.io.IOException) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ByteBuffer(java.nio.ByteBuffer) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) Test(org.junit.Test)

Aggregations

DictionaryThresholdReachedException (org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException)7 Test (org.junit.Test)6 ColumnLocalDictionaryGenerator (org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator)4 LocalDictionaryGenerator (org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator)4 ByteBuffer (java.nio.ByteBuffer)3 IOException (java.io.IOException)2 ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage)2 ColumnPageDecoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder)2 DefaultEncodingFactory (org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory)2 EncodingFactory (org.apache.carbondata.core.datastore.page.encoding.EncodingFactory)2 DictionaryStore (org.apache.carbondata.core.localdictionary.dictionaryholder.DictionaryStore)2 MapBasedDictionaryStore (org.apache.carbondata.core.localdictionary.dictionaryholder.MapBasedDictionaryStore)2 Encoding (org.apache.carbondata.format.Encoding)2 LocalDictionaryChunk (org.apache.carbondata.format.LocalDictionaryChunk)2 BitSet (java.util.BitSet)1 DataType (org.apache.carbondata.core.metadata.datatype.DataType)1