Search in sources :

Example 1 with LocalDictionaryGenerator

use of org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator in project carbondata by apache.

the class TestLocalDictionaryGenerator method testColumnLocalDictionaryGeneratorForNullValueIsPresentWithoutAddingAnyData.

/**
 * Checks that a generator which has not been fed any data already resolves
 * dictionary value 1 to {@code CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY},
 * encoded as a 2-byte length followed by the member bytes.
 */
@Test
public void testColumnLocalDictionaryGeneratorForNullValueIsPresentWithoutAddingAnyData() {
    LocalDictionaryGenerator dictGenerator = new ColumnLocalDictionaryGenerator(1000, 2);
    final byte[] defaultMember = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
    // Expected key layout: short length prefix, then the raw member bytes.
    ByteBuffer expectedKey = ByteBuffer.allocate(2 + defaultMember.length);
    expectedKey.putShort((short) defaultMember.length);
    expectedKey.put(defaultMember);
    byte[] actualKey = dictGenerator.getDictionaryKeyBasedOnValue(1);
    Assert.assertTrue(Arrays.equals(actualKey, expectedKey.array()));
}
Also used : ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) ByteBuffer(java.nio.ByteBuffer) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) Test(org.junit.Test)

Example 2 with LocalDictionaryGenerator

use of org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator in project carbondata by apache.

the class ComplexColumnPage method initialize.

/**
 * Creates a column page, with a matching statistics collector, for every slot of
 * {@code columnPages} of this complex column.
 * <p>
 * For each slot the local dictionary generator is looked up by the column name held in
 * {@code complexColumnInfoList}:
 * <ul>
 *   <li>generator present: a local-dictionary page of type BYTE_ARRAY with a dummy
 *       stats collector;</li>
 *   <li>generator absent and {@code isColumnPageBasedOnDataType} is true: a page of the
 *       column's own data type with a primitive stats collector, so the actual value is
 *       stored instead of a byte array;</li>
 *   <li>otherwise: a plain BYTE_ARRAY page with a dummy stats collector.</li>
 * </ul>
 *
 * @param columnToDictMap  local dictionary generators keyed by column name; a column
 *                         without an entry gets a non-dictionary page
 * @param pageSize         number of records, forwarded to the page factory methods
 * @param columnCompressor compressor name placed into every page's encoder meta
 */
public void initialize(Map<String, LocalDictionaryGenerator> columnToDictMap, int pageSize, String columnCompressor) {
    for (int pageIndex = 0; pageIndex < this.columnPages.length; pageIndex++) {
        LocalDictionaryGenerator dictGenerator = columnToDictMap.get(complexColumnInfoList.get(pageIndex).getColumnNames());
        TableSpec.ColumnSpec columnSpec = getColumnSpec(pageIndex, dictGenerator);

        if (dictGenerator != null) {
            // Local dictionary available: the page stores byte arrays.
            ColumnPageEncoderMeta dictMeta = new ColumnPageEncoderMeta(columnSpec, DataTypes.BYTE_ARRAY, columnCompressor);
            this.columnPages[pageIndex] = ColumnPage.newLocalDictPage(dictMeta, pageSize, dictGenerator, true);
            this.columnPages[pageIndex].setStatsCollector(new DummyStatsCollector());
            continue;
        }

        DataType columnDataType = complexColumnInfoList.get(pageIndex).getColumnDataTypes();
        if (!isColumnPageBasedOnDataType(pageIndex)) {
            ColumnPageEncoderMeta byteArrayMeta = new ColumnPageEncoderMeta(columnSpec, DataTypes.BYTE_ARRAY, columnCompressor);
            this.columnPages[pageIndex] = ColumnPage.newPage(byteArrayMeta, pageSize);
            this.columnPages[pageIndex].setStatsCollector(new DummyStatsCollector());
            continue;
        }

        // No-dictionary primitive types need adaptive encoding,
        // hence the actual value is stored instead of a byte array.
        ColumnPageEncoderMeta typedMeta = new ColumnPageEncoderMeta(columnSpec, columnDataType, columnCompressor);
        this.columnPages[pageIndex] = ColumnPage.newPage(typedMeta, pageSize);
        this.columnPages[pageIndex].setStatsCollector(PrimitivePageStatsCollector.newInstance(columnDataType));
    }
}
Also used : TableSpec(org.apache.carbondata.core.datastore.TableSpec) ColumnPageEncoderMeta(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta) DummyStatsCollector(org.apache.carbondata.core.datastore.page.statistics.DummyStatsCollector) DataType(org.apache.carbondata.core.metadata.datatype.DataType) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator)

Example 3 with LocalDictionaryGenerator

use of org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator in project carbondata by apache.

the class TestPageLevelDictionary method testPageLevelDictionaryContainsOnlyUsedDictionaryValuesWhenMultiplePagesUseSameDictionary.

/**
 * Two {@code PageLevelDictionary} instances share one generator. Ten "vishalN" values
 * are written through the first page and ten "vikasN" values through the second, in
 * interleaved batches of five. The blocklet dictionary chunk of the first page is then
 * decoded and must contain exactly the ten values written through that page, in
 * insertion order.
 * <p>
 * Each value is passed to the dictionary as a 2-byte length prefix followed by the
 * bytes; the decoded entries are compared against the bytes without the prefix.
 */
@Test
public void testPageLevelDictionaryContainsOnlyUsedDictionaryValuesWhenMultiplePagesUseSameDictionary() {
    LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
    String columnName = "column1";
    PageLevelDictionary pageLevelDictionary1 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    byte[][] validateData = new byte[10][];
    int index = 0;
    try {
        // Batch 1 for page 1: vishal1..vishal5
        for (int i = 1; i <= 5; i++) {
            byte[] data = ("vishal" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            validateData[index] = data;
            pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
            index++;
        }
        PageLevelDictionary pageLevelDictionary2 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
        // Batch 1 for page 2: vikas1..vikas5 (must not show up in page 1's chunk)
        for (int i = 1; i <= 5; i++) {
            byte[] data = ("vikas" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
        }
        // Batch 2 for page 1: vishal6..vishal10
        for (int i = 6; i <= 10; i++) {
            byte[] data = ("vishal" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            validateData[index] = data;
            pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
            index++;
        }
        // Batch 2 for page 2: vikas6..vikas10
        for (int i = 6; i <= 10; i++) {
            byte[] data = ("vikas" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        // Keep the original exception as the cause so the failure is diagnosable.
        throw new AssertionError("Unexpected DictionaryThresholdReachedException while adding dictionary values", e);
    }
    try {
        LocalDictionaryChunk localDictionaryChunkForBlocklet = pageLevelDictionary1.getLocalDictionaryChunkForBlocklet();
        List<Encoding> encodings = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoders();
        EncodingFactory encodingFactory = DefaultEncodingFactory.getInstance();
        List<ByteBuffer> encoderMetas = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoder_meta();
        ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas, compressorName);
        ColumnPage decode = decoder.decode(localDictionaryChunkForBlocklet.getDictionary_data(), 0, localDictionaryChunkForBlocklet.getDictionary_data().length);
        BitSet bitSet = BitSet.valueOf(CompressorFactory.getInstance().getCompressor(compressorName).unCompressByte(localDictionaryChunkForBlocklet.getDictionary_values()));
        // Only the values written through page 1 may be flagged as used.
        Assert.assertEquals(validateData.length, bitSet.cardinality());
        for (int i = 0; i < validateData.length; i++) {
            Assert.assertArrayEquals("decoded dictionary entry " + i, validateData[i], decode.getBytes(i));
        }
    } catch (IOException e) {
        throw new AssertionError("Unexpected IOException while decoding the local dictionary chunk", e);
    }
}
Also used : LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) BitSet(java.util.BitSet) Encoding(org.apache.carbondata.format.Encoding) DefaultEncodingFactory(org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory) EncodingFactory(org.apache.carbondata.core.datastore.page.encoding.EncodingFactory) ColumnPageDecoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder) IOException(java.io.IOException) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ByteBuffer(java.nio.ByteBuffer) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) Test(org.junit.Test)

Example 4 with LocalDictionaryGenerator

use of org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator in project carbondata by apache.

the class TestPageLevelDictionary method testPageLevelDictionaryGenerateDataIsGenertingProperDictionaryValues.

/**
 * Feeds the string forms of 1..1000 into a fresh page-level dictionary and expects
 * the i-th value to be assigned dictionary value {@code i + 1}.
 */
@Test
public void testPageLevelDictionaryGenerateDataIsGenertingProperDictionaryValues() {
    LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
    String columnName = "column1";
    PageLevelDictionary pageLevelDictionary = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    try {
        for (int i = 1; i <= 1000; i++) {
            int dictionaryValue = pageLevelDictionary.getDictionaryValue(("" + i).getBytes());
            // assertEquals reports expected and actual on failure, unlike assertTrue(a == b).
            Assert.assertEquals("dictionary value for member \"" + i + "\"", i + 1, dictionaryValue);
        }
    } catch (DictionaryThresholdReachedException e) {
        // Keep the original exception as the cause so the failure is diagnosable.
        throw new AssertionError("Unexpected DictionaryThresholdReachedException while adding dictionary values", e);
    }
}
Also used : ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) Test(org.junit.Test)

Example 5 with LocalDictionaryGenerator

use of org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator in project carbondata by apache.

the class TestPageLevelDictionary method testPageLevelDictionaryContainsOnlyUsedDictionaryValues.

/**
 * Two {@code PageLevelDictionary} instances share one generator. 500 "vishalN" values
 * are written through the first page, then 500 "vikasN" values through the second.
 * The blocklet dictionary chunk of the first page is decoded and its first 500 entries
 * must equal the values written through that page, in insertion order.
 * <p>
 * Each value is passed to the dictionary as a 2-byte length prefix followed by the
 * bytes; the decoded entries are compared against the bytes without the prefix.
 */
@Test
public void testPageLevelDictionaryContainsOnlyUsedDictionaryValues() {
    LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
    String columnName = "column1";
    PageLevelDictionary pageLevelDictionary1 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    byte[][] validateData = new byte[500][];
    try {
        for (int i = 1; i <= 500; i++) {
            byte[] data = ("vishal" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            validateData[i - 1] = data;
            pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        // Keep the original exception as the cause so the failure is diagnosable.
        throw new AssertionError("Unexpected DictionaryThresholdReachedException while filling the first page", e);
    }
    PageLevelDictionary pageLevelDictionary2 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    try {
        for (int i = 1; i <= 500; i++) {
            byte[] data = ("vikas" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        throw new AssertionError("Unexpected DictionaryThresholdReachedException while filling the second page", e);
    }
    try {
        LocalDictionaryChunk localDictionaryChunkForBlocklet = pageLevelDictionary1.getLocalDictionaryChunkForBlocklet();
        List<Encoding> encodings = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoders();
        EncodingFactory encodingFactory = DefaultEncodingFactory.getInstance();
        List<ByteBuffer> encoderMetas = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoder_meta();
        ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas, compressorName);
        ColumnPage decode = decoder.decode(localDictionaryChunkForBlocklet.getDictionary_data(), 0, localDictionaryChunkForBlocklet.getDictionary_data().length);
        for (int i = 0; i < 500; i++) {
            // The comparison result used to be discarded, so nothing was verified here.
            Assert.assertArrayEquals("decoded dictionary entry " + i, validateData[i], decode.getBytes(i));
        }
    } catch (IOException e) {
        throw new AssertionError("Unexpected IOException while decoding the local dictionary chunk", e);
    }
}
Also used : LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) Encoding(org.apache.carbondata.format.Encoding) DefaultEncodingFactory(org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory) EncodingFactory(org.apache.carbondata.core.datastore.page.encoding.EncodingFactory) ColumnPageDecoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder) IOException(java.io.IOException) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ByteBuffer(java.nio.ByteBuffer) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) Test(org.junit.Test)

Aggregations

LocalDictionaryGenerator (org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator)8 ColumnLocalDictionaryGenerator (org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator)7 Test (org.junit.Test)6 DictionaryThresholdReachedException (org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException)5 ByteBuffer (java.nio.ByteBuffer)3 IOException (java.io.IOException)2 ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage)2 ColumnPageDecoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder)2 DefaultEncodingFactory (org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory)2 EncodingFactory (org.apache.carbondata.core.datastore.page.encoding.EncodingFactory)2 Encoding (org.apache.carbondata.format.Encoding)2 LocalDictionaryChunk (org.apache.carbondata.format.LocalDictionaryChunk)2 BitSet (java.util.BitSet)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 TableSpec (org.apache.carbondata.core.datastore.TableSpec)1 ColumnPageEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta)1 DummyStatsCollector (org.apache.carbondata.core.datastore.page.statistics.DummyStatsCollector)1 DataType (org.apache.carbondata.core.metadata.datatype.DataType)1 SchemaEvolutionEntry (org.apache.carbondata.core.metadata.schema.SchemaEvolutionEntry)1