Search in sources :

Example 1 with LocalDictionaryChunk

use of org.apache.carbondata.format.LocalDictionaryChunk in project carbondata by apache.

In the class ColumnPageEncoder, method encodeDictionary:

/**
 * Encodes a dictionary page and packages the result as a local dictionary chunk.
 *
 * <p>The returned chunk carries the encoded page data plus a metadata object
 * holding the encoder list and the encoder metadata built for the page.
 *
 * @param dictionaryPage
 * dictionary column page to encode
 * @return local dictionary chunk with data and metadata populated
 * @throws IOException
 * if there is a problem while encoding
 */
public LocalDictionaryChunk encodeDictionary(ColumnPage dictionaryPage) throws IOException {
    final LocalDictionaryChunk chunk = new LocalDictionaryChunk();
    final LocalDictionaryChunkMeta chunkMeta = new LocalDictionaryChunkMeta();
    // Encode the payload first, then record how it was encoded.
    chunk.setDictionary_data(encodeData(dictionaryPage));
    chunkMeta.setEncoders(getEncodingList());
    chunkMeta.setEncoder_meta(buildEncoderMeta(dictionaryPage));
    chunk.setDictionary_meta(chunkMeta);
    return chunk;
}
Also used : LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) LocalDictionaryChunkMeta(org.apache.carbondata.format.LocalDictionaryChunkMeta)

Example 2 with LocalDictionaryChunk

use of org.apache.carbondata.format.LocalDictionaryChunk in project carbondata by apache.

In the class TestPageLevelDictionary, method testPageLevelDictionaryContainsOnlyUsedDictionaryValuesWhenMultiplePagesUseSameDictionary:

/**
 * Verifies that a page-level dictionary reports only the values its own page
 * used, even when a second page draws from the same generator.
 *
 * <p>"vishal1".."vishal10" are added to the first page and "vikas1".."vikas10"
 * to the second, in interleaved batches of five. The chunk built from the first
 * page is then decoded and compared against the "vishal" values in the order
 * they were added.
 */
@Test
public void testPageLevelDictionaryContainsOnlyUsedDictionaryValuesWhenMultiplePagesUseSameDictionary() {
    LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
    String columnName = "column1";
    PageLevelDictionary pageLevelDictionary1 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    // Values added to page 1, in insertion order; used later to validate the decoded chunk.
    byte[][] validateData = new byte[10][];
    int index = 0;
    try {
        for (int i = 1; i <= 5; i++) {
            byte[] data = ("vishal" + i).getBytes();
            // Each value is passed as a 2-byte length followed by the bytes.
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            validateData[index] = data;
            pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
            index++;
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.fail("Dictionary threshold reached while filling page 1 (batch 1): " + e);
    }
    PageLevelDictionary pageLevelDictionary2 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    try {
        for (int i = 1; i <= 5; i++) {
            byte[] data = ("vikas" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.fail("Dictionary threshold reached while filling page 2 (batch 1): " + e);
    }
    try {
        for (int i = 6; i <= 10; i++) {
            byte[] data = ("vishal" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            validateData[index] = data;
            pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
            index++;
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.fail("Dictionary threshold reached while filling page 1 (batch 2): " + e);
    }
    try {
        for (int i = 6; i <= 10; i++) {
            byte[] data = ("vikas" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.fail("Dictionary threshold reached while filling page 2 (batch 2): " + e);
    }
    try {
        LocalDictionaryChunk localDictionaryChunkForBlocklet = pageLevelDictionary1.getLocalDictionaryChunkForBlocklet();
        List<Encoding> encodings = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoders();
        EncodingFactory encodingFactory = DefaultEncodingFactory.getInstance();
        List<ByteBuffer> encoderMetas = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoder_meta();
        ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas, compressorName);
        ColumnPage decode = decoder.decode(localDictionaryChunkForBlocklet.getDictionary_data(), 0, localDictionaryChunkForBlocklet.getDictionary_data().length);
        BitSet bitSet = BitSet.valueOf(CompressorFactory.getInstance().getCompressor(compressorName).unCompressByte(localDictionaryChunkForBlocklet.getDictionary_values()));
        // Only the values page 1 used may be flagged, not those added by page 2.
        Assert.assertEquals(validateData.length, bitSet.cardinality());
        for (int i = 0; i < validateData.length; i++) {
            Assert.assertTrue(Arrays.equals(decode.getBytes(i), validateData[i]));
        }
    } catch (IOException e) {
        Assert.fail("Failed to build or decode the local dictionary chunk: " + e);
    }
}
Also used : LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) BitSet(java.util.BitSet) Encoding(org.apache.carbondata.format.Encoding) DefaultEncodingFactory(org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory) EncodingFactory(org.apache.carbondata.core.datastore.page.encoding.EncodingFactory) ColumnPageDecoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder) IOException(java.io.IOException) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ByteBuffer(java.nio.ByteBuffer) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) Test(org.junit.Test)

Example 3 with LocalDictionaryChunk

use of org.apache.carbondata.format.LocalDictionaryChunk in project carbondata by apache.

In the class TestPageLevelDictionary, method testPageLevelDictionaryContainsOnlyUsedDictionaryValues:

/**
 * Verifies that the chunk built from a page-level dictionary decodes back to
 * exactly the values that page added, when a second page shares the generator.
 *
 * <p>"vishal1".."vishal500" are added to the first page and "vikas1".."vikas500"
 * to the second. The chunk built from the first page is decoded and each row is
 * compared against the "vishal" values in the order they were added.
 */
@Test
public void testPageLevelDictionaryContainsOnlyUsedDictionaryValues() {
    LocalDictionaryGenerator generator = new ColumnLocalDictionaryGenerator(1000, 2);
    String columnName = "column1";
    PageLevelDictionary pageLevelDictionary1 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    // Values added to page 1, in insertion order; used later to validate the decoded chunk.
    byte[][] validateData = new byte[500][];
    try {
        for (int i = 1; i <= 500; i++) {
            byte[] data = ("vishal" + i).getBytes();
            // Each value is passed as a 2-byte length followed by the bytes.
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            validateData[i - 1] = data;
            pageLevelDictionary1.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.fail("Dictionary threshold reached while filling page 1: " + e);
    }
    PageLevelDictionary pageLevelDictionary2 = new PageLevelDictionary(generator, columnName, DataTypes.STRING, false, compressorName);
    try {
        for (int i = 1; i <= 500; i++) {
            byte[] data = ("vikas" + i).getBytes();
            ByteBuffer byteBuffer = ByteBuffer.allocate(data.length + 2);
            byteBuffer.putShort((short) data.length);
            byteBuffer.put(data);
            pageLevelDictionary2.getDictionaryValue(byteBuffer.array());
        }
    } catch (DictionaryThresholdReachedException e) {
        Assert.fail("Dictionary threshold reached while filling page 2: " + e);
    }
    try {
        LocalDictionaryChunk localDictionaryChunkForBlocklet = pageLevelDictionary1.getLocalDictionaryChunkForBlocklet();
        List<Encoding> encodings = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoders();
        EncodingFactory encodingFactory = DefaultEncodingFactory.getInstance();
        List<ByteBuffer> encoderMetas = localDictionaryChunkForBlocklet.getDictionary_meta().getEncoder_meta();
        ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas, compressorName);
        ColumnPage decode = decoder.decode(localDictionaryChunkForBlocklet.getDictionary_data(), 0, localDictionaryChunkForBlocklet.getDictionary_data().length);
        for (int i = 0; i < validateData.length; i++) {
            // The comparison result was previously discarded, so the test could never fail here.
            Assert.assertTrue(Arrays.equals(decode.getBytes(i), validateData[i]));
        }
    } catch (IOException e) {
        Assert.fail("Failed to build or decode the local dictionary chunk: " + e);
    }
}
Also used : LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) Encoding(org.apache.carbondata.format.Encoding) DefaultEncodingFactory(org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory) EncodingFactory(org.apache.carbondata.core.datastore.page.encoding.EncodingFactory) ColumnPageDecoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder) IOException(java.io.IOException) DictionaryThresholdReachedException(org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException) ByteBuffer(java.nio.ByteBuffer) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnLocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator) Test(org.junit.Test)

Example 4 with LocalDictionaryChunk

use of org.apache.carbondata.format.LocalDictionaryChunk in project carbondata by apache.

In the class PageLevelDictionary, method getLocalDictionaryChunkForBlocklet:

/**
 * Builds the encoded local dictionary chunk for writing.
 *
 * <p>Every dictionary value flagged in {@code usedDictionaryValues} is copied
 * into a temporary byte-array column page, which is then encoded. The used-value
 * bit set is compressed with the column compressor and attached to the chunk.
 * The temporary page is freed before returning, including when filling or
 * encoding fails.
 *
 * <p>TODO: support numeric data type dictionary-exclude columns.
 *
 * @return encoded local dictionary chunk
 * @throws IOException
 * in case of problem in encoding
 */
public LocalDictionaryChunk getLocalDictionaryChunkForBlocklet() throws IOException {
    // TODO support for actual data type dictionary ColumnSPEC
    ColumnType columnType = ColumnType.PLAIN_VALUE;
    boolean isVarcharType = false;
    int lvSize = CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
    if (DataTypes.VARCHAR == dataType) {
        // VARCHAR values use an int-sized length prefix instead of a short one.
        columnType = ColumnType.PLAIN_LONG_VALUE;
        lvSize = CarbonCommonConstants.INT_SIZE_IN_BYTE;
        isVarcharType = true;
    }
    TableSpec.ColumnSpec spec = TableSpec.ColumnSpec.newInstance(columnName, DataTypes.BYTE_ARRAY, columnType);
    ColumnPage dictionaryColumnPage = ColumnPage.newPage(new ColumnPageEncoderMeta(spec, DataTypes.BYTE_ARRAY, columnCompressor), usedDictionaryValues.cardinality());
    try {
        // TODO support data type specific stats collector for numeric data types
        dictionaryColumnPage.setStatsCollector(new DummyStatsCollector());
        int rowId = 0;
        for (int i = usedDictionaryValues.nextSetBit(0); i >= 0; i = usedDictionaryValues.nextSetBit(i + 1)) {
            byte[] dictionaryKey = localDictionaryGenerator.getDictionaryKeyBasedOnValue(i);
            if (!isComplexTypePrimitive) {
                dictionaryColumnPage.putData(rowId++, dictionaryKey);
            } else {
                // Complex-type primitives are stored with a length prefix in front of the key.
                ByteBuffer lengthPrefixedKey = ByteBuffer.allocate(lvSize + dictionaryKey.length);
                if (!isVarcharType) {
                    lengthPrefixedKey.putShort((short) dictionaryKey.length);
                } else {
                    lengthPrefixedKey.putInt(dictionaryKey.length);
                }
                lengthPrefixedKey.put(dictionaryKey);
                dictionaryColumnPage.putData(rowId++, lengthPrefixedKey.array());
            }
        }
        // creating a encoder
        ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
        // get encoded dictionary values
        LocalDictionaryChunk localDictionaryChunk = encoder.encodeDictionary(dictionaryColumnPage);
        // set compressed dictionary values
        localDictionaryChunk.setDictionary_values(CompressorFactory.getInstance().getCompressor(columnCompressor).compressByte(usedDictionaryValues.toByteArray()));
        return localDictionaryChunk;
    } finally {
        // free the dictionary page memory, even if filling or encoding threw
        dictionaryColumnPage.freeMemory();
    }
}
Also used : ColumnPageEncoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder) TableSpec(org.apache.carbondata.core.datastore.TableSpec) ColumnType(org.apache.carbondata.core.datastore.ColumnType) LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) DummyStatsCollector(org.apache.carbondata.core.datastore.page.statistics.DummyStatsCollector) ByteBuffer(java.nio.ByteBuffer) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnPageEncoderMeta(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta) DirectCompressCodec(org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec)

Example 5 with LocalDictionaryChunk

use of org.apache.carbondata.format.LocalDictionaryChunk in project carbondata by apache.

In the class CarbonTestUtil, method validateDictionary:

/**
 * Checks whether every string in {@code data} is present in the local
 * dictionary carried by the given raw column chunk.
 *
 * <p>The dictionary page is decoded with the compressor named in the first data
 * chunk's metadata, and its rows are matched to the set bits of the decompressed
 * used-values bit set, in ascending bit order.
 *
 * @param rawColumnPage raw dimension column chunk whose local dictionary is inspected
 * @param data strings expected to be present in the dictionary
 * @return {@code true} if all strings are found; {@code false} if any is missing
 *         or the chunk has no local dictionary
 * @throws IOException declared by the signature; presumably raised by decoding — confirm
 */
public static Boolean validateDictionary(DimensionRawColumnChunk rawColumnPage, String[] data) throws IOException {
    LocalDictionaryChunk localDictionary = rawColumnPage.getDataChunkV3().local_dictionary;
    if (localDictionary == null) {
        // Nothing to validate against.
        return false;
    }
    String compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta(rawColumnPage.getDataChunkV3().getData_chunk_list().get(0).getChunk_meta());
    List<org.apache.carbondata.format.Encoding> encoderList = localDictionary.getDictionary_meta().encoders;
    DefaultEncodingFactory factory = (DefaultEncodingFactory) DefaultEncodingFactory.getInstance();
    ColumnPageDecoder pageDecoder = factory.createDecoder(encoderList, localDictionary.getDictionary_meta().getEncoder_meta(), compressorName);
    LazyColumnPage decodedPage = (LazyColumnPage) pageDecoder.decode(localDictionary.getDictionary_data(), 0, localDictionary.getDictionary_data().length);
    HashMap<DictionaryByteArrayWrapper, Integer> surrogateByValue = new HashMap<>();
    BitSet usedValues = BitSet.valueOf(CompressorFactory.getInstance().getCompressor(compressorName).unCompressByte(localDictionary.getDictionary_values()));
    // Row n of the decoded page belongs to the n-th set bit of the used-values bit set.
    int row = 0;
    for (int bit = usedValues.nextSetBit(0); bit >= 0; bit = usedValues.nextSetBit(bit + 1)) {
        surrogateByValue.put(new DictionaryByteArrayWrapper(decodedPage.getBytes(row)), bit);
        row++;
    }
    for (String expected : data) {
        byte[] expectedBytes = expected.getBytes(Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
        if (!surrogateByValue.containsKey(new DictionaryByteArrayWrapper(expectedBytes))) {
            return false;
        }
    }
    return true;
}
Also used : DictionaryByteArrayWrapper(org.apache.carbondata.core.cache.dictionary.DictionaryByteArrayWrapper) LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) DefaultEncodingFactory(org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory) HashMap(java.util.HashMap) BitSet(java.util.BitSet) ColumnPageDecoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder) LazyColumnPage(org.apache.carbondata.core.datastore.page.LazyColumnPage)

Aggregations

LocalDictionaryChunk (org.apache.carbondata.format.LocalDictionaryChunk)5 ByteBuffer (java.nio.ByteBuffer)3 ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage)3 ColumnPageDecoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder)3 DefaultEncodingFactory (org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory)3 IOException (java.io.IOException)2 BitSet (java.util.BitSet)2 EncodingFactory (org.apache.carbondata.core.datastore.page.encoding.EncodingFactory)2 DictionaryThresholdReachedException (org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException)2 ColumnLocalDictionaryGenerator (org.apache.carbondata.core.localdictionary.generator.ColumnLocalDictionaryGenerator)2 LocalDictionaryGenerator (org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator)2 Encoding (org.apache.carbondata.format.Encoding)2 Test (org.junit.Test)2 HashMap (java.util.HashMap)1 DictionaryByteArrayWrapper (org.apache.carbondata.core.cache.dictionary.DictionaryByteArrayWrapper)1 ColumnType (org.apache.carbondata.core.datastore.ColumnType)1 TableSpec (org.apache.carbondata.core.datastore.TableSpec)1 LazyColumnPage (org.apache.carbondata.core.datastore.page.LazyColumnPage)1 ColumnPageEncoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder)1 ColumnPageEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta)1