Search in sources :

Example 16 with DirectCompressCodec

use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.

the class TestEncodingFactory method testSelectProperDeltaType.

/**
 * Grows the maximum value seen by a LONG stats collector step by step and checks which codec
 * {@code DefaultEncodingFactory.selectCodecByAlgorithmForIntegral} picks after each update:
 * an {@code AdaptiveIntegralCodec} targeting BYTE, SHORT and INT in turn, and finally a
 * {@code DirectCompressCodec} once {@code Long.MAX_VALUE} has been recorded.
 */
@Test
public void testSelectProperDeltaType() {
    final PrimitivePageStatsCollector stats = PrimitivePageStatsCollector.newInstance(DataTypes.LONG);
    ColumnPageCodec codec;

    // largest value so far is Byte.MAX_VALUE -> adaptive codec with BYTE target
    stats.update((long) Byte.MAX_VALUE);
    codec = DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(stats, false, null);
    assert codec instanceof AdaptiveIntegralCodec;
    assert ((AdaptiveIntegralCodec) codec).getTargetDataType() == DataTypes.BYTE;

    // largest value so far is Short.MAX_VALUE -> adaptive codec with SHORT target
    stats.update((long) Short.MAX_VALUE);
    codec = DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(stats, false, null);
    assert codec instanceof AdaptiveIntegralCodec;
    assert ((AdaptiveIntegralCodec) codec).getTargetDataType() == DataTypes.SHORT;

    // largest value so far is Integer.MAX_VALUE -> adaptive codec with INT target
    stats.update((long) Integer.MAX_VALUE);
    codec = DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(stats, false, null);
    assert codec instanceof AdaptiveIntegralCodec;
    assert ((AdaptiveIntegralCodec) codec).getTargetDataType() == DataTypes.INT;

    // largest value so far is Long.MAX_VALUE -> direct compression is expected
    stats.update(Long.MAX_VALUE);
    codec = DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(stats, false, null);
    assert codec instanceof DirectCompressCodec;
    assert "DirectCompressCodec".equals(codec.getName());
}
Also used : PrimitivePageStatsCollector(org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector) AdaptiveIntegralCodec(org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveIntegralCodec) DirectCompressCodec(org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec) Test(org.junit.Test)

Example 17 with DirectCompressCodec

use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.

the class PageLevelDictionary method getLocalDictionaryChunkForBlocklet.

/**
 * Builds the encoded local dictionary chunk used for writing.
 *
 * <p>For every bit set in {@code usedDictionaryValues} the matching dictionary key is fetched
 * from {@code localDictionaryGenerator} and stored in a temporary byte-array column page. When
 * {@code isComplexTypePrimitive} is set, each key is prefixed with its length (a short, or an
 * int when the data type is VARCHAR). The page is then encoded with a
 * {@link DirectCompressCodec} and the compressed bitset of used values is attached to the
 * resulting chunk.
 *
 * <p>The temporary page's memory is released before returning, also when encoding fails.
 *
 * <p>TODO Support for numeric data type dictionary exclude columns
 *
 * @return encoded local dictionary chunk
 * @throws IOException in case of problem in encoding
 */
public LocalDictionaryChunk getLocalDictionaryChunkForBlocklet() throws IOException {
    // TODO support for actual data type dictionary ColumnSPEC
    ColumnType columnType = ColumnType.PLAIN_VALUE;
    boolean isVarcharType = false;
    int lvSize = CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
    if (DataTypes.VARCHAR == dataType) {
        // VARCHAR values carry an int-sized length prefix instead of a short-sized one
        columnType = ColumnType.PLAIN_LONG_VALUE;
        lvSize = CarbonCommonConstants.INT_SIZE_IN_BYTE;
        isVarcharType = true;
    }
    TableSpec.ColumnSpec spec = TableSpec.ColumnSpec.newInstance(columnName, DataTypes.BYTE_ARRAY, columnType);
    ColumnPage dictionaryColumnPage = ColumnPage.newPage(new ColumnPageEncoderMeta(spec, DataTypes.BYTE_ARRAY, columnCompressor), usedDictionaryValues.cardinality());
    try {
        // TODO support data type specific stats collector for numeric data types
        dictionaryColumnPage.setStatsCollector(new DummyStatsCollector());
        int rowId = 0;
        for (int i = usedDictionaryValues.nextSetBit(0); i >= 0; i = usedDictionaryValues.nextSetBit(i + 1)) {
            byte[] dictionaryKey = localDictionaryGenerator.getDictionaryKeyBasedOnValue(i);
            if (!isComplexTypePrimitive) {
                dictionaryColumnPage.putData(rowId++, dictionaryKey);
            } else {
                // prepend the key length so the stored row is [length][key bytes]
                ByteBuffer byteBuffer = ByteBuffer.allocate(lvSize + dictionaryKey.length);
                if (!isVarcharType) {
                    byteBuffer.putShort((short) dictionaryKey.length);
                } else {
                    byteBuffer.putInt(dictionaryKey.length);
                }
                byteBuffer.put(dictionaryKey);
                dictionaryColumnPage.putData(rowId++, byteBuffer.array());
            }
        }
        // creating a encoder
        ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
        // get encoded dictionary values
        LocalDictionaryChunk localDictionaryChunk = encoder.encodeDictionary(dictionaryColumnPage);
        // set compressed dictionary values
        localDictionaryChunk.setDictionary_values(CompressorFactory.getInstance().getCompressor(columnCompressor).compressByte(usedDictionaryValues.toByteArray()));
        return localDictionaryChunk;
    } finally {
        // free the dictionary page memory on both the success and the failure path
        dictionaryColumnPage.freeMemory();
    }
}
Also used : ColumnPageEncoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder) TableSpec(org.apache.carbondata.core.datastore.TableSpec) ColumnType(org.apache.carbondata.core.datastore.ColumnType) LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) DummyStatsCollector(org.apache.carbondata.core.datastore.page.statistics.DummyStatsCollector) ByteBuffer(java.nio.ByteBuffer) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnPageEncoderMeta(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta) DirectCompressCodec(org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec)

Aggregations

DirectCompressCodec (org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec)17 DataType (org.apache.carbondata.core.metadata.datatype.DataType)11 AdaptiveIntegralCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveIntegralCodec)10 AdaptiveDeltaIntegralCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaIntegralCodec)9 AdaptiveDeltaFloatingCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaFloatingCodec)6 AdaptiveFloatingCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveFloatingCodec)6 SimpleStatsResult (org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult)6 TableSpec (org.apache.carbondata.core.datastore.TableSpec)4 PrimitivePageStatsCollector (org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 DataInputStream (java.io.DataInputStream)2 ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage)2 RLECodec (org.apache.carbondata.core.datastore.page.encoding.rle.RLECodec)2 RLEEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.rle.RLEEncoderMeta)2 ValueEncoderMeta (org.apache.carbondata.core.metadata.ValueEncoderMeta)2 Encoding (org.apache.carbondata.format.Encoding)2 Test (org.junit.Test)2 ByteBuffer (java.nio.ByteBuffer)1 ColumnType (org.apache.carbondata.core.datastore.ColumnType)1 ComplexColumnPage (org.apache.carbondata.core.datastore.page.ComplexColumnPage)1