Search in sources :

Example 11 with DirectCompressCodec

use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.

the class DefaultEncodingFactory method selectCodecByAlgorithmForFloating.

/**
 * Picks the codec for a floating point page. Depending on the page's decimal count this
 * either reuses the integral selection, falls back to plain compression of DOUBLE data, or
 * delegates to getColumnPageCodec (the upscale adaptive / upscale delta adaptive choice,
 * based on whose target data type size is smaller).
 *
 * @param stats              statistics of the page (data type, min, max, decimal count)
 * @param isComplexPrimitive whether the page belongs to a primitive child of a complex column
 * @param columnSpec         spec of the column, forwarded to the selected codec path
 * @return the codec chosen for the page
 */
static ColumnPageCodec selectCodecByAlgorithmForFloating(SimpleStatsResult stats, boolean isComplexPrimitive, TableSpec.ColumnSpec columnSpec) {
    final DataType sourceType = stats.getDataType();
    final boolean isFloatSource = sourceType == DataTypes.FLOAT;
    final double pageMax;
    final double pageMin;
    if (isFloatSource) {
        // FLOAT statistics are unboxed as float and then widened to double
        pageMax = (float) stats.getMax();
        pageMin = (float) stats.getMin();
    } else {
        pageMax = (double) stats.getMax();
        pageMin = (double) stats.getMin();
    }
    int decimals = stats.getDecimalCount();
    // A complex primitive reporting -1 gets its decimal count from the collector instead,
    // so that the checks below work on an actual count.
    if (isComplexPrimitive && decimals == -1 && stats instanceof PrimitivePageStatsCollector) {
        PrimitivePageStatsCollector collector = (PrimitivePageStatsCollector) stats;
        decimals = collector.getDecimalForComplexPrimitive();
    }
    // The data type has to fit the largest magnitude, not the numeric max: for a page
    // holding -1 and -10000000 the max is -1, yet -10000000 decides the required width.
    final double largestMagnitude = Math.max(Math.abs(pageMax), Math.abs(pageMin));
    if (decimals == 0 && !isFloatSource) {
        // no fractional digits on a non-FLOAT page: treat as short, int, long
        return selectCodecByAlgorithmForIntegral(stats, false, columnSpec);
    }
    if (decimals < 0 && !isComplexPrimitive) {
        return new DirectCompressCodec(DataTypes.DOUBLE);
    }
    // Remaining cases, including FLOAT pages with a zero decimal count.
    return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, sourceType, pageMax, pageMin, decimals, largestMagnitude);
}
Also used : PrimitivePageStatsCollector(org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector) DirectCompressCodec(org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec) DataType(org.apache.carbondata.core.metadata.datatype.DataType)

Example 12 with DirectCompressCodec

use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.

the class DefaultEncodingFactory method selectCodecByAlgorithmForIntegral.

/**
 * Picks the codec for an integral page: adaptive encoding or delta adaptive encoding,
 * whichever has the smaller target data type, or plain compression when neither target
 * type is smaller than the source type.
 *
 * @param stats              statistics of the page (data type, min, max)
 * @param isComplexPrimitive whether the page belongs to a primitive child of a complex column;
 *                           when true the plain compression shortcut is skipped
 * @param columnSpec         spec of the column, used to decide on the inverted index flag
 * @return the codec chosen for the page
 */
static ColumnPageCodec selectCodecByAlgorithmForIntegral(SimpleStatsResult stats, boolean isComplexPrimitive, TableSpec.ColumnSpec columnSpec) {
    final DataType sourceType = stats.getDataType();
    final DataType adaptiveType = fitMinMax(stats.getDataType(), stats.getMax(), stats.getMin());
    final DataType deltaType = fitDelta(stats.getDataType(), stats.getMax(), stats.getMin());
    // Complex primitives never take the shortcut below: they go on to adaptive or delta
    // encoding even if the target type is as wide as the source type.
    if (!isComplexPrimitive) {
        int bestEncodedSize = Math.min(adaptiveType.getSizeInBytes(), deltaType.getSizeInBytes());
        if (bestEncodedSize == sourceType.getSizeInBytes()) {
            // neither adaptive nor delta narrows the data, so only compress it
            return new DirectCompressCodec(stats.getDataType());
        }
    }
    final boolean invertedIndex = isInvertedIndex(isComplexPrimitive, columnSpec);
    final boolean deltaIsSmaller = adaptiveType.getSizeInBytes() > deltaType.getSizeInBytes();
    if (deltaIsSmaller) {
        // delta adaptive encoding gives the narrower target type
        return new AdaptiveDeltaIntegralCodec(stats.getDataType(), deltaType, stats, invertedIndex);
    }
    // adaptive encoding wins, ties included
    return new AdaptiveIntegralCodec(stats.getDataType(), adaptiveType, stats, invertedIndex);
}
Also used : AdaptiveIntegralCodec(org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveIntegralCodec) DirectCompressCodec(org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec) DataType(org.apache.carbondata.core.metadata.datatype.DataType) AdaptiveDeltaIntegralCodec(org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaIntegralCodec)

Example 13 with DirectCompressCodec

use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.

the class DefaultEncodingFactory method createEncoderForMeasureOrNoDictionaryPrimitive.

/**
 * Creates the page encoder for a measure or no-dictionary primitive column by dispatching
 * on the data type recorded in the page statistics.
 *
 * @param columnPage page to encode; its statistics drive the codec selection
 * @param columnSpec spec of the column, forwarded to the codec selection
 * @return encoder for the page
 * @throws RuntimeException if the statistics data type matches none of the handled types
 */
private ColumnPageEncoder createEncoderForMeasureOrNoDictionaryPrimitive(ColumnPage columnPage, TableSpec.ColumnSpec columnSpec) {
    final SimpleStatsResult pageStats = columnPage.getStatistics();
    final DataType statsType = pageStats.getDataType();

    // BOOLEAN / BYTE_ARRAY statistics, or a BINARY page: compress the page as it is.
    if (statsType == DataTypes.BOOLEAN || statsType == DataTypes.BYTE_ARRAY || columnPage.getDataType() == DataTypes.BINARY) {
        DirectCompressCodec rawCodec = new DirectCompressCodec(columnPage.getDataType());
        return rawCodec.createEncoder(null);
    }

    // Integral types, TIMESTAMP included.
    if (statsType == DataTypes.BYTE || statsType == DataTypes.SHORT || statsType == DataTypes.INT || statsType == DataTypes.LONG || statsType == DataTypes.TIMESTAMP) {
        ColumnPageCodec integralCodec = selectCodecByAlgorithmForIntegral(pageStats, false, columnSpec);
        return integralCodec.createEncoder(null);
    }

    if (DataTypes.isDecimal(statsType)) {
        return createEncoderForDecimalDataTypeMeasure(columnPage, columnSpec);
    }

    if (statsType == DataTypes.FLOAT || statsType == DataTypes.DOUBLE) {
        ColumnPageCodec floatingCodec = selectCodecByAlgorithmForFloating(pageStats, false, columnSpec);
        return floatingCodec.createEncoder(null);
    }

    throw new RuntimeException("unsupported data type: " + pageStats.getDataType());
}
Also used : SimpleStatsResult(org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult) DirectCompressCodec(org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec) DataType(org.apache.carbondata.core.metadata.datatype.DataType)

Example 14 with DirectCompressCodec

use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.

the class DefaultEncodingFactory method selectCodecByAlgorithmForDecimal.

/**
 * Picks the codec for a decimal page: adaptive encoding or delta adaptive encoding,
 * whichever has the smaller target data type, or plain compression when neither target
 * type is smaller than the source type.
 *
 * @param stats                statistics of the page (data type, min, max)
 * @param decimalConverterType converter type passed on to the decimal fitting helpers
 * @param columnSpec           spec of the column, used to decide on the inverted index flag
 * @return the codec chosen for the page
 */
static ColumnPageCodec selectCodecByAlgorithmForDecimal(SimpleStatsResult stats, DecimalConverterFactory.DecimalConverterType decimalConverterType, TableSpec.ColumnSpec columnSpec) {
    final DataType sourceType = stats.getDataType();
    final DataType adaptiveType = fitMinMaxForDecimalType(stats.getDataType(), stats.getMax(), stats.getMin(), decimalConverterType);
    // When the adaptive fit is already LONG the delta fit is not computed; LONG is used.
    final DataType deltaType = adaptiveType == DataTypes.LONG
        ? DataTypes.LONG
        : fitDeltaForDecimalType(stats.getDataType(), stats.getMax(), stats.getMin(), decimalConverterType);

    final int bestEncodedSize = Math.min(adaptiveType.getSizeInBytes(), deltaType.getSizeInBytes());
    if (bestEncodedSize == sourceType.getSizeInBytes()) {
        // neither adaptive nor delta narrows the data, so only compress it
        return new DirectCompressCodec(stats.getDataType());
    }

    final boolean deltaIsSmaller = adaptiveType.getSizeInBytes() > deltaType.getSizeInBytes();
    if (deltaIsSmaller) {
        // delta adaptive encoding gives the narrower target type
        return new AdaptiveDeltaIntegralCodec(stats.getDataType(), deltaType, stats, isInvertedIndex(columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE, columnSpec));
    }
    // adaptive encoding wins, ties included
    return new AdaptiveIntegralCodec(stats.getDataType(), adaptiveType, stats, isInvertedIndex(columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE, columnSpec));
}
Also used : AdaptiveIntegralCodec(org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveIntegralCodec) DirectCompressCodec(org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec) DataType(org.apache.carbondata.core.metadata.datatype.DataType) AdaptiveDeltaIntegralCodec(org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaIntegralCodec)

Example 15 with DirectCompressCodec

use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.

the class EncodingFactory method createDecoder.

/**
 * Builds the page decoder that matches the encoder metadata read from file. The first
 * entry of {@code encodings} selects the codec; any encoding not handled explicitly is
 * treated as legacy metadata for backward compatibility.
 *
 * @param encodings encodings used to decode the page
 * @param encoderMetas metadata of encodings to decode the data; only the first buffer is read
 * @param compressor Compressor name which will be used to decode data.
 * @param fullVectorFill whether the flow should go to fill the given vector completely while
 *                       decoding the data itself.
 * @return decoder to decode page.
 * @throws IOException if decoding the encoder metadata fails
 */
public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer> encoderMetas, String compressor, boolean fullVectorFill) throws IOException {
    assert (encodings.size() >= 1);
    assert (encoderMetas.size() == 1);
    final boolean intLengthComplexChild = encodings.contains(Encoding.INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY);
    final Encoding primary = encodings.get(0);
    final byte[] rawMeta = encoderMetas.get(0).array();
    final ByteArrayInputStream rawMetaStream = new ByteArrayInputStream(rawMeta);
    final DataInputStream metaInput = new DataInputStream(rawMetaStream);

    // RLE carries its own metadata type.
    if (primary == RLE_INTEGRAL) {
        RLEEncoderMeta rleMeta = new RLEEncoderMeta();
        rleMeta.readFields(metaInput);
        return new RLECodec().createDecoder(rleMeta);
    }

    final boolean directCompress = primary == DIRECT_COMPRESS || primary == DIRECT_COMPRESS_VARCHAR;
    final boolean adaptive = primary == ADAPTIVE_INTEGRAL || primary == ADAPTIVE_DELTA_INTEGRAL
        || primary == ADAPTIVE_FLOATING || primary == ADAPTIVE_DELTA_FLOATING;
    if (!directCompress && !adaptive && primary != BOOL_BYTE) {
        // for backward compatibility: anything else is deserialized as V3 legacy metadata
        ValueEncoderMeta legacyMeta = CarbonUtil.deserializeEncoderMetaV3(rawMeta);
        return createDecoderLegacy(legacyMeta, compressor, fullVectorFill);
    }

    // Every remaining encoding reads a ColumnPageEncoderMeta from the metadata stream.
    final ColumnPageEncoderMeta pageMeta = new ColumnPageEncoderMeta();
    pageMeta.setFillCompleteVector(fullVectorFill);
    pageMeta.readFields(metaInput);

    if (directCompress) {
        DirectCompressCodec codec = new DirectCompressCodec(pageMeta.getStoreDataType());
        codec.setComplexPrimitiveIntLengthEncoding(intLengthComplexChild);
        return codec.createDecoder(pageMeta);
    }
    if (primary == BOOL_BYTE) {
        return new DirectCompressCodec(pageMeta.getStoreDataType()).createDecoder(pageMeta);
    }

    // Adaptive codecs additionally take statistics built from the metadata.
    final SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(pageMeta);
    if (primary == ADAPTIVE_INTEGRAL) {
        return new AdaptiveIntegralCodec(pageMeta.getSchemaDataType(), pageMeta.getStoreDataType(), stats, encodings.contains(Encoding.INVERTED_INDEX)).createDecoder(pageMeta);
    }
    if (primary == ADAPTIVE_DELTA_INTEGRAL) {
        return new AdaptiveDeltaIntegralCodec(pageMeta.getSchemaDataType(), pageMeta.getStoreDataType(), stats, encodings.contains(Encoding.INVERTED_INDEX)).createDecoder(pageMeta);
    }
    if (primary == ADAPTIVE_FLOATING) {
        return new AdaptiveFloatingCodec(pageMeta.getSchemaDataType(), pageMeta.getStoreDataType(), stats, encodings.contains(Encoding.INVERTED_INDEX)).createDecoder(pageMeta);
    }
    // ADAPTIVE_DELTA_FLOATING is the only encoding left at this point.
    return new AdaptiveDeltaFloatingCodec(pageMeta.getSchemaDataType(), pageMeta.getStoreDataType(), stats, encodings.contains(Encoding.INVERTED_INDEX)).createDecoder(pageMeta);
}
Also used : RLEEncoderMeta(org.apache.carbondata.core.datastore.page.encoding.rle.RLEEncoderMeta) Encoding(org.apache.carbondata.format.Encoding) DataInputStream(java.io.DataInputStream) AdaptiveDeltaIntegralCodec(org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaIntegralCodec) AdaptiveFloatingCodec(org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveFloatingCodec) SimpleStatsResult(org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult) AdaptiveIntegralCodec(org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveIntegralCodec) ByteArrayInputStream(java.io.ByteArrayInputStream) RLECodec(org.apache.carbondata.core.datastore.page.encoding.rle.RLECodec) DirectCompressCodec(org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec) AdaptiveDeltaFloatingCodec(org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaFloatingCodec) ValueEncoderMeta(org.apache.carbondata.core.metadata.ValueEncoderMeta)

Aggregations

DirectCompressCodec (org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec)17 DataType (org.apache.carbondata.core.metadata.datatype.DataType)11 AdaptiveIntegralCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveIntegralCodec)10 AdaptiveDeltaIntegralCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaIntegralCodec)9 AdaptiveDeltaFloatingCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveDeltaFloatingCodec)6 AdaptiveFloatingCodec (org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveFloatingCodec)6 SimpleStatsResult (org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult)6 TableSpec (org.apache.carbondata.core.datastore.TableSpec)4 PrimitivePageStatsCollector (org.apache.carbondata.core.datastore.page.statistics.PrimitivePageStatsCollector)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 DataInputStream (java.io.DataInputStream)2 ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage)2 RLECodec (org.apache.carbondata.core.datastore.page.encoding.rle.RLECodec)2 RLEEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.rle.RLEEncoderMeta)2 ValueEncoderMeta (org.apache.carbondata.core.metadata.ValueEncoderMeta)2 Encoding (org.apache.carbondata.format.Encoding)2 Test (org.junit.Test)2 ByteBuffer (java.nio.ByteBuffer)1 ColumnType (org.apache.carbondata.core.datastore.ColumnType)1 ComplexColumnPage (org.apache.carbondata.core.datastore.page.ComplexColumnPage)1