Usage of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in the Apache CarbonData project:
class DefaultEncodingFactory, method selectCodecByAlgorithmForFloating.
/**
 * Chooses between the upscale adaptive codec and the upscale delta adaptive codec for a
 * floating-point page, preferring whichever fits the scaled values into the smaller target
 * type; falls back to plain compression when neither beats DOUBLE.
 */
static ColumnPageCodec selectCodecByAlgorithmForFloating(SimpleStatsResult stats) {
DataType srcDataType = stats.getDataType();
double maxValue = (double) stats.getMax();
double minValue = (double) stats.getMin();
int decimalCount = stats.getDecimalCount();
if (decimalCount == 0) {
// no fractional digits: delegate to the integral selection (short/int/long)
return selectCodecByAlgorithmForIntegral(stats);
}
if (decimalCount < 0) {
// decimal count unknown/invalid: store doubles with plain compression
return new DirectCompressCodec(DataTypes.DOUBLE);
}
// Fit must be driven by the largest magnitude, not the numeric max: for -1 and
// -10000000 the max is -1, but -10000000 decides which integral type is needed.
double absMaxValue = Math.max(Math.abs(maxValue), Math.abs(minValue));
// scale by 10^decimalCount so the fractional values become integral
long scaledMax = (long) (Math.pow(10, decimalCount) * absMaxValue);
DataType adaptiveDataType = fitLongMinMax(scaledMax, 0);
DataType deltaDataType = compareMinMaxAndSelectDataType(
    (long) (Math.pow(10, decimalCount) * (maxValue - minValue)));
if (adaptiveDataType.getSizeInBytes() > deltaDataType.getSizeInBytes()) {
return new AdaptiveDeltaFloatingCodec(srcDataType, deltaDataType, stats);
}
if (adaptiveDataType.getSizeInBytes() < DataTypes.DOUBLE.getSizeInBytes()) {
return new AdaptiveFloatingCodec(srcDataType, adaptiveDataType, stats);
}
// no size win from either scheme
return new DirectCompressCodec(DataTypes.DOUBLE);
}
Usage of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in the Apache CarbonData project:
class DefaultEncodingFactory, method createEncoderForMeasure.
/**
 * Builds the page encoder for a measure column by dispatching on the data type
 * reported by the page statistics.
 */
private ColumnPageEncoder createEncoderForMeasure(ColumnPage columnPage) {
SimpleStatsResult stats = columnPage.getStatistics();
DataType dataType = stats.getDataType();
// booleans and raw byte arrays are stored compressed as-is
if (dataType == DataTypes.BOOLEAN || dataType == DataTypes.BYTE_ARRAY) {
return new DirectCompressCodec(columnPage.getDataType()).createEncoder(null);
}
if (dataType == DataTypes.BYTE || dataType == DataTypes.SHORT
    || dataType == DataTypes.INT || dataType == DataTypes.LONG) {
return selectCodecByAlgorithmForIntegral(stats).createEncoder(null);
}
if (DataTypes.isDecimal(dataType)) {
return createEncoderForDecimalDataTypeMeasure(columnPage);
}
if (dataType == DataTypes.FLOAT || dataType == DataTypes.DOUBLE) {
return selectCodecByAlgorithmForFloating(stats).createEncoder(null);
}
throw new RuntimeException("unsupported data type: " + stats.getDataType());
}
Usage of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in the Apache CarbonData project:
class DefaultEncodingFactory, method selectCodecByAlgorithmForDecimal.
/**
 * Chooses between the adaptive codec and the delta adaptive codec for a decimal column,
 * picking whichever targets the smaller data type; returns a plain compression codec
 * when neither target is smaller than the source type.
 */
static ColumnPageCodec selectCodecByAlgorithmForDecimal(SimpleStatsResult stats, DecimalConverterFactory.DecimalConverterType decimalConverterType) {
DataType srcDataType = stats.getDataType();
DataType adaptiveDataType = fitMinMaxForDecimalType(stats.getDataType(), stats.getMax(), stats.getMin(), decimalConverterType);
// once adaptive already needs LONG, delta encoding cannot do better than LONG either
DataType deltaDataType = (adaptiveDataType == DataTypes.LONG)
    ? DataTypes.LONG
    : fitDeltaForDecimalType(stats.getDataType(), stats.getMax(), stats.getMin(), decimalConverterType);
// compare target sizes in bytes against the source type
int smallestTargetSize = Math.min(adaptiveDataType.getSizeInBytes(), deltaDataType.getSizeInBytes());
if (smallestTargetSize == srcDataType.getSizeInBytes()) {
// neither adaptive nor delta shrinks the storage; compression alone is enough
return new DirectCompressCodec(stats.getDataType());
}
return (adaptiveDataType.getSizeInBytes() <= deltaDataType.getSizeInBytes())
    // adaptive encoding wins (or ties, in which case adaptive is preferred)
    ? new AdaptiveIntegralCodec(stats.getDataType(), adaptiveDataType, stats)
    // delta adaptive encoding yields the smaller target type
    : new AdaptiveDeltaIntegralCodec(stats.getDataType(), deltaDataType, stats);
}
Usage of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in the Apache CarbonData project:
class EncodingFactory, method createDecoderLegacy.
/**
 * Legacy decoder creation: re-runs the writer-side codec selection on the recovered
 * statistics, then builds the decoder from the codec's target data type.
 */
public ColumnPageDecoder createDecoderLegacy(ValueEncoderMeta metadata) {
SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
TableSpec.ColumnSpec spec = TableSpec.ColumnSpec.newInstanceLegacy("legacy", stats.getDataType(), ColumnType.MEASURE);
String compressor = "snappy";
DataType dataType = DataType.getDataType(metadata.getType());
if (dataType == DataTypes.BYTE || dataType == DataTypes.SHORT
    || dataType == DataTypes.INT || dataType == DataTypes.LONG) {
// re-select the integral codec the writer would have chosen, then decode with
// whatever target type that codec produces
ColumnPageCodec codec = DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(stats);
DataType targetType;
if (codec instanceof AdaptiveIntegralCodec) {
targetType = ((AdaptiveIntegralCodec) codec).getTargetDataType();
} else if (codec instanceof AdaptiveDeltaIntegralCodec) {
targetType = ((AdaptiveDeltaIntegralCodec) codec).getTargetDataType();
} else if (codec instanceof DirectCompressCodec) {
targetType = DataType.getDataType(metadata.getType());
} else {
throw new RuntimeException("internal error");
}
return codec.createDecoder(new ColumnPageEncoderMeta(spec, targetType, stats, compressor));
} else if (dataType == DataTypes.FLOAT || dataType == DataTypes.DOUBLE) {
// same recovery scheme for floating-point pages
ColumnPageCodec codec = DefaultEncodingFactory.selectCodecByAlgorithmForFloating(stats);
DataType targetType;
if (codec instanceof AdaptiveFloatingCodec) {
targetType = ((AdaptiveFloatingCodec) codec).getTargetDataType();
} else if (codec instanceof AdaptiveDeltaFloatingCodec) {
targetType = ((AdaptiveDeltaFloatingCodec) codec).getTargetDataType();
} else if (codec instanceof DirectCompressCodec) {
targetType = DataType.getDataType(metadata.getType());
} else {
throw new RuntimeException("internal error");
}
return codec.createDecoder(new ColumnPageEncoderMeta(spec, targetType, stats, compressor));
} else if (DataTypes.isDecimal(dataType) || dataType == DataTypes.BYTE_ARRAY) {
// no dictionary dimension: stored with plain compression only
return new DirectCompressCodec(stats.getDataType()).createDecoder(new ColumnPageEncoderMeta(spec, stats.getDataType(), stats, compressor));
} else if (dataType == DataTypes.LEGACY_LONG) {
// older formats (e.g. V1) used a special long datatype; decode as LONG->LONG adaptive
AdaptiveIntegralCodec legacyCodec = new AdaptiveIntegralCodec(DataTypes.LONG, DataTypes.LONG, stats);
return legacyCodec.createDecoder(new ColumnPageEncoderMeta(spec, legacyCodec.getTargetDataType(), stats, compressor));
} else {
throw new RuntimeException("unsupported data type: " + stats.getDataType());
}
}
Usage of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in the Apache CarbonData project:
class TestEncodingFactory, method testSelectProperDeltaType2.
@Test
public void testSelectProperDeltaType2() {
PrimitivePageStatsCollector primitivePageStatsCollector = PrimitivePageStatsCollector.newInstance(DataTypes.LONG);
// for Byte
primitivePageStatsCollector.update((long) 200);
ColumnPageCodec columnPageCodec = DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector, false, null);
assert (columnPageCodec instanceof AdaptiveDeltaIntegralCodec);
assert (DataTypes.BYTE == ((AdaptiveDeltaIntegralCodec) columnPageCodec).getTargetDataType());
// for Short
primitivePageStatsCollector.update((long) 634767);
columnPageCodec = DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector, false, null);
assert (columnPageCodec instanceof AdaptiveIntegralCodec);
assert (DataTypes.SHORT_INT == ((AdaptiveIntegralCodec) columnPageCodec).getTargetDataType());
// for int
// BUGFIX: widen before adding. (long) (Integer.MAX_VALUE + 200) overflows in int
// arithmetic to a negative value below Integer.MIN_VALUE, which no longer fits INT
// and would break the DataTypes.INT assertion below.
primitivePageStatsCollector.update(Integer.MAX_VALUE + 200L);
columnPageCodec = DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector, false, null);
assert (columnPageCodec instanceof AdaptiveIntegralCodec);
assert (DataTypes.INT == ((AdaptiveIntegralCodec) columnPageCodec).getTargetDataType());
// for long: value exceeds INT range, no smaller adaptive type exists, so the
// factory falls back to plain direct compression
primitivePageStatsCollector.update(Long.MAX_VALUE);
columnPageCodec = DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(primitivePageStatsCollector, false, null);
assert (columnPageCodec instanceof DirectCompressCodec);
assert ("DirectCompressCodec".equals(columnPageCodec.getName()));
}
Aggregations