Usage of org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveFloatingCodec in the Apache CarbonData project: class DefaultEncodingFactory, method selectCodecByAlgorithmForFloating.
// choose between upscale adaptive encoder or upscale delta adaptive encoder,
// based on whose target data type size is smaller
/**
 * Selects a codec for a floating-point (double) column page.
 *
 * Chooses between the upscale adaptive encoder and the upscale delta adaptive
 * encoder based on whose target data type is smaller; falls back to direct
 * compression when neither shrinks the page below 8-byte doubles.
 *
 * @param stats page statistics (min, max, decimal count) gathered while loading
 * @return the codec to encode this page with
 */
static ColumnPageCodec selectCodecByAlgorithmForFloating(SimpleStatsResult stats) {
  DataType srcDataType = stats.getDataType();
  double pageMax = (double) stats.getMax();
  double pageMin = (double) stats.getMin();
  int decimalCount = stats.getDecimalCount();

  if (decimalCount == 0) {
    // No fractional digits observed: the values fit an integral encoding
    // (short, int, long), so reuse the integral selection logic.
    return selectCodecByAlgorithmForIntegral(stats);
  }
  if (decimalCount < 0) {
    // Decimal count is unknown/invalid: store doubles directly.
    return new DirectCompressCodec(DataTypes.DOUBLE);
  }

  // Size the adaptive target by the largest ABSOLUTE value, not the numeric
  // max: for -1 and -10000000 the magnitude 10000000 decides the storage
  // type, even though -1 is the numeric maximum.
  double largestMagnitude = Math.max(Math.abs(pageMax), Math.abs(pageMin));
  double scale = Math.pow(10, decimalCount);
  DataType adaptiveType = fitLongMinMax((long) (scale * largestMagnitude), 0);
  DataType deltaType =
      compareMinMaxAndSelectDataType((long) (scale * (pageMax - pageMin)));

  if (adaptiveType.getSizeInBytes() > deltaType.getSizeInBytes()) {
    // Encoding the delta from min needs fewer bytes per value.
    return new AdaptiveDeltaFloatingCodec(srcDataType, deltaType, stats);
  }
  if (adaptiveType.getSizeInBytes() < DataTypes.DOUBLE.getSizeInBytes()) {
    // Upscaled values are still narrower than a raw double.
    return new AdaptiveFloatingCodec(srcDataType, adaptiveType, stats);
  }
  // No adaptive encoding saves space; compress the doubles as-is.
  return new DirectCompressCodec(DataTypes.DOUBLE);
}
Usage of org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveFloatingCodec in the Apache CarbonData project: class EncodingFactory, method createDecoderLegacy.
/**
* Old way of creating decoder, based on algorithm
*/
/**
 * Old way of creating a decoder, based on the encoding algorithm rather than
 * on persisted encoder metadata. Re-runs the codec selection against the
 * recovered page statistics and builds a matching decoder.
 *
 * @param metadata value encoder metadata recovered from an older-format file
 * @return a decoder for the page described by {@code metadata}
 * @throws RuntimeException if the codec selection yields an unexpected codec
 *         type or the data type is unsupported
 */
public ColumnPageDecoder createDecoderLegacy(ValueEncoderMeta metadata) {
  SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
  // Legacy pages carry no column spec, so synthesize a measure-column spec.
  TableSpec.ColumnSpec spec =
      TableSpec.ColumnSpec.newInstanceLegacy("legacy", stats.getDataType(), ColumnType.MEASURE);
  // Older formats always used snappy compression.
  String compressor = "snappy";
  DataType dataType = DataType.getDataType(metadata.getType());

  if (dataType == DataTypes.BYTE || dataType == DataTypes.SHORT
      || dataType == DataTypes.INT || dataType == DataTypes.LONG) {
    // Re-run integral codec selection, then rebuild the encoder meta the
    // codec would have written so its decoder can be constructed.
    ColumnPageCodec codec = DefaultEncodingFactory.selectCodecByAlgorithmForIntegral(stats);
    DataType targetType;
    if (codec instanceof AdaptiveIntegralCodec) {
      targetType = ((AdaptiveIntegralCodec) codec).getTargetDataType();
    } else if (codec instanceof AdaptiveDeltaIntegralCodec) {
      targetType = ((AdaptiveDeltaIntegralCodec) codec).getTargetDataType();
    } else if (codec instanceof DirectCompressCodec) {
      targetType = DataType.getDataType(metadata.getType());
    } else {
      throw new RuntimeException("internal error");
    }
    return codec.createDecoder(new ColumnPageEncoderMeta(spec, targetType, stats, compressor));
  }

  if (dataType == DataTypes.FLOAT || dataType == DataTypes.DOUBLE) {
    // Same recovery scheme for floating-point pages.
    ColumnPageCodec codec = DefaultEncodingFactory.selectCodecByAlgorithmForFloating(stats);
    DataType targetType;
    if (codec instanceof AdaptiveFloatingCodec) {
      targetType = ((AdaptiveFloatingCodec) codec).getTargetDataType();
    } else if (codec instanceof DirectCompressCodec) {
      targetType = DataType.getDataType(metadata.getType());
    } else if (codec instanceof AdaptiveDeltaFloatingCodec) {
      targetType = ((AdaptiveDeltaFloatingCodec) codec).getTargetDataType();
    } else {
      throw new RuntimeException("internal error");
    }
    return codec.createDecoder(new ColumnPageEncoderMeta(spec, targetType, stats, compressor));
  }

  if (DataTypes.isDecimal(dataType) || dataType == DataTypes.BYTE_ARRAY) {
    // No-dictionary dimension: stored directly, so decode directly.
    return new DirectCompressCodec(stats.getDataType())
        .createDecoder(new ColumnPageEncoderMeta(spec, stats.getDataType(), stats, compressor));
  }

  if (dataType == DataTypes.LEGACY_LONG) {
    // Older versions (e.g. the V1 format) used a special long data type;
    // decode it as an adaptive long-to-long page.
    AdaptiveIntegralCodec legacyCodec =
        new AdaptiveIntegralCodec(DataTypes.LONG, DataTypes.LONG, stats);
    return legacyCodec.createDecoder(
        new ColumnPageEncoderMeta(spec, legacyCodec.getTargetDataType(), stats, compressor));
  }

  throw new RuntimeException("unsupported data type: " + stats.getDataType());
}
Usage of org.apache.carbondata.core.datastore.page.encoding.adaptive.AdaptiveFloatingCodec in the Apache CarbonData project: class EncodingFactory, method createDecoder.
/**
* Return new decoder based on encoder metadata read from file
*/
/**
 * Returns a new decoder based on the encoder metadata read from the file.
 *
 * Exactly one encoding and one serialized metadata blob are expected per page;
 * unknown encodings fall back to the legacy (V3) metadata path for backward
 * compatibility.
 *
 * @param encodings    encodings applied to the page (must contain exactly one)
 * @param encoderMetas serialized encoder metadata (must contain exactly one)
 * @return a decoder matching the page's encoding
 * @throws IOException if the metadata cannot be deserialized
 */
public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer> encoderMetas) throws IOException {
  assert (encodings.size() == 1);
  assert (encoderMetas.size() == 1);
  Encoding encoding = encodings.get(0);
  byte[] encoderMeta = encoderMetas.get(0).array();
  DataInputStream in = new DataInputStream(new ByteArrayInputStream(encoderMeta));

  if (encoding == DIRECT_COMPRESS || encoding == BOOL_BYTE) {
    // Both encodings store the page verbatim (booleans are stored as bytes),
    // so they share the direct-compress decoder.
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.readFields(in);
    return new DirectCompressCodec(metadata.getStoreDataType()).createDecoder(metadata);
  }

  if (encoding == ADAPTIVE_INTEGRAL || encoding == ADAPTIVE_DELTA_INTEGRAL
      || encoding == ADAPTIVE_FLOATING || encoding == ADAPTIVE_DELTA_FLOATING) {
    // All adaptive variants persist the same meta; only the codec differs.
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.readFields(in);
    SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
    ColumnPageCodec codec;
    if (encoding == ADAPTIVE_INTEGRAL) {
      codec = new AdaptiveIntegralCodec(
          metadata.getSchemaDataType(), metadata.getStoreDataType(), stats);
    } else if (encoding == ADAPTIVE_DELTA_INTEGRAL) {
      codec = new AdaptiveDeltaIntegralCodec(
          metadata.getSchemaDataType(), metadata.getStoreDataType(), stats);
    } else if (encoding == ADAPTIVE_FLOATING) {
      codec = new AdaptiveFloatingCodec(
          metadata.getSchemaDataType(), metadata.getStoreDataType(), stats);
    } else {
      codec = new AdaptiveDeltaFloatingCodec(
          metadata.getSchemaDataType(), metadata.getStoreDataType(), stats);
    }
    return codec.createDecoder(metadata);
  }

  if (encoding == RLE_INTEGRAL) {
    RLEEncoderMeta metadata = new RLEEncoderMeta();
    metadata.readFields(in);
    return new RLECodec().createDecoder(metadata);
  }

  // Unknown encoding: assume an older file and decode via the legacy
  // (V3-serialized) metadata path for backward compatibility.
  ValueEncoderMeta metadata = CarbonUtil.deserializeEncoderMetaV3(encoderMeta);
  return createDecoderLegacy(metadata);
}
Aggregations