Use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.
The class DefaultEncodingFactory, method selectCodecByAlgorithmForFloating:

// choose between upscale adaptive encoder or upscale delta adaptive encoder,
// based on whose target data type size is smaller
static ColumnPageCodec selectCodecByAlgorithmForFloating(SimpleStatsResult stats,
    boolean isComplexPrimitive, TableSpec.ColumnSpec columnSpec) {
  DataType srcDataType = stats.getDataType();
  double maxValue;
  double minValue;
  if (srcDataType == DataTypes.FLOAT) {
    maxValue = (float) stats.getMax();
    minValue = (float) stats.getMin();
  } else {
    maxValue = (double) stats.getMax();
    minValue = (double) stats.getMin();
  }
  int decimalCount = stats.getDecimalCount();
  // for a complex primitive, the decimal count should be the actual count instead of -1
  if (isComplexPrimitive && decimalCount == -1 && stats instanceof PrimitivePageStatsCollector) {
    decimalCount = ((PrimitivePageStatsCollector) stats).getDecimalForComplexPrimitive();
  }
  // Use the max absolute value as the max for getDatatype: given -1 and -10000000, the max
  // is -1, but the target type must be sized for -10000000, so -1 cannot be used directly.
  double absMaxValue = Math.max(Math.abs(maxValue), Math.abs(minValue));
  if (srcDataType == DataTypes.FLOAT && decimalCount == 0) {
    return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, srcDataType,
        maxValue, minValue, decimalCount, absMaxValue);
  } else if (decimalCount == 0) {
    // short, int, long
    return selectCodecByAlgorithmForIntegral(stats, false, columnSpec);
  } else if (decimalCount < 0 && !isComplexPrimitive) {
    return new DirectCompressCodec(DataTypes.DOUBLE);
  } else {
    return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, srcDataType,
        maxValue, minValue, decimalCount, absMaxValue);
  }
}
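
Why the selection keys off absMaxValue rather than the plain max: a minimal standalone sketch (not CarbonData code; the class name is invented for illustration) of the -1 / -10000000 case from the comment above.

// For a page containing -1 and -10000000, the plain max (-1) would fit in a
// single byte, but the page actually needs a type wide enough for -10000000;
// taking the max of the absolute values captures that.
public class AbsMaxValueSketch {
  public static void main(String[] args) {
    double maxValue = -1;
    double minValue = -10000000;
    double absMaxValue = Math.max(Math.abs(maxValue), Math.abs(minValue));
    System.out.println(absMaxValue); // prints 1.0E7
  }
}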
Use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.
The class DefaultEncodingFactory, method selectCodecByAlgorithmForIntegral:

/**
 * Choose between the adaptive encoder and the delta adaptive encoder, based on whose
 * target data type size is smaller.
 */
static ColumnPageCodec selectCodecByAlgorithmForIntegral(SimpleStatsResult stats,
    boolean isComplexPrimitive, TableSpec.ColumnSpec columnSpec) {
  DataType srcDataType = stats.getDataType();
  DataType adaptiveDataType = fitMinMax(stats.getDataType(), stats.getMax(), stats.getMin());
  DataType deltaDataType = fitDelta(stats.getDataType(), stats.getMax(), stats.getMin());
  // For a complex primitive, adaptive encoding is used even when the source and target data
  // types are the same, so the direct-compress shortcut below is skipped.
  if (!isComplexPrimitive) {
    // compare based on size in bytes
    if (Math.min(adaptiveDataType.getSizeInBytes(), deltaDataType.getSizeInBytes())
        == srcDataType.getSizeInBytes()) {
      // neither adaptive nor delta shrinks the type, so use compression only
      return new DirectCompressCodec(stats.getDataType());
    }
  }
  boolean isInvertedIndex = isInvertedIndex(isComplexPrimitive, columnSpec);
  if (adaptiveDataType.getSizeInBytes() <= deltaDataType.getSizeInBytes()) {
    // choose adaptive encoding
    return new AdaptiveIntegralCodec(stats.getDataType(), adaptiveDataType, stats,
        isInvertedIndex);
  } else {
    // choose delta adaptive encoding
    return new AdaptiveDeltaIntegralCodec(stats.getDataType(), deltaDataType, stats,
        isInvertedIndex);
  }
}
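
The adaptive-versus-delta trade-off in one self-contained sketch: adaptive encoding sizes the target type by the raw min/max, while delta encoding sizes it by max - min, so a narrow range of large values favors delta. Here bytesFor is a simplified, invented stand-in for the real fitMinMax/fitDelta helpers, which handle more cases.

// Simplified stand-in: how many bytes an integral value needs.
public class CodecChoiceSketch {
  static int bytesFor(long v) {
    long a = Math.abs(v);
    if (a <= Byte.MAX_VALUE) return 1;
    if (a <= Short.MAX_VALUE) return 2;
    if (a <= Integer.MAX_VALUE) return 4;
    return 8;
  }

  public static void main(String[] args) {
    long min = 1_000_000, max = 1_000_100;
    int adaptiveSize = Math.max(bytesFor(min), bytesFor(max)); // 4 bytes (int-sized)
    int deltaSize = bytesFor(max - min);                       // 1 byte (byte-sized)
    System.out.println(deltaSize < adaptiveSize ? "delta adaptive wins" : "adaptive wins");
  }
}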
Use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.
The class DefaultEncodingFactory, method createEncoderForMeasureOrNoDictionaryPrimitive:

private ColumnPageEncoder createEncoderForMeasureOrNoDictionaryPrimitive(ColumnPage columnPage,
    TableSpec.ColumnSpec columnSpec) {
  SimpleStatsResult stats = columnPage.getStatistics();
  DataType dataType = stats.getDataType();
  if (dataType == DataTypes.BOOLEAN || dataType == DataTypes.BYTE_ARRAY
      || columnPage.getDataType() == DataTypes.BINARY) {
    return new DirectCompressCodec(columnPage.getDataType()).createEncoder(null);
  } else if (dataType == DataTypes.BYTE || dataType == DataTypes.SHORT
      || dataType == DataTypes.INT || dataType == DataTypes.LONG
      || dataType == DataTypes.TIMESTAMP) {
    return selectCodecByAlgorithmForIntegral(stats, false, columnSpec).createEncoder(null);
  } else if (DataTypes.isDecimal(dataType)) {
    return createEncoderForDecimalDataTypeMeasure(columnPage, columnSpec);
  } else if (dataType == DataTypes.FLOAT || dataType == DataTypes.DOUBLE) {
    return selectCodecByAlgorithmForFloating(stats, false, columnSpec).createEncoder(null);
  } else {
    throw new RuntimeException("unsupported data type: " + stats.getDataType());
  }
}
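
In practice this private helper is reached through the factory's public entry point. A hedged usage sketch follows; the getInstance() accessor and the createEncoder(columnSpec, columnPage) / encode(columnPage) signatures are assumptions based on typical CarbonData versions and may differ in yours.

// Assumed usage: obtain an encoder for a measure page and encode it.
// getInstance() and the signatures below are assumptions, not verified
// against this exact CarbonData version.
ColumnPageEncoder encoder =
    DefaultEncodingFactory.getInstance().createEncoder(columnSpec, columnPage);
EncodedColumnPage encodedPage = encoder.encode(columnPage);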
Use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.
The class DefaultEncodingFactory, method selectCodecByAlgorithmForDecimal:

/**
 * Choose between the adaptive encoder and the delta adaptive encoder for the decimal data
 * type, based on whose target data type size is smaller.
 */
static ColumnPageCodec selectCodecByAlgorithmForDecimal(SimpleStatsResult stats,
    DecimalConverterFactory.DecimalConverterType decimalConverterType,
    TableSpec.ColumnSpec columnSpec) {
  DataType srcDataType = stats.getDataType();
  DataType adaptiveDataType = fitMinMaxForDecimalType(stats.getDataType(), stats.getMax(),
      stats.getMin(), decimalConverterType);
  DataType deltaDataType;
  if (adaptiveDataType == DataTypes.LONG) {
    deltaDataType = DataTypes.LONG;
  } else {
    deltaDataType = fitDeltaForDecimalType(stats.getDataType(), stats.getMax(), stats.getMin(),
        decimalConverterType);
  }
  // compare based on size in bytes
  if (Math.min(adaptiveDataType.getSizeInBytes(), deltaDataType.getSizeInBytes())
      == srcDataType.getSizeInBytes()) {
    // neither adaptive nor delta shrinks the type, so use compression only
    return new DirectCompressCodec(stats.getDataType());
  }
  if (adaptiveDataType.getSizeInBytes() <= deltaDataType.getSizeInBytes()) {
    // choose adaptive encoding
    return new AdaptiveIntegralCodec(stats.getDataType(), adaptiveDataType, stats,
        isInvertedIndex(columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE, columnSpec));
  } else {
    // choose delta adaptive encoding
    return new AdaptiveDeltaIntegralCodec(stats.getDataType(), deltaDataType, stats,
        isInvertedIndex(columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE, columnSpec));
  }
}
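
Integral codecs can serve decimal pages because a decimal breaks down into an unscaled integer value plus a fixed scale; that CarbonData stores decimals this way is an assumption based on the DecimalConverter machinery referenced above, while the BigDecimal mechanics below are standard Java.

// Standard-Java illustration of the unscaled representation that lets
// integral adaptive/delta codecs handle decimal pages.
import java.math.BigDecimal;

public class UnscaledDecimalSketch {
  public static void main(String[] args) {
    BigDecimal d = new BigDecimal("123.45");
    long unscaled = d.unscaledValue().longValueExact(); // 12345
    int scale = d.scale();                              // 2
    System.out.println(unscaled + " with scale " + scale); // value = 12345 / 10^2
  }
}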
Use of org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec in project carbondata by apache.
The class EncodingFactory, method createDecoder:

/**
 * Return a new decoder based on encoder metadata read from the file.
 * @param encodings encodings that were applied to the page
 * @param encoderMetas metadata of the encodings, needed to decode the data
 * @param compressor name of the compressor to use while decoding the data
 * @param fullVectorFill whether the decoder should fill the given vector completely
 *                       while decoding the data itself
 * @return decoder to decode the page
 * @throws IOException
 */
public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer> encoderMetas,
    String compressor, boolean fullVectorFill) throws IOException {
  assert (encodings.size() >= 1);
  assert (encoderMetas.size() == 1);
  boolean isComplexPrimitiveIntLengthEncoding =
      encodings.contains(Encoding.INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY);
  Encoding encoding = encodings.get(0);
  byte[] encoderMeta = encoderMetas.get(0).array();
  ByteArrayInputStream stream = new ByteArrayInputStream(encoderMeta);
  DataInputStream in = new DataInputStream(stream);
  if (encoding == DIRECT_COMPRESS || encoding == DIRECT_COMPRESS_VARCHAR) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.setFillCompleteVector(fullVectorFill);
    metadata.readFields(in);
    DirectCompressCodec directCompressCodec = new DirectCompressCodec(metadata.getStoreDataType());
    directCompressCodec.setComplexPrimitiveIntLengthEncoding(isComplexPrimitiveIntLengthEncoding);
    return directCompressCodec.createDecoder(metadata);
  } else if (encoding == ADAPTIVE_INTEGRAL) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.setFillCompleteVector(fullVectorFill);
    metadata.readFields(in);
    SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
    return new AdaptiveIntegralCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(),
        stats, encodings.contains(Encoding.INVERTED_INDEX)).createDecoder(metadata);
  } else if (encoding == ADAPTIVE_DELTA_INTEGRAL) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.setFillCompleteVector(fullVectorFill);
    metadata.readFields(in);
    SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
    return new AdaptiveDeltaIntegralCodec(metadata.getSchemaDataType(),
        metadata.getStoreDataType(), stats,
        encodings.contains(Encoding.INVERTED_INDEX)).createDecoder(metadata);
  } else if (encoding == ADAPTIVE_FLOATING) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.setFillCompleteVector(fullVectorFill);
    metadata.readFields(in);
    SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
    return new AdaptiveFloatingCodec(metadata.getSchemaDataType(), metadata.getStoreDataType(),
        stats, encodings.contains(Encoding.INVERTED_INDEX)).createDecoder(metadata);
  } else if (encoding == ADAPTIVE_DELTA_FLOATING) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.setFillCompleteVector(fullVectorFill);
    metadata.readFields(in);
    SimpleStatsResult stats = PrimitivePageStatsCollector.newInstance(metadata);
    return new AdaptiveDeltaFloatingCodec(metadata.getSchemaDataType(),
        metadata.getStoreDataType(), stats,
        encodings.contains(Encoding.INVERTED_INDEX)).createDecoder(metadata);
  } else if (encoding == RLE_INTEGRAL) {
    RLEEncoderMeta metadata = new RLEEncoderMeta();
    metadata.readFields(in);
    return new RLECodec().createDecoder(metadata);
  } else if (encoding == BOOL_BYTE) {
    ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
    metadata.setFillCompleteVector(fullVectorFill);
    metadata.readFields(in);
    return new DirectCompressCodec(metadata.getStoreDataType()).createDecoder(metadata);
  } else {
    // for backward compatibility
    ValueEncoderMeta metadata = CarbonUtil.deserializeEncoderMetaV3(encoderMeta);
    return createDecoderLegacy(metadata, compressor, fullVectorFill);
  }
}
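
On the read path, a hedged sketch of the call site (the DataChunk2 thrift accessors getEncoders()/getEncoder_meta() and the decode(byte[], int, int) signature are assumptions from typical CarbonData versions, not verified against this one):

// Assumed call site: rebuild a decoder from the persisted encodings and
// decode the raw page bytes back into a ColumnPage.
ColumnPageDecoder decoder = encodingFactory.createDecoder(
    dataChunk.getEncoders(), dataChunk.getEncoder_meta(), compressorName, false);
ColumnPage page = decoder.decode(pageData, offset, pageLength);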