Use of org.apache.hadoop.hive.common.type.HiveDecimal in project hive by apache.
The class ColumnStatisticsObjTranslator, method unpackDecimalStats.
private static void unpackDecimalStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) {
  if (fName.equals("countnulls")) {
    long v = ((LongObjectInspector) oi).get(o);
    statsObj.getStatsData().getDecimalStats().setNumNulls(v);
  } else if (fName.equals("numdistinctvalues")) {
    long v = ((LongObjectInspector) oi).get(o);
    statsObj.getStatsData().getDecimalStats().setNumDVs(v);
  } else if (fName.equals("max")) {
    HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
    statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(d));
  } else if (fName.equals("min")) {
    HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
    statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d));
  } else if (fName.equals("ndvbitvector")) {
    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
    byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o);
    statsObj.getStatsData().getDecimalStats().setBitVectors(buf);
  }
}
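The helper convertToThriftDecimal is referenced above but not shown. A plausible sketch, assuming only that the Thrift-generated Decimal exposes setters matching the getUnscaled()/getScale() accessors used in getColStatistics below (this reconstruction is hypothetical; the real helper lives elsewhere in Hive):

private static Decimal convertToThriftDecimal(HiveDecimal d) {
  // Assumption: the Thrift Decimal carries the unscaled value as bytes plus a scale.
  Decimal result = new Decimal();
  result.setUnscaled(ByteBuffer.wrap(d.unscaledValue().toByteArray()));
  result.setScale((short) d.scale());
  return result;
}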
Use of org.apache.hadoop.hive.common.type.HiveDecimal in project hive by apache.
The class StatsUtils, method getColStatistics.
/**
 * Convert ColumnStatisticsObj to ColStatistics.
 * @param cso - ColumnStatisticsObj
 * @param tabName - table name
 * @param colName - column name
 * @return ColStatistics
 */
public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName, String colName) {
  String colTypeLowerCase = cso.getColType().toLowerCase();
  ColStatistics cs = new ColStatistics(colName, colTypeLowerCase);
  ColumnStatisticsData csd = cso.getStatsData();
  if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)
      || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
      || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)) {
    cs.setCountDistint(csd.getLongStats().getNumDVs());
    cs.setNumNulls(csd.getLongStats().getNumNulls());
    cs.setAvgColLen(JavaDataModel.get().primitive1());
    cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
  } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
    cs.setCountDistint(csd.getLongStats().getNumDVs());
    cs.setNumNulls(csd.getLongStats().getNumNulls());
    cs.setAvgColLen(JavaDataModel.get().primitive2());
    cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
  } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
    cs.setCountDistint(csd.getDoubleStats().getNumDVs());
    cs.setNumNulls(csd.getDoubleStats().getNumNulls());
    cs.setAvgColLen(JavaDataModel.get().primitive1());
    cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
  } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
    cs.setCountDistint(csd.getDoubleStats().getNumDVs());
    cs.setNumNulls(csd.getDoubleStats().getNumNulls());
    cs.setAvgColLen(JavaDataModel.get().primitive2());
    cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
  } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
      || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
      || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
    cs.setCountDistint(csd.getStringStats().getNumDVs());
    cs.setNumNulls(csd.getStringStats().getNumNulls());
    cs.setAvgColLen(csd.getStringStats().getAvgColLen());
  } else if (colTypeLowerCase.equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
    if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) {
      cs.setCountDistint(2);
    } else {
      cs.setCountDistint(1);
    }
    cs.setNumTrues(csd.getBooleanStats().getNumTrues());
    cs.setNumFalses(csd.getBooleanStats().getNumFalses());
    cs.setNumNulls(csd.getBooleanStats().getNumNulls());
    cs.setAvgColLen(JavaDataModel.get().primitive1());
  } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
    cs.setAvgColLen(csd.getBinaryStats().getAvgColLen());
    cs.setNumNulls(csd.getBinaryStats().getNumNulls());
  } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)
      || colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) {
    cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
  } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
    cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
    cs.setCountDistint(csd.getDecimalStats().getNumDVs());
    cs.setNumNulls(csd.getDecimalStats().getNumNulls());
    Decimal highValue = csd.getDecimalStats().getHighValue();
    Decimal lowValue = csd.getDecimalStats().getLowValue();
    if (highValue != null && highValue.getUnscaled() != null
        && lowValue != null && lowValue.getUnscaled() != null) {
      HiveDecimal maxHiveDec = HiveDecimal.create(new BigInteger(highValue.getUnscaled()), highValue.getScale());
      BigDecimal maxVal = maxHiveDec == null ? null : maxHiveDec.bigDecimalValue();
      HiveDecimal minHiveDec = HiveDecimal.create(new BigInteger(lowValue.getUnscaled()), lowValue.getScale());
      BigDecimal minVal = minHiveDec == null ? null : minHiveDec.bigDecimalValue();
      if (minVal != null && maxVal != null) {
        cs.setRange(minVal, maxVal);
      }
    }
  } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
    cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
    cs.setNumNulls(csd.getDateStats().getNumNulls());
    Long lowVal = (csd.getDateStats().getLowValue() != null) ? csd.getDateStats().getLowValue().getDaysSinceEpoch() : null;
    Long highVal = (csd.getDateStats().getHighValue() != null) ? csd.getDateStats().getHighValue().getDaysSinceEpoch() : null;
    cs.setRange(lowVal, highVal);
  } else {
    // Column statistics for complex datatypes are not supported yet.
    return null;
  }
  return cs;
}
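The decimal branch above rebuilds the min/max range from the Thrift Decimal's unscaled bytes and scale. A minimal round-trip sketch, using only calls that already appear in the method (values chosen for illustration):

// A decimal 12.34 is stored as unscaled digits 1234 with scale 2.
byte[] unscaled = BigInteger.valueOf(1234).toByteArray();
HiveDecimal hd = HiveDecimal.create(new BigInteger(unscaled), 2);
BigDecimal bd = hd.bigDecimalValue(); // 12.34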
Use of org.apache.hadoop.hive.common.type.HiveDecimal in project hive by apache.
The class VectorAssignRow, method assignConvertRowColumn.
private void assignConvertRowColumn(ColumnVector columnVector, int batchIndex, TypeInfo targetTypeInfo, ObjectInspector sourceObjectInspector, Writable convertTargetWritable, Object object) {
  final Category targetCategory = targetTypeInfo.getCategory();
  if (targetCategory == null) {
    /*
     * This is a column that we don't want (i.e. not included) -- we are done.
     */
    return;
  }
  if (object == null) {
    VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
    return;
  }
  try {
    switch (targetCategory) {
    case PRIMITIVE:
      final PrimitiveObjectInspector sourcePrimitiveOI = (PrimitiveObjectInspector) sourceObjectInspector;
      final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory();
      switch (targetPrimitiveCategory) {
      case VOID:
        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
        return;
      case BOOLEAN:
        ((LongColumnVector) columnVector).vector[batchIndex] = (PrimitiveObjectInspectorUtils.getBoolean(object, sourcePrimitiveOI) ? 1 : 0);
        break;
      case BYTE:
        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getByte(object, sourcePrimitiveOI);
        break;
      case SHORT:
        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getShort(object, sourcePrimitiveOI);
        break;
      case INT:
        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getInt(object, sourcePrimitiveOI);
        break;
      case LONG:
        ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getLong(object, sourcePrimitiveOI);
        break;
      case TIMESTAMP: {
        final Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestamp(object, sourcePrimitiveOI);
        if (timestamp == null) {
          VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
          return;
        }
        ((TimestampColumnVector) columnVector).set(batchIndex, timestamp);
        break;
      }
      case DATE: {
        final Date date = PrimitiveObjectInspectorUtils.getDate(object, sourcePrimitiveOI);
        if (date == null) {
          VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
          return;
        }
        DateWritable dateWritable = (DateWritable) convertTargetWritable;
        if (dateWritable == null) {
          dateWritable = new DateWritable();
        }
        dateWritable.set(date);
        ((LongColumnVector) columnVector).vector[batchIndex] = dateWritable.getDays();
        break;
      }
      case FLOAT:
        ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getFloat(object, sourcePrimitiveOI);
        break;
      case DOUBLE:
        ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getDouble(object, sourcePrimitiveOI);
        break;
      case BINARY: {
        final BytesWritable bytesWritable = PrimitiveObjectInspectorUtils.getBinary(object, sourcePrimitiveOI);
        if (bytesWritable == null) {
          VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
          return;
        }
        ((BytesColumnVector) columnVector).setVal(batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength());
        break;
      }
      case STRING: {
        final String string = PrimitiveObjectInspectorUtils.getString(object, sourcePrimitiveOI);
        if (string == null) {
          VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
          return;
        }
        Text text = (Text) convertTargetWritable;
        if (text == null) {
          text = new Text();
        }
        text.set(string);
        ((BytesColumnVector) columnVector).setVal(batchIndex, text.getBytes(), 0, text.getLength());
        break;
      }
      case VARCHAR: {
        // UNDONE: Performance problem with conversion to String, then bytes...
        final HiveVarchar hiveVarchar = PrimitiveObjectInspectorUtils.getHiveVarchar(object, sourcePrimitiveOI);
        if (hiveVarchar == null) {
          VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
          return;
        }
        // TODO: Do we need maxLength checking?
        byte[] bytes = hiveVarchar.getValue().getBytes();
        ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
        break;
      }
      case CHAR: {
        // UNDONE: Performance problem with conversion to String, then bytes...
        final HiveChar hiveChar = PrimitiveObjectInspectorUtils.getHiveChar(object, sourcePrimitiveOI);
        if (hiveChar == null) {
          VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
          return;
        }
        // We store CHAR in vector row batch with padding stripped.
        // TODO: Do we need maxLength checking?
        final byte[] bytes = hiveChar.getStrippedValue().getBytes();
        ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
        break;
      }
      case DECIMAL: {
        final HiveDecimal hiveDecimal = PrimitiveObjectInspectorUtils.getHiveDecimal(object, sourcePrimitiveOI);
        if (hiveDecimal == null) {
          VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
          return;
        }
        if (columnVector instanceof Decimal64ColumnVector) {
          Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) columnVector;
          dec64ColVector.set(batchIndex, hiveDecimal);
          if (dec64ColVector.isNull[batchIndex]) {
            return;
          }
        } else {
          ((DecimalColumnVector) columnVector).set(batchIndex, hiveDecimal);
        }
        break;
      }
      case INTERVAL_YEAR_MONTH: {
        final HiveIntervalYearMonth intervalYearMonth = PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth(object, sourcePrimitiveOI);
        if (intervalYearMonth == null) {
          VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
          return;
        }
        ((LongColumnVector) columnVector).vector[batchIndex] = intervalYearMonth.getTotalMonths();
        break;
      }
      case INTERVAL_DAY_TIME: {
        final HiveIntervalDayTime intervalDayTime = PrimitiveObjectInspectorUtils.getHiveIntervalDayTime(object, sourcePrimitiveOI);
        if (intervalDayTime == null) {
          VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
          return;
        }
        ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, intervalDayTime);
        break;
      }
      default:
        throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + " not supported");
      }
      break;
    case LIST: {
      final ListColumnVector listColumnVector = (ListColumnVector) columnVector;
      final ListObjectInspector sourceListOI = (ListObjectInspector) sourceObjectInspector;
      final ObjectInspector sourceElementOI = sourceListOI.getListElementObjectInspector();
      final int size = sourceListOI.getListLength(object);
      final TypeInfo targetElementTypeInfo = ((ListTypeInfo) targetTypeInfo).getListElementTypeInfo();
      listColumnVector.offsets[batchIndex] = listColumnVector.childCount;
      listColumnVector.childCount += size;
      listColumnVector.ensureSize(listColumnVector.childCount, true);
      listColumnVector.lengths[batchIndex] = size;
      for (int i = 0; i < size; i++) {
        final Object element = sourceListOI.getListElement(object, i);
        final int offset = (int) (listColumnVector.offsets[batchIndex] + i);
        assignConvertRowColumn(listColumnVector.child, offset, targetElementTypeInfo, sourceElementOI, null, element);
      }
      break;
    }
    case MAP: {
      final MapColumnVector mapColumnVector = (MapColumnVector) columnVector;
      final MapObjectInspector mapObjectInspector = (MapObjectInspector) sourceObjectInspector;
      final MapTypeInfo mapTypeInfo = (MapTypeInfo) targetTypeInfo;
      final Map<?, ?> map = mapObjectInspector.getMap(object);
      for (Map.Entry<?, ?> entry : map.entrySet()) {
        assignConvertRowColumn(mapColumnVector.keys, batchIndex, mapTypeInfo.getMapKeyTypeInfo(), mapObjectInspector.getMapKeyObjectInspector(), null, entry.getKey());
        assignConvertRowColumn(mapColumnVector.values, batchIndex, mapTypeInfo.getMapValueTypeInfo(), mapObjectInspector.getMapValueObjectInspector(), null, entry.getValue());
      }
      break;
    }
    case STRUCT: {
      final StructColumnVector structColumnVector = (StructColumnVector) columnVector;
      final StructObjectInspector sourceStructOI = (StructObjectInspector) sourceObjectInspector;
      final List<? extends StructField> sourceFields = sourceStructOI.getAllStructFieldRefs();
      final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo;
      final List<TypeInfo> targetTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos();
      final int size = targetTypeInfos.size();
      for (int i = 0; i < size; i++) {
        if (i < sourceFields.size()) {
          final StructField sourceStructField = sourceFields.get(i);
          final ObjectInspector sourceFieldOI = sourceStructField.getFieldObjectInspector();
          final Object sourceData = sourceStructOI.getStructFieldData(object, sourceStructField);
          assignConvertRowColumn(structColumnVector.fields[i], batchIndex, targetTypeInfos.get(i), sourceFieldOI, null, sourceData);
        } else {
          // Target struct has more fields than the source: null-fill the extras.
          final ColumnVector fieldColumnVector = structColumnVector.fields[i];
          VectorizedBatchUtil.setNullColIsNullValue(fieldColumnVector, batchIndex);
        }
      }
      break;
    }
    case UNION: {
      final UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector;
      final UnionObjectInspector unionObjectInspector = (UnionObjectInspector) sourceObjectInspector;
      final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) targetTypeInfo;
      final int tag = unionObjectInspector.getTag(object);
      // getField expects the union object itself, not the tag.
      assignConvertRowColumn(unionColumnVector.fields[tag], batchIndex, unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), unionObjectInspector.getObjectInspectors().get(tag), null, unionObjectInspector.getField(object));
      break;
    }
    default:
      throw new RuntimeException("Category " + targetCategory.name() + " not supported");
    }
  } catch (NumberFormatException e) {
    // Some of the conversion methods throw this exception on numeric parsing errors.
    VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
    return;
  }
  // We always set the null flag to false when there is a value.
  columnVector.isNull[batchIndex] = false;
}
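In the DECIMAL case above, Decimal64ColumnVector.set may mark the slot null when the value does not fit the column's precision and scale, which is why the code checks isNull afterwards. A small standalone sketch of the non-64-bit path (batch size, precision, and scale chosen arbitrarily for illustration):

// A 1024-row batch of decimal(10,2); set() adjusts the value to the column
// scale, or marks the slot null if the value cannot be represented.
DecimalColumnVector dcv = new DecimalColumnVector(1024, 10, 2);
dcv.set(0, HiveDecimal.create("123.45"));
boolean stored = !dcv.isNull[0];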
Use of org.apache.hadoop.hive.common.type.HiveDecimal in project hive by apache.
The class VectorizationContext, method decimalTypeFromCastToDecimal.
/*
* This method must return the decimal TypeInfo for what getCastToDecimal will produce.
*/
private DecimalTypeInfo decimalTypeFromCastToDecimal(ExprNodeDesc exprNodeDesc, DecimalTypeInfo returnDecimalType) throws HiveException {
  if (exprNodeDesc instanceof ExprNodeConstantDesc) {
    // Return a constant vector expression
    Object constantValue = ((ExprNodeConstantDesc) exprNodeDesc).getValue();
    HiveDecimal decimalValue = castConstantToDecimal(constantValue, exprNodeDesc.getTypeInfo());
    if (decimalValue == null) {
      // Return something.
      return returnDecimalType;
    }
    return new DecimalTypeInfo(decimalValue.precision(), decimalValue.scale());
  }
  String inputType = exprNodeDesc.getTypeString();
  if (isIntFamily(inputType) || isFloatFamily(inputType) || decimalTypePattern.matcher(inputType).matches() || isStringFamily(inputType) || inputType.equals("timestamp")) {
    return returnDecimalType;
  }
  return null;
}
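In the constant branch above, the result type is derived directly from the folded value. For example, casting the literal 12.345 yields decimal(5, 3):

HiveDecimal d = HiveDecimal.create("12.345");
DecimalTypeInfo t = new DecimalTypeInfo(d.precision(), d.scale()); // decimal(5, 3)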
Use of org.apache.hadoop.hive.common.type.HiveDecimal in project hive by apache.
The class VectorizationContext, method castConstantToLong.
private Long castConstantToLong(Object scalar, TypeInfo type, PrimitiveCategory integerPrimitiveCategory) throws HiveException {
  if (null == scalar) {
    return null;
  }
  PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
  String typename = type.getTypeName();
  switch (ptinfo.getPrimitiveCategory()) {
  case FLOAT:
  case DOUBLE:
  case BYTE:
  case SHORT:
  case INT:
  case LONG:
    return ((Number) scalar).longValue();
  case DECIMAL:
    HiveDecimal decimalVal = (HiveDecimal) scalar;
    switch (integerPrimitiveCategory) {
    case BYTE:
      if (!decimalVal.isByte()) {
        // Accurate byte value cannot be obtained.
        return null;
      }
      break;
    case SHORT:
      if (!decimalVal.isShort()) {
        // Accurate short value cannot be obtained.
        return null;
      }
      break;
    case INT:
      if (!decimalVal.isInt()) {
        // Accurate int value cannot be obtained.
        return null;
      }
      break;
    case LONG:
      if (!decimalVal.isLong()) {
        // Accurate long value cannot be obtained.
        return null;
      }
      break;
    default:
      throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory);
    }
    // We only store longs in our LongColumnVector.
    return decimalVal.longValue();
  default:
    throw new HiveException("Unsupported type " + typename + " for cast to Long");
  }
}
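The DECIMAL branch returns null whenever the value cannot be represented exactly in the requested integer width, rather than silently truncating. For example:

HiveDecimal d = HiveDecimal.create(300);
boolean fitsByte = d.isByte();   // false: castConstantToLong returns null for BYTE
boolean fitsShort = d.isShort(); // true: the method returns d.longValue(), i.e. 300L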