Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class ConstantPropagateProcFactory, method evaluateFunction.
/**
 * Evaluate UDF
 *
 * @param udf UDF object
 * @param exprs the (possibly already folded) argument expressions
 * @param oldExprs the original argument expressions, whose types the constants are cast back to
 * @return the evaluated ExprNodeConstantDesc if folding is possible, or null if the expression
 *         cannot be evaluated (not all parameters are constants)
 */
private static ExprNodeDesc evaluateFunction(GenericUDF udf, List<ExprNodeDesc> exprs,
    List<ExprNodeDesc> oldExprs) {
  DeferredJavaObject[] arguments = new DeferredJavaObject[exprs.size()];
  ObjectInspector[] argois = new ObjectInspector[exprs.size()];
  for (int i = 0; i < exprs.size(); i++) {
    ExprNodeDesc desc = exprs.get(i);
    if (desc instanceof ExprNodeConstantDesc) {
      ExprNodeConstantDesc constant = (ExprNodeConstantDesc) desc;
      if (!constant.getTypeInfo().equals(oldExprs.get(i).getTypeInfo())) {
        constant = typeCast(constant, oldExprs.get(i).getTypeInfo());
        if (constant == null) {
          return null;
        }
      }
      if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
        // nested complex types cannot be folded cleanly
        return null;
      }
      Object value = constant.getValue();
      PrimitiveTypeInfo pti = (PrimitiveTypeInfo) constant.getTypeInfo();
      Object writableValue = null == value ? value
          : PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti)
              .getPrimitiveWritableObject(value);
      arguments[i] = new DeferredJavaObject(writableValue);
      argois[i] = ObjectInspectorUtils.getConstantObjectInspector(
          constant.getWritableObjectInspector(), writableValue);
    } else if (desc instanceof ExprNodeGenericFuncDesc) {
      ExprNodeDesc evaluatedFn = foldExpr((ExprNodeGenericFuncDesc) desc);
      if (!(evaluatedFn instanceof ExprNodeConstantDesc)) {
        return null;
      }
      ExprNodeConstantDesc constant = (ExprNodeConstantDesc) evaluatedFn;
      if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
        // nested complex types cannot be folded cleanly
        return null;
      }
      Object writableValue = PrimitiveObjectInspectorFactory
          .getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) constant.getTypeInfo())
          .getPrimitiveWritableObject(constant.getValue());
      arguments[i] = new DeferredJavaObject(writableValue);
      argois[i] = ObjectInspectorUtils.getConstantObjectInspector(
          constant.getWritableObjectInspector(), writableValue);
    } else {
      return null;
    }
  }
  try {
    ObjectInspector oi = udf.initialize(argois);
    Object o = udf.evaluate(arguments);
    if (LOG.isDebugEnabled()) {
      LOG.debug(udf.getClass().getName() + "(" + exprs + ")=" + o);
    }
    if (o == null) {
      return new ExprNodeConstantDesc(TypeInfoUtils.getTypeInfoFromObjectInspector(oi), o);
    }
    Class<?> clz = o.getClass();
    if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(clz)) {
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
      TypeInfo typeInfo = poi.getTypeInfo();
      o = poi.getPrimitiveJavaObject(o);
      if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME)
          || typeInfo.getTypeName().contains(serdeConstants.VARCHAR_TYPE_NAME)
          || typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)
          || typeInfo.getTypeName().contains(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) {
        return new ExprNodeConstantDesc(typeInfo, o);
      }
    } else if (udf instanceof GenericUDFStruct
        && oi instanceof StandardConstantStructObjectInspector) {
      // do not fold named_struct, only struct()
      ConstantObjectInspector coi = (ConstantObjectInspector) oi;
      TypeInfo structType = TypeInfoUtils.getTypeInfoFromObjectInspector(coi);
      return new ExprNodeConstantDesc(structType,
          ObjectInspectorUtils.copyToStandardJavaObject(o, coi));
    } else if (!PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(clz)) {
      if (LOG.isErrorEnabled()) {
        LOG.error("Unable to evaluate " + udf + ". Return value unrecognizable.");
      }
      return null;
    } else {
      // fall through
    }
    String constStr = null;
    if (arguments.length == 1 && FunctionRegistry.isOpCast(udf)) {
      // remember the original string representation of the constant.
      constStr = arguments[0].get().toString();
    }
    return new ExprNodeConstantDesc(o).setFoldedFromVal(constStr);
  } catch (HiveException e) {
    LOG.error("Evaluating function " + udf.getClass() + " failed in the Constant Propagation Optimizer.");
    throw new RuntimeException(e);
  }
}
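A minimal sketch of the folding handshake above, under stated assumptions: Hive's ql and serde2 jars on the classpath, with GenericUDFConcat picked only as a convenient built-in and FoldConstantUdfSketch a hypothetical class name. Constant arguments are wrapped in DeferredJavaObjects, initialize() is handed constant ObjectInspectors so the UDF can see the values, and evaluate() runs once at planning time.

import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

public class FoldConstantUdfSketch {
  public static void main(String[] args) throws Exception {
    GenericUDF udf = new GenericUDFConcat();
    Text left = new Text("foo");
    Text right = new Text("bar");
    // Constant OIs mirror the getConstantObjectInspector(...) calls in evaluateFunction above.
    ObjectInspector[] argOis = {
        ObjectInspectorUtils.getConstantObjectInspector(
            PrimitiveObjectInspectorFactory.writableStringObjectInspector, left),
        ObjectInspectorUtils.getConstantObjectInspector(
            PrimitiveObjectInspectorFactory.writableStringObjectInspector, right)
    };
    udf.initialize(argOis);
    // Evaluate once with the constant values; a folding pass would store this result.
    Object o = udf.evaluate(new DeferredJavaObject[] {
        new DeferredJavaObject(left), new DeferredJavaObject(right) });
    System.out.println(o); // expected: foobar
  }
}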
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorizedPrimitiveColumnReader, method decodeDictionaryIds.
/**
 * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
 */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column,
    TypeInfo columnType, LongColumnVector dictionaryIds) {
  System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
  if (column.noNulls) {
    column.noNulls = dictionaryIds.noNulls;
  }
  column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
  PrimitiveTypeInfo primitiveColumnType = (PrimitiveTypeInfo) columnType;
  switch (primitiveColumnType.getPrimitiveCategory()) {
    case INT:
    case BYTE:
    case SHORT:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] =
            dictionary.readInteger((int) dictionaryIds.vector[i]);
        if (!dictionary.isValid(((LongColumnVector) column).vector[i])) {
          setNullValue(column, i);
          ((LongColumnVector) column).vector[i] = 0;
        }
      }
      break;
    case DATE:
    case INTERVAL_YEAR_MONTH:
    case LONG:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] =
            dictionary.readLong((int) dictionaryIds.vector[i]);
      }
      break;
    case BOOLEAN:
      for (int i = rowId; i < rowId + num; ++i) {
        ((LongColumnVector) column).vector[i] =
            dictionary.readBoolean((int) dictionaryIds.vector[i]) ? 1 : 0;
      }
      break;
    case DOUBLE:
      for (int i = rowId; i < rowId + num; ++i) {
        ((DoubleColumnVector) column).vector[i] =
            dictionary.readDouble((int) dictionaryIds.vector[i]);
      }
      break;
    case BINARY:
      for (int i = rowId; i < rowId + num; ++i) {
        ((BytesColumnVector) column).setVal(i, dictionary.readBytes((int) dictionaryIds.vector[i]));
      }
      break;
    case STRING:
      for (int i = rowId; i < rowId + num; ++i) {
        ((BytesColumnVector) column).setVal(i, dictionary.readString((int) dictionaryIds.vector[i]));
      }
      break;
    case VARCHAR:
      for (int i = rowId; i < rowId + num; ++i) {
        ((BytesColumnVector) column).setVal(i, dictionary.readVarchar((int) dictionaryIds.vector[i]));
      }
      break;
    case CHAR:
      for (int i = rowId; i < rowId + num; ++i) {
        ((BytesColumnVector) column).setVal(i, dictionary.readChar((int) dictionaryIds.vector[i]));
      }
      break;
    case FLOAT:
      for (int i = rowId; i < rowId + num; ++i) {
        ((DoubleColumnVector) column).vector[i] =
            dictionary.readFloat((int) dictionaryIds.vector[i]);
      }
      break;
    case DECIMAL:
      decimalTypeCheck(type);
      DecimalColumnVector decimalColumnVector = (DecimalColumnVector) column;
      decimalColumnVector.precision =
          (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
      decimalColumnVector.scale =
          (short) type.asPrimitiveType().getDecimalMetadata().getScale();
      for (int i = rowId; i < rowId + num; ++i) {
        decimalColumnVector.vector[i].set(
            dictionary.readDecimal((int) dictionaryIds.vector[i]), decimalColumnVector.scale);
      }
      break;
    case TIMESTAMP:
      for (int i = rowId; i < rowId + num; ++i) {
        ((TimestampColumnVector) column).set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]));
      }
      break;
    case INTERVAL_DAY_TIME:
    default:
      throw new UnsupportedOperationException("Unsupported type: " + type);
  }
}
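To make the decoding idea concrete without Parquet internals, here is a self-contained sketch; the Dictionary interface and all names are hypothetical stand-ins for Parquet's per-type dictionary readers. Each row holds a small id, and decoding materializes the real value by indexing into the dictionary page, the same shape as the LONG case above.

final class DictionaryDecodeSketch {
  interface Dictionary { long readLong(int id); } // hypothetical single-type reader

  static void decodeLongs(int rowId, int num, long[] ids, long[] out, Dictionary dict) {
    for (int i = rowId; i < rowId + num; ++i) {
      out[i] = dict.readLong((int) ids[i]); // id -> dictionary value, as in the LONG case
    }
  }

  public static void main(String[] args) {
    long[] page = {100L, 200L, 300L};   // the dictionary page
    Dictionary dict = id -> page[id];
    long[] ids = {2, 0, 1, 2};          // encoded column: ids into the page
    long[] out = new long[ids.length];
    decodeLongs(0, ids.length, ids, out, dict);
    System.out.println(java.util.Arrays.toString(out)); // [300, 100, 200, 300]
  }
}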
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorUDFDateDiffColScalar, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
  ColumnVector inputCol = batch.cols[this.colNum];
  /* every line below this is identical for evaluateLong & evaluateString */
  final int n = inputCol.isRepeating ? 1 : batch.size;
  int[] sel = batch.selected;
  final boolean selectedInUse = !inputCol.isRepeating && batch.selectedInUse;
  boolean[] outputIsNull = outputColVector.isNull;
  if (batch.size == 0) {
    /* n != batch.size when isRepeating */
    return;
  }
  // We do not need to do a column reset since we are carefully changing the output.
  outputColVector.isRepeating = false;
  PrimitiveCategory primitiveCategory1 =
      ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory();
  switch (primitiveCategory1) {
    case DATE:
      baseDate = (int) longValue;
      break;
    case TIMESTAMP:
      date.setTime(timestampValue.getTime());
      baseDate = DateWritable.dateToDays(date);
      break;
    case STRING:
    case CHAR:
    case VARCHAR:
      try {
        date.setTime(formatter.parse(new String(bytesValue, "UTF-8")).getTime());
        baseDate = DateWritable.dateToDays(date);
        break;
      } catch (Exception e) {
        outputColVector.noNulls = false;
        if (selectedInUse) {
          for (int j = 0; j < n; j++) {
            int i = sel[j];
            outputColVector.isNull[i] = true;
          }
        } else {
          for (int i = 0; i < n; i++) {
            outputColVector.isNull[i] = true;
          }
        }
        return;
      }
    default:
      throw new Error("Invalid input type #1: " + primitiveCategory1.name());
  }
  PrimitiveCategory primitiveCategory0 =
      ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory();
  switch (primitiveCategory0) {
    case DATE:
      if (inputCol.isRepeating) {
        if (inputCol.noNulls || !inputCol.isNull[0]) {
          outputColVector.isNull[0] = false;
          outputColVector.vector[0] = evaluateDate(inputCol, 0);
        } else {
          outputColVector.isNull[0] = true;
          outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
      } else if (inputCol.noNulls) {
        if (batch.selectedInUse) {
          if (!outputColVector.noNulls) {
            for (int j = 0; j != n; j++) {
              final int i = sel[j];
              // Set isNull before the call in case it changes its mind.
              outputIsNull[i] = false;
              outputColVector.vector[i] = evaluateDate(inputCol, i);
            }
          } else {
            for (int j = 0; j != n; j++) {
              final int i = sel[j];
              outputColVector.vector[i] = evaluateDate(inputCol, i);
            }
          }
        } else {
          if (!outputColVector.noNulls) {
            // Assume it is almost always a performance win to fill all of isNull so we can
            // safely reset noNulls.
            Arrays.fill(outputIsNull, false);
            outputColVector.noNulls = true;
          }
          for (int i = 0; i != n; i++) {
            outputColVector.vector[i] = evaluateDate(inputCol, i);
          }
        }
      } else /* there are nulls in the inputColVector */ {
        // Carefully handle NULLs. Don't call the function when the value is null,
        // to save time, because calling the function can be expensive.
        outputColVector.noNulls = false;
        if (selectedInUse) {
          for (int j = 0; j < n; j++) {
            int i = sel[j];
            outputColVector.isNull[i] = inputCol.isNull[i];
            if (!inputCol.isNull[i]) {
              outputColVector.vector[i] = evaluateDate(inputCol, i);
            }
          }
        } else {
          for (int i = 0; i < n; i++) {
            outputColVector.isNull[i] = inputCol.isNull[i];
            if (!inputCol.isNull[i]) {
              outputColVector.vector[i] = evaluateDate(inputCol, i);
            }
          }
        }
      }
      break;
    case TIMESTAMP:
      if (inputCol.isRepeating) {
        if (inputCol.noNulls || !inputCol.isNull[0]) {
          outputColVector.isNull[0] = false;
          outputColVector.vector[0] = evaluateTimestamp(inputCol, 0);
        } else {
          outputColVector.isNull[0] = true;
          outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
      } else if (inputCol.noNulls) {
        if (batch.selectedInUse) {
          if (!outputColVector.noNulls) {
            for (int j = 0; j != n; j++) {
              final int i = sel[j];
              // Set isNull before the call in case it changes its mind.
              outputIsNull[i] = false;
              outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
            }
          } else {
            for (int j = 0; j != n; j++) {
              final int i = sel[j];
              outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
            }
          }
        } else {
          if (!outputColVector.noNulls) {
            // Assume it is almost always a performance win to fill all of isNull so we can
            // safely reset noNulls.
            Arrays.fill(outputIsNull, false);
            outputColVector.noNulls = true;
          }
          for (int i = 0; i != n; i++) {
            outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
          }
        }
      } else /* there are nulls in the inputColVector */ {
        // Carefully handle NULLs. Don't call the function when the value is null,
        // to save time, because calling the function can be expensive.
        outputColVector.noNulls = false;
        if (selectedInUse) {
          for (int j = 0; j < n; j++) {
            int i = sel[j];
            outputColVector.isNull[i] = inputCol.isNull[i];
            if (!inputCol.isNull[i]) {
              outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
            }
          }
        } else {
          for (int i = 0; i < n; i++) {
            outputColVector.isNull[i] = inputCol.isNull[i];
            if (!inputCol.isNull[i]) {
              outputColVector.vector[i] = evaluateTimestamp(inputCol, i);
            }
          }
        }
      }
      break;
    case STRING:
    case CHAR:
    case VARCHAR:
      if (inputCol.isRepeating) {
        if (inputCol.noNulls || !inputCol.isNull[0]) {
          outputColVector.isNull[0] = false;
          evaluateString(inputCol, outputColVector, 0);
        } else {
          outputColVector.isNull[0] = true;
          outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
      } else if (inputCol.noNulls) {
        if (batch.selectedInUse) {
          if (!outputColVector.noNulls) {
            for (int j = 0; j != n; j++) {
              final int i = sel[j];
              // Set isNull before the call in case it changes its mind.
              outputIsNull[i] = false;
              evaluateString(inputCol, outputColVector, i);
            }
          } else {
            for (int j = 0; j != n; j++) {
              final int i = sel[j];
              evaluateString(inputCol, outputColVector, i);
            }
          }
        } else {
          if (!outputColVector.noNulls) {
            // Assume it is almost always a performance win to fill all of isNull so we can
            // safely reset noNulls.
            Arrays.fill(outputIsNull, false);
            outputColVector.noNulls = true;
          }
          for (int i = 0; i != n; i++) {
            evaluateString(inputCol, outputColVector, i);
          }
        }
      } else /* there are nulls in the inputColVector */ {
        // Carefully handle NULLs. Don't call the function when the value is null,
        // to save time, because calling the function can be expensive.
        outputColVector.noNulls = false;
        if (selectedInUse) {
          for (int j = 0; j < n; j++) {
            int i = sel[j];
            outputColVector.isNull[i] = inputCol.isNull[i];
            if (!inputCol.isNull[i]) {
              evaluateString(inputCol, outputColVector, i);
            }
          }
        } else {
          for (int i = 0; i < n; i++) {
            outputColVector.isNull[i] = inputCol.isNull[i];
            if (!inputCol.isNull[i]) {
              evaluateString(inputCol, outputColVector, i);
            }
          }
        }
      }
      break;
    default:
      throw new Error("Invalid input type #0: " + primitiveCategory0.name());
  }
}
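The three cases above share one skeleton, which Hive's vectorized expressions repeat only because the typed vector accesses differ. Below is a condensed sketch of that skeleton with a hypothetical MiniVector in place of Hive's ColumnVector: short-circuit a repeating input, take a mask-free fast path when there are no nulls, and otherwise copy the null mask and skip null rows.

final class VectorNullPatternSketch {
  static final class MiniVector { // hypothetical stand-in for LongColumnVector
    long[] vector; boolean[] isNull; boolean noNulls = true, isRepeating = false;
    MiniVector(int n) { vector = new long[n]; isNull = new boolean[n]; }
  }

  static void apply(MiniVector in, MiniVector out, int n, java.util.function.LongUnaryOperator f) {
    out.isRepeating = false;
    if (in.isRepeating) {
      // Repeating input: compute entry 0 once and mark the output repeating.
      if (in.noNulls || !in.isNull[0]) { out.isNull[0] = false; out.vector[0] = f.applyAsLong(in.vector[0]); }
      else { out.isNull[0] = true; out.noNulls = false; }
      out.isRepeating = true;
      return;
    }
    if (in.noNulls) {
      // Fast path: no null checks per row.
      java.util.Arrays.fill(out.isNull, 0, n, false);
      for (int i = 0; i < n; i++) out.vector[i] = f.applyAsLong(in.vector[i]);
    } else {
      out.noNulls = false;
      for (int i = 0; i < n; i++) {
        out.isNull[i] = in.isNull[i];
        if (!in.isNull[i]) out.vector[i] = f.applyAsLong(in.vector[i]); // skip nulls: the call may be expensive
      }
    }
  }

  public static void main(String[] args) {
    MiniVector in = new MiniVector(3);
    in.vector = new long[] {1, 2, 3};
    in.isNull = new boolean[] {false, true, false};
    in.noNulls = false;
    MiniVector out = new MiniVector(3);
    apply(in, out, 3, v -> v * 10);
    System.out.println(java.util.Arrays.toString(out.vector) + " "
        + java.util.Arrays.toString(out.isNull)); // [10, 0, 30] [false, true, false]
  }
}

The selected-rows (`sel`/`selectedInUse`) branches are omitted here; they apply the same logic to an index subset of the batch.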
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorUDFTimestampFieldDate, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  Preconditions.checkState(
      ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory() == PrimitiveCategory.DATE);
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
  ColumnVector inputColVec = batch.cols[this.colNum];
  /* every line below this is identical for evaluateLong & evaluateString */
  final int n = inputColVec.isRepeating ? 1 : batch.size;
  int[] sel = batch.selected;
  final boolean selectedInUse = !inputColVec.isRepeating && batch.selectedInUse;
  boolean[] outputIsNull = outputColVector.isNull;
  if (batch.size == 0) {
    /* n != batch.size when isRepeating */
    return;
  }
  // We do not need to do a column reset since we are carefully changing the output.
  outputColVector.isRepeating = false;
  LongColumnVector longColVector = (LongColumnVector) inputColVec;
  if (inputColVec.isRepeating) {
    if (inputColVec.noNulls || !inputColVec.isNull[0]) {
      outputColVector.isNull[0] = false;
      outputColVector.vector[0] = getDateField(longColVector.vector[0]);
    } else {
      outputColVector.isNull[0] = true;
      outputColVector.noNulls = false;
    }
    outputColVector.isRepeating = true;
    return;
  }
  if (inputColVec.noNulls) {
    if (batch.selectedInUse) {
      if (!outputColVector.noNulls) {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          // Set isNull before the call in case it changes its mind.
          outputIsNull[i] = false;
          outputColVector.vector[i] = getDateField(longColVector.vector[i]);
        }
      } else {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          outputColVector.vector[i] = getDateField(longColVector.vector[i]);
        }
      }
    } else {
      if (!outputColVector.noNulls) {
        // Assume it is almost always a performance win to fill all of isNull so we can
        // safely reset noNulls.
        Arrays.fill(outputIsNull, false);
        outputColVector.noNulls = true;
      }
      for (int i = 0; i != n; i++) {
        outputColVector.vector[i] = getDateField(longColVector.vector[i]);
      }
    }
  } else /* there are nulls in the inputColVector */ {
    // Carefully handle NULLs...
    outputColVector.noNulls = false;
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outputColVector.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outputColVector.vector[i] = getDateField(longColVector.vector[i]);
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        outputColVector.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outputColVector.vector[i] = getDateField(longColVector.vector[i]);
        }
      }
    }
  }
}
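getDateField is supplied by subclasses (year, month, day of month, and so on). As a hedged illustration of the YEAR shape, assuming the long input encodes days since the Unix epoch, which is how DATE values are stored in a LongColumnVector:

import java.time.LocalDate;

final class DateFieldSketch { // hypothetical name; not Hive's implementation
  static long getYearField(long epochDays) {
    return LocalDate.ofEpochDay(epochDays).getYear();
  }

  public static void main(String[] args) {
    System.out.println(getYearField(0));     // 1970 (epoch day 0 = 1970-01-01)
    System.out.println(getYearField(19723)); // 2024 (epoch day 19723 = 2024-01-01)
  }
}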
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorUDFTimestampFieldTimestamp, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  Preconditions.checkState(
      ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory() == PrimitiveCategory.TIMESTAMP);
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
  ColumnVector inputColVec = batch.cols[this.colNum];
  /* every line below this is identical for evaluateLong & evaluateString */
  final int n = inputColVec.isRepeating ? 1 : batch.size;
  int[] sel = batch.selected;
  final boolean selectedInUse = !inputColVec.isRepeating && batch.selectedInUse;
  if (batch.size == 0) {
    /* n != batch.size when isRepeating */
    return;
  }
  // We do not need to do a column reset since we are carefully changing the output.
  outV.isRepeating = false;
  TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec;
  if (inputColVec.isRepeating) {
    if (inputColVec.noNulls || !inputColVec.isNull[0]) {
      outV.isNull[0] = false;
      outV.vector[0] = getTimestampField(timestampColVector, 0);
    } else {
      outV.isNull[0] = true;
      outV.noNulls = false;
    }
    outV.isRepeating = true;
    return;
  }
  if (inputColVec.noNulls) {
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outV.isNull[i] = false;
        outV.vector[i] = getTimestampField(timestampColVector, i);
      }
    } else {
      Arrays.fill(outV.isNull, 0, n, false);
      for (int i = 0; i < n; i++) {
        outV.vector[i] = getTimestampField(timestampColVector, i);
      }
    }
  } else /* there are nulls in the inputColVector */ {
    // Carefully handle NULLs...
    outV.noNulls = false;
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outV.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outV.vector[i] = getTimestampField(timestampColVector, i);
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        outV.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outV.vector[i] = getTimestampField(timestampColVector, i);
        }
      }
    }
  }
}
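For the timestamp variant, getTimestampField is likewise implemented per field by subclasses. A small sketch of an HOUR-style computation, assuming epoch milliseconds as input rather than Hive's TimestampColumnVector storage (TimestampFieldSketch and getHourField are hypothetical names):

import java.time.Instant;
import java.time.ZoneOffset;

final class TimestampFieldSketch {
  static long getHourField(long epochMillis) {
    return Instant.ofEpochMilli(epochMillis).atZone(ZoneOffset.UTC).getHour();
  }

  public static void main(String[] args) {
    System.out.println(getHourField(0L));         // 0 (1970-01-01T00:00Z)
    System.out.println(getHourField(3_600_000L)); // 1 (one hour after the epoch)
  }
}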