use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp in project phoenix by apache.
the class IndexPredicateAnalyzer method getColumnExpr.
// Check whether expr wraps a single ExprNodeColumnDesc.
// If so, peel the wrapper off; otherwise return expr itself.
private ExprNodeDesc getColumnExpr(ExprNodeDesc expr) {
  if (expr instanceof ExprNodeColumnDesc) {
    return expr;
  }
  ExprNodeGenericFuncDesc funcDesc = null;
  if (expr instanceof ExprNodeGenericFuncDesc) {
    funcDesc = (ExprNodeGenericFuncDesc) expr;
  }
  if (null == funcDesc) {
    return expr;
  }
  GenericUDF udf = funcDesc.getGenericUDF();
  // Check whether it is a simple cast expression.
  if ((udf instanceof GenericUDFBridge || udf instanceof GenericUDFToBinary
      || udf instanceof GenericUDFToChar || udf instanceof GenericUDFToVarchar
      || udf instanceof GenericUDFToDecimal || udf instanceof GenericUDFToDate
      || udf instanceof GenericUDFToUnixTimeStamp || udf instanceof GenericUDFToUtcTimestamp)
      && funcDesc.getChildren().size() == 1
      && funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc) {
    return expr.getChildren().get(0);
  }
  return expr;
}
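To see the peeling in isolation, here is a minimal sketch (the class, column, and alias names are invented for illustration; it assumes hive-exec on the classpath). A column wrapped in one of the whitelisted cast UDFs, such as GenericUDFToUnixTimeStamp, would be unwrapped to the bare column, while a bare column is returned as-is by the first check.

import java.util.Collections;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GetColumnExprSketch {
  public static void main(String[] args) {
    // A TIMESTAMP column ts_col in table alias t (names are illustrative).
    ExprNodeDesc col =
        new ExprNodeColumnDesc(TypeInfoFactory.timestampTypeInfo, "ts_col", "t", false);
    // Wrap it in to_unix_timestamp(ts_col): a whitelisted UDF with a single column child.
    ExprNodeDesc wrapped = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.longTypeInfo,
        new GenericUDFToUnixTimeStamp(),
        Collections.singletonList(col));
    // getColumnExpr(wrapped) would take the cast branch and return col;
    // getColumnExpr(col) would return col directly.
    System.out.println(wrapped.getExprString());
  }
}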
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp in project flink by apache.
the class HiveParserRexNodeConverter method convertGenericFunc.
private RexNode convertGenericFunc(ExprNodeGenericFuncDesc func) throws SemanticException {
  ExprNodeDesc tmpExprNode;
  RexNode tmpRN;
  List<RexNode> childRexNodeLst = new ArrayList<>();
  List<RelDataType> argTypes = new ArrayList<>();
  // TODO: 1) Expand to other functions as needed 2) What about types other than primitive?
  TypeInfo tgtDT = null;
  GenericUDF tgtUdf = func.getGenericUDF();
  if (tgtUdf instanceof GenericUDFIn) {
    return convertIN(func);
  }
  boolean isNumeric = isNumericBinary(func);
  boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
  boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase;
  boolean isTransformableTimeStamp =
      func.getGenericUDF() instanceof GenericUDFUnixTimeStamp && !func.getChildren().isEmpty();
  if (isNumeric) {
    tgtDT = func.getTypeInfo();
    assert func.getChildren().size() == 2;
    // TODO: checking for 2 children is redundant; compare already does that.
  } else if (isCompare && (func.getChildren().size() == 2)) {
    tgtDT = FunctionRegistry.getCommonClassForComparison(
        func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
  } else if (isWhenCase) {
    // CASE/WHEN must not contain stateful functions, as they are not allowed there.
    if (checkForStatefulFunctions(func.getChildren())) {
      throw new SemanticException("Stateful expressions cannot be used inside of CASE");
    }
  } else if (isTransformableTimeStamp) {
    // unix_timestamp(args) -> to_unix_timestamp(args)
    func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren());
  }
  for (ExprNodeDesc childExpr : func.getChildren()) {
    tmpExprNode = childExpr;
    if (tgtDT != null
        && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
      if (isCompare) {
        // For compare, convert the children that require it.
        tmpExprNode = HiveASTParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
      } else if (isNumeric) {
        // For numeric operations, do the minimum necessary cast; casting to the
        // type of the whole expression would be incorrect.
        PrimitiveTypeInfo minArgType =
            HiveParserExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
        tmpExprNode = HiveASTParseUtils.createConversionCast(childExpr, minArgType);
      } else {
        throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
      }
    }
    argTypes.add(HiveParserTypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory()));
    tmpRN = convert(tmpExprNode);
    childRexNodeLst.add(tmpRN);
  }
  // Process the function.
  RelDataType retType = HiveParserTypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
  SqlOperator calciteOp = HiveParserSqlFunctionConverter.getCalciteOperator(
      func.getFuncText(), func.getGenericUDF(), argTypes, retType);
  if (calciteOp.getKind() == SqlKind.CASE) {
    // If it is a CASE operator, we need to rewrite it.
    childRexNodeLst = rewriteCaseChildren(func, childRexNodeLst);
  }
  RexNode expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst);
  // Check whether we need a Calcite cast.
  RexNode cast = handleExplicitCast(func, childRexNodeLst, ((RexCall) expr).getOperator());
  if (cast != null) {
    expr = cast;
    retType = cast.getType();
  }
  // TODO: Calcite's cast function has a bug where inferring the type of a cast throws
  // an exception, so only flatten non-cast calls.
  if (flattenExpr && expr instanceof RexCall
      && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
    RexCall call = (RexCall) expr;
    expr = cluster.getRexBuilder().makeCall(
        retType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator()));
  }
  return expr;
}
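The isTransformableTimeStamp branch is where GenericUDFToUnixTimeStamp comes in: unix_timestamp() with no arguments returns the current time and is therefore non-deterministic, so only calls that do have arguments are rewritten to the equivalent, deterministic to_unix_timestamp before translation. A minimal sketch of just that rewrite step (hypothetical class and names; assumes hive-exec on the classpath):

import java.util.Collections;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnixTimestampRewriteSketch {
  public static void main(String[] args) throws UDFArgumentException {
    // A timestamp column standing in for the argument of unix_timestamp(...).
    ExprNodeDesc tsCol =
        new ExprNodeColumnDesc(TypeInfoFactory.timestampTypeInfo, "ts", "t", false);
    List<ExprNodeDesc> children = Collections.singletonList(tsCol);
    // Same call the converter makes: build to_unix_timestamp over the original children.
    ExprNodeGenericFuncDesc rewritten =
        ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), children);
    System.out.println(rewritten.getExprString());
  }
}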
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp in project hive by apache.
the class KafkaScanTrimmer method getColumnExpr.
@SuppressWarnings("Duplicates")
private static ExprNodeDesc getColumnExpr(ExprNodeDesc expr) {
  if (expr instanceof ExprNodeColumnDesc) {
    return expr;
  }
  ExprNodeGenericFuncDesc funcDesc = null;
  if (expr instanceof ExprNodeGenericFuncDesc) {
    funcDesc = (ExprNodeGenericFuncDesc) expr;
  }
  if (null == funcDesc) {
    return expr;
  }
  GenericUDF udf = funcDesc.getGenericUDF();
  // Check whether it is a simple cast expression.
  if ((udf instanceof GenericUDFBridge || udf instanceof GenericUDFToBinary
      || udf instanceof GenericUDFToChar || udf instanceof GenericUDFToVarchar
      || udf instanceof GenericUDFToDecimal || udf instanceof GenericUDFToDate
      || udf instanceof GenericUDFToUnixTimeStamp || udf instanceof GenericUDFToUtcTimestamp)
      && funcDesc.getChildren().size() == 1
      && funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc) {
    return expr.getChildren().get(0);
  }
  return expr;
}
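This helper is a verbatim copy of the Phoenix one above; the negative case is what the comments leave implicit: a UDF outside the cast whitelist is never peeled, so the caller falls back to the unpeeled expression. A minimal sketch (hypothetical class and names; GenericUDFUpper stands in for any non-whitelisted UDF):

import java.util.Collections;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class NonCastNotPeeledSketch {
  public static void main(String[] args) {
    ExprNodeDesc col =
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key_col", "t", false);
    // upper(key_col) is not in the whitelist, so getColumnExpr would fall
    // through both checks and return the wrapped expression unchanged.
    ExprNodeDesc wrapped = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.stringTypeInfo,
        new GenericUDFUpper(),
        Collections.singletonList(col));
    System.out.println(wrapped.getExprString());
  }
}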
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp in project hive by apache.
the class RexNodeConverter method convert.
private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException {
  ExprNodeDesc tmpExprNode;
  RexNode tmpRN;
  List<RexNode> childRexNodeLst = new ArrayList<>();
  Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType>builder();
  // TODO: 1) Expand to other functions as needed 2) What about types other than primitive?
  TypeInfo tgtDT = null;
  GenericUDF tgtUdf = func.getGenericUDF();
  boolean isNumeric = (tgtUdf instanceof GenericUDFBaseBinary
      && func.getTypeInfo().getCategory() == Category.PRIMITIVE
      && (PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
          ((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
  boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
  boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase;
  boolean isTransformableTimeStamp =
      func.getGenericUDF() instanceof GenericUDFUnixTimeStamp && !func.getChildren().isEmpty();
  boolean isBetween = !isNumeric && tgtUdf instanceof GenericUDFBetween;
  boolean isIN = !isNumeric && tgtUdf instanceof GenericUDFIn;
  boolean isAllPrimitive = true;
  if (isNumeric) {
    tgtDT = func.getTypeInfo();
    assert func.getChildren().size() == 2;
    // TODO: checking for 2 children is redundant; compare already does that.
  } else if (isCompare && (func.getChildren().size() == 2)) {
    tgtDT = FunctionRegistry.getCommonClassForComparison(
        func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
  } else if (isWhenCase) {
    // CASE/WHEN must not contain stateful functions, as they are not allowed there.
    if (checkForStatefulFunctions(func.getChildren())) {
      throw new SemanticException("Stateful expressions cannot be used inside of CASE");
    }
  } else if (isTransformableTimeStamp) {
    // unix_timestamp(args) -> to_unix_timestamp(args)
    func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), func.getChildren());
  } else if (isBetween) {
    assert func.getChildren().size() == 4;
    // Skip the first child: it is the invert (NOT BETWEEN) boolean, not an operand.
    // The target type needs to account for the 3 remaining operands.
    tgtDT = FunctionRegistry.getCommonClassForComparison(
        func.getChildren().get(1).getTypeInfo(),
        FunctionRegistry.getCommonClassForComparison(
            func.getChildren().get(2).getTypeInfo(), func.getChildren().get(3).getTypeInfo()));
  } else if (isIN) {
    // Only the first element of the IN list is considered for the type.
    assert func.getChildren().size() > 1;
    tgtDT = FunctionRegistry.getCommonClassForComparison(
        func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
  }
  for (int i = 0; i < func.getChildren().size(); ++i) {
    ExprNodeDesc childExpr = func.getChildren().get(i);
    tmpExprNode = childExpr;
    if (tgtDT != null && tgtDT.getCategory() == Category.PRIMITIVE
        && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
      if (isCompare || isBetween || isIN) {
        // For BETWEEN, skip the first child (the invert boolean).
        if (!isBetween || i > 0) {
          tmpExprNode = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
              .createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
        }
      } else if (isNumeric) {
        // For numeric operations, do the minimum necessary cast; casting to the
        // type of the whole expression would be incorrect.
        PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
        tmpExprNode = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
            .createConversionCast(childExpr, minArgType);
      } else {
        throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
      }
    }
    isAllPrimitive = isAllPrimitive && tmpExprNode.getTypeInfo().getCategory() == Category.PRIMITIVE;
    argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), typeFactory));
    tmpRN = convert(tmpExprNode);
    childRexNodeLst.add(tmpRN);
  }
  // See if this is an explicit cast.
  RelDataType retType = TypeConverter.convert(func.getTypeInfo(), typeFactory);
  RexNode expr = handleExplicitCast(func.getGenericUDF(), retType, childRexNodeLst, rexBuilder);
  if (expr == null) {
    // This is not a cast; process the function.
    SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(
        func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType);
    if (calciteOp.getKind() == SqlKind.CASE) {
      // If it is a CASE operator, we need to rewrite it.
      childRexNodeLst = rewriteCaseChildren(func.getFuncText(), childRexNodeLst, rexBuilder);
      // Adjust branch types by inserting explicit casts if the actual type is ambiguous.
      childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType, rexBuilder);
    } else if (HiveExtractDate.ALL_FUNCTIONS.contains(calciteOp)) {
      // If it is an EXTRACT operator, we need to rewrite it.
      childRexNodeLst = rewriteExtractDateChildren(calciteOp, childRexNodeLst, rexBuilder);
    } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) {
      // If it is a FLOOR <date> operator, we need to rewrite it.
      childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst, rexBuilder);
    } else if (calciteOp.getKind() == SqlKind.IN && isAllPrimitive) {
      if (childRexNodeLst.size() == 2) {
        // For a single item in an IN clause, transform A IN (B) to A = B:
        // IN [A,B] => EQUALS [A,B] (complex types excepted).
        calciteOp = SqlStdOperatorTable.EQUALS;
      } else if (RexUtil.isReferenceOrAccess(childRexNodeLst.get(0), true)) {
        // For more than a single item in an IN clause, transform
        // IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]] (complex types excepted).
        // The rewrite to OR is done only if the number of operands is below
        // the configured threshold.
        childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst, rexBuilder);
        calciteOp = SqlStdOperatorTable.OR;
      }
    } else if (calciteOp.getKind() == SqlKind.COALESCE && childRexNodeLst.size() > 1) {
      // Rewrite COALESCE as a CASE. This allows it to be further reduced to OR, if possible.
      calciteOp = SqlStdOperatorTable.CASE;
      childRexNodeLst = rewriteCoalesceChildren(childRexNodeLst, rexBuilder);
      // Adjust branch types by inserting explicit casts if the actual type is ambiguous.
      childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType, rexBuilder);
    } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) {
      childRexNodeLst = rewriteToDateChildren(childRexNodeLst, rexBuilder);
    } else if (calciteOp.getKind() == SqlKind.BETWEEN) {
      assert childRexNodeLst.get(0).isAlwaysTrue() || childRexNodeLst.get(0).isAlwaysFalse();
      childRexNodeLst = rewriteBetweenChildren(childRexNodeLst, rexBuilder);
      if (childRexNodeLst.get(0).isAlwaysTrue()) {
        calciteOp = SqlStdOperatorTable.OR;
      } else {
        calciteOp = SqlStdOperatorTable.AND;
      }
    }
    expr = rexBuilder.makeCall(retType, calciteOp, childRexNodeLst);
  } else {
    retType = expr.getType();
  }
  // TODO: Calcite's cast function has a bug where inferring the type of a cast throws
  // an exception, so only flatten non-cast calls.
  if (expr instanceof RexCall && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
    RexCall call = (RexCall) expr;
    expr = rexBuilder.makeCall(
        retType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator()));
  }
  return expr;
}
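The isBetween branch derives a single target type that covers the checked operand and both bounds by nesting getCommonClassForComparison calls. A standalone sketch of that derivation (hypothetical class name; assumes hive-exec on the classpath):

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class BetweenTargetTypeSketch {
  public static void main(String[] args) {
    // c BETWEEN low AND high, with c INT, low BIGINT, high DOUBLE:
    // tgtDT = common(int, common(bigint, double))
    TypeInfo bounds = FunctionRegistry.getCommonClassForComparison(
        TypeInfoFactory.longTypeInfo, TypeInfoFactory.doubleTypeInfo);
    TypeInfo target = FunctionRegistry.getCommonClassForComparison(
        TypeInfoFactory.intTypeInfo, bounds);
    // The loop above then casts each operand (except the invert flag) to this
    // common type before converting to RexNodes.
    System.out.println(target.getTypeName()); // expected: double
  }
}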
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp in project hive by apache.
the class TestVectorizationContext method testTimeStampUdfs.
@Test
public void testTimeStampUdfs() throws HiveException {
  ExprNodeGenericFuncDesc tsFuncExpr = new ExprNodeGenericFuncDesc();
  tsFuncExpr.setTypeInfo(TypeInfoFactory.intTypeInfo);
  ExprNodeColumnDesc colDesc1 =
      new ExprNodeColumnDesc(TypeInfoFactory.timestampTypeInfo, "a", "table", false);
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(colDesc1);
  List<String> columns = new ArrayList<String>();
  columns.add("b");
  columns.add("a");
  VectorizationContext vc = new VectorizationContext("name", columns);
  // UDFYear
  GenericUDFBridge gudfBridge = new GenericUDFBridge("year", false, UDFYear.class.getName());
  tsFuncExpr.setGenericUDF(gudfBridge);
  tsFuncExpr.setChildren(children);
  VectorExpression ve = vc.getVectorExpression(tsFuncExpr);
  Assert.assertEquals(VectorUDFYearTimestamp.class, ve.getClass());
  // GenericUDFToUnixTimeStamp
  GenericUDFToUnixTimeStamp gudf = new GenericUDFToUnixTimeStamp();
  tsFuncExpr.setGenericUDF(gudf);
  tsFuncExpr.setTypeInfo(TypeInfoFactory.longTypeInfo);
  ve = vc.getVectorExpression(tsFuncExpr);
  Assert.assertEquals(VectorUDFUnixTimeStampTimestamp.class, ve.getClass());
}