Use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.
The class PTFTranslator, method translate:
private PartitionedTableFunctionDef translate(PartitionedTableFunctionSpec spec, PTFInputDef inpDef,
    int inpNum) throws SemanticException {
  TableFunctionResolver tFn = FunctionRegistry.getTableFunctionResolver(spec.getName());
  if (tFn == null) {
    throw new SemanticException(String.format("Unknown Table Function %s", spec.getName()));
  }
  PartitionedTableFunctionDef def = new PartitionedTableFunctionDef();
  def.setInput(inpDef);
  def.setName(spec.getName());
  def.setResolverClassName(tFn.getClass().getName());
  def.setAlias(spec.getAlias() == null ? "ptf_" + inpNum : spec.getAlias());
  def.setExpressionTreeString(spec.getAstNode().toStringTree());
  def.setTransformsRawInput(tFn.transformsRawInput());
  /*
   * translate args
   */
  List<ASTNode> args = spec.getArgs();
  if (args != null) {
    for (ASTNode expr : args) {
      PTFExpressionDef argDef = null;
      try {
        argDef = buildExpressionDef(inpDef.getOutputShape(), expr);
      } catch (HiveException he) {
        throw new SemanticException(he);
      }
      def.addArg(argDef);
    }
  }
  tFn.initialize(hCfg, ptfDesc, def);
  TableFunctionEvaluator tEval = tFn.getEvaluator();
  def.setTFunction(tEval);
  def.setCarryForwardNames(tFn.carryForwardNames());
  tFn.setupRawInputOI();
  if (tFn.transformsRawInput()) {
    StructObjectInspector rawInOutOI = tEval.getRawInputOI();
    List<String> rawInOutColNames = tFn.getRawInputColumnNames();
    RowResolver rawInRR = buildRowResolverForPTF(def.getName(), spec.getAlias(), rawInOutOI,
        rawInOutColNames, inpDef.getOutputShape().getRr());
    ShapeDetails rawInpShape = setupTableFnShape(def.getName(), inpDef.getOutputShape(),
        rawInOutOI, rawInOutColNames, rawInRR);
    def.setRawInputShape(rawInpShape);
  } else {
    def.setRawInputShape(inpDef.getOutputShape());
  }
  translatePartitioning(def, spec);
  tFn.setupOutputOI();
  StructObjectInspector outputOI = tEval.getOutputOI();
  List<String> outColNames = tFn.getOutputColumnNames();
  RowResolver outRR = buildRowResolverForPTF(def.getName(), spec.getAlias(), outputOI,
      outColNames, def.getRawInputShape().getRr());
  ShapeDetails outputShape = setupTableFnShape(def.getName(), inpDef.getOutputShape(),
      outputOI, outColNames, outRR);
  def.setOutputShape(outputShape);
  def.setReferencedColumns(tFn.getReferencedColumns());
  return def;
}
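For orientation, the following is a minimal, self-contained sketch (plain Java, not the Hive API) of the translation pattern above: resolve the function by name and fail fast when it is unknown, default the alias to "ptf_" + inpNum, and pass the input's output shape through as the raw-input shape when the function does not transform its raw input. All class and registry names in the sketch are hypothetical stand-ins.

import java.util.Map;

public class TranslateSketch {

  /** Stand-in for PartitionedTableFunctionSpec: a function name and an optional alias. */
  record Spec(String name, String alias) {}

  /** Stand-in for PartitionedTableFunctionDef: only the fields the sketch decides. */
  record Def(String name, String alias, boolean transformsRawInput, String rawInputShape) {}

  // Stand-in for FunctionRegistry.getTableFunctionResolver: maps a name to whether the
  // function transforms its raw input; null means "unknown table function".
  static final Map<String, Boolean> RESOLVERS = Map.of("noop", false, "noopwithmap", true);

  static Def translate(Spec spec, String inputOutputShape, int inpNum) {
    Boolean transformsRawInput = RESOLVERS.get(spec.name());
    if (transformsRawInput == null) {
      // Mirrors: throw new SemanticException("Unknown Table Function %s")
      throw new IllegalArgumentException("Unknown Table Function " + spec.name());
    }
    // Same alias-defaulting rule as def.setAlias(...) above.
    String alias = spec.alias() == null ? "ptf_" + inpNum : spec.alias();
    // When the function does not reshape its raw input, the raw-input shape is simply the
    // input's output shape, as in def.setRawInputShape(inpDef.getOutputShape()).
    String rawInputShape = transformsRawInput ? "<shape from evaluator raw-input OI>" : inputOutputShape;
    return new Def(spec.name(), alias, transformsRawInput, rawInputShape);
  }

  public static void main(String[] args) {
    System.out.println(translate(new Spec("noop", null), "<input output shape>", 0));
  }
}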
Use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.
The class PTFDeserializer, method alterOutputOIForStreaming:
/*
 * If the final PTF in a PTFChain can stream its output, then set the OI of its OutputShape
 * to the OI returned by the TableFunctionEvaluator.
 */
public static void alterOutputOIForStreaming(PTFDesc ptfDesc) {
  PartitionedTableFunctionDef tDef = ptfDesc.getFuncDef();
  TableFunctionEvaluator tEval = tDef.getTFunction();
  if (tEval.canIterateOutput()) {
    tDef.getOutputShape().setOI(tEval.getOutputOI());
  }
}
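The hook is small but easy to misread: nothing changes unless the evaluator can iterate (stream) its output. A stand-alone illustration of the guard (plain Java, not the Hive classes; all names are hypothetical):

public class StreamingOiSketch {

  interface Evaluator {              // stand-in for TableFunctionEvaluator
    boolean canIterateOutput();
    String getOutputOI();            // stand-in for the StructObjectInspector it returns
  }

  static class Shape {               // stand-in for ShapeDetails
    String oi = "<OI built during translation>";
  }

  static void alterOutputOIForStreaming(Shape outputShape, Evaluator eval) {
    // Only a streaming-capable evaluator gets to publish its own output OI,
    // mirroring tDef.getOutputShape().setOI(tEval.getOutputOI()).
    if (eval.canIterateOutput()) {
      outputShape.oi = eval.getOutputOI();
    }
  }

  public static void main(String[] args) {
    Shape shape = new Shape();
    alterOutputOIForStreaming(shape, new Evaluator() {
      public boolean canIterateOutput() { return true; }
      public String getOutputOI() { return "<evaluator's streaming OI>"; }
    });
    System.out.println(shape.oi);    // prints the evaluator's OI because streaming is possible
  }
}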
Use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.
The class SemanticAnalyzer, method genPTFPlanForComponentQuery:
private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operator input)
    throws SemanticException {
  /*
   * 1. Create the PTFDesc from the QSpec attached to this QB.
   */
  RowResolver rr = opParseCtx.get(input).getRowResolver();
  PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
  /*
   * 2. Build the map-side Op graph. The graph template is either:
   *      Input -> PTF_map -> ReduceSink
   *    or
   *      Input -> ReduceSink
   *
   * Here the ExprNodeDescriptors in the QueryDef are based on the Input Operator's RR.
   */
  {
    PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain();
    /*
     * a. Add a map-side PTF Operator if needed.
     */
    if (tabDef.isTransformsRawInput()) {
      RowResolver ptfMapRR = tabDef.getRawInputShape().getRr();
      ptfDesc.setMapSide(true);
      input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc,
          new RowSchema(ptfMapRR.getColumnInfos()), input), ptfMapRR);
      rr = opParseCtx.get(input).getRowResolver();
    }
    /*
     * b. Build the ReduceSink details (keyCols, valueCols, outColNames etc.) for this ptfDesc.
     */
    ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
    StringBuilder orderString = new StringBuilder();
    StringBuilder nullOrderString = new StringBuilder();
    /*
     * Use the input RR of the TableScanOperator in case there is no map-side
     * reshape of the input.
     * If the parent of the ReduceSinkOperator is a PTFOperator, use its output RR.
     */
    buildPTFReduceSinkDetails(tabDef, rr, partCols, orderCols, orderString, nullOrderString);
    input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(),
        nullOrderString.toString(), -1, Operation.NOT_ACID);
  }
  /*
   * 3. Build the reduce-side Op graph.
   */
  {
    /*
     * c. Rebuild the QueryDef.
     * Why? So that the ExprNodeDescriptors in the QueryDef are based on the
     * Select Operator's RowResolver.
     */
    rr = opParseCtx.get(input).getRowResolver();
    ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
    /*
     * d. Construct the PTF Operator.
     */
    RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
    input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc,
        new RowSchema(ptfOpRR.getColumnInfos()), input), ptfOpRR);
  }
  return input;
}
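To make the two graph templates in step 2 concrete, here is a small, runnable sketch (plain Java, not the Hive planner) that prints the operator chain produced in each case, keyed on the same tabDef.isTransformsRawInput() decision; the operator labels are illustrative only.

import java.util.List;

public class PtfPlanShapeSketch {

  // Returns the operator chain the method would build, depending on whether the first
  // PTF in the chain transforms its raw input (which is what adds the map-side PTF).
  static List<String> planFor(boolean transformsRawInput) {
    return transformsRawInput
        // Map side reshapes the input first, then shuffles, then runs the PTF on the reduce side.
        ? List.of("TableScan", "PTF (map-side)", "ReduceSink", "PTF (reduce-side)")
        // No map-side reshape: shuffle directly, then run the PTF on the reduce side.
        : List.of("TableScan", "ReduceSink", "PTF (reduce-side)");
  }

  public static void main(String[] args) {
    System.out.println(String.join(" -> ", planFor(false)));
    System.out.println(String.join(" -> ", planFor(true)));
  }
}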
Use of org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef in project hive by apache.
The class Vectorizer, method validatePTFOperator:
private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext,
    VectorPTFDesc vectorPTFDesc) throws HiveException {
  if (!isPtfVectorizationEnabled) {
    setNodeIssue("Vectorization of PTF is not enabled ("
        + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
    return false;
  }
  PTFDesc ptfDesc = (PTFDesc) op.getConf();
  boolean isMapSide = ptfDesc.isMapSide();
  if (isMapSide) {
    setOperatorIssue("PTF Mapper not supported");
    return false;
  }
  List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
  if (ptfParents != null && ptfParents.size() > 0) {
    Operator<? extends OperatorDesc> ptfParent = op.getParentOperators().get(0);
    if (!(ptfParent instanceof ReduceSinkOperator)) {
      boolean isReduceShufflePtf = false;
      if (ptfParent instanceof SelectOperator) {
        ptfParents = ptfParent.getParentOperators();
        if (ptfParents == null || ptfParents.size() == 0) {
          isReduceShufflePtf = true;
        } else {
          ptfParent = ptfParent.getParentOperators().get(0);
          isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator);
        }
      }
      if (!isReduceShufflePtf) {
        setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
        return false;
      }
    }
  }
  boolean forNoop = ptfDesc.forNoop();
  if (forNoop) {
    setOperatorIssue("NOOP not supported");
    return false;
  }
  boolean forWindowing = ptfDesc.forWindowing();
  if (!forWindowing) {
    setOperatorIssue("Windowing required");
    return false;
  }
  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef);
  if (!isWindowTableFunctionDef) {
    setOperatorIssue("Must be a WindowTableFunctionDef");
    return false;
  }
  try {
    createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc, vectorizedPTFMaxMemoryBufferingBatchCount);
  } catch (HiveException e) {
    setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
    return false;
  }
  // Output columns ok?
  String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
  TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
  final int outputCount = outputColumnNames.length;
  for (int i = 0; i < outputCount; i++) {
    String typeName = outputTypeInfos[i].getTypeName();
    boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ false);
    if (!ret) {
      setExpressionIssue("PTF Output Columns",
          "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
      return false;
    }
  }
  boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
  String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
  final int count = evaluatorFunctionNames.length;
  WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
  for (int i = 0; i < count; i++) {
    String functionName = evaluatorFunctionNames[i];
    SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
    if (supportedFunctionType == null) {
      setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames);
      return false;
    }
    WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
    if (!windowFrameDef.isStartUnbounded()) {
      setOperatorIssue(functionName + " only UNBOUNDED start frame is supported");
      return false;
    }
    switch (windowFrameDef.getWindowType()) {
    case RANGE:
      if (!windowFrameDef.getEnd().isCurrentRow()) {
        setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
        return false;
      }
      break;
    case ROWS:
      if (!windowFrameDef.isEndUnbounded()) {
        setOperatorIssue(functionName + " UNBOUNDED end frame is not supported for ROWS window type");
        return false;
      }
      break;
    default:
      throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
    }
    List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
    if (exprNodeDescList != null && exprNodeDescList.size() > 1) {
      setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
      return false;
    }
    if (exprNodeDescList != null) {
      ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
      if (containsLeadLag(exprNodeDesc)) {
        setOperatorIssue("lead and lag function not supported in argument expression of aggregation function "
            + functionName);
        return false;
      }
      if (supportedFunctionType != SupportedFunctionType.COUNT
          && supportedFunctionType != SupportedFunctionType.DENSE_RANK
          && supportedFunctionType != SupportedFunctionType.RANK) {
        // COUNT, DENSE_RANK, and RANK do not care about column types. The rest do.
        TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
        Category category = typeInfo.getCategory();
        boolean isSupportedType;
        if (category != Category.PRIMITIVE) {
          isSupportedType = false;
        } else {
          ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
          switch (colVecType) {
          case LONG:
          case DOUBLE:
          case DECIMAL:
            isSupportedType = true;
            break;
          default:
            isSupportedType = false;
            break;
          }
        }
        if (!isSupportedType) {
          setOperatorIssue(typeInfo.getTypeName()
              + " data type not supported in argument expression of aggregation function " + functionName);
          return false;
        }
      }
    }
  }
  return true;
}
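The window-frame restrictions in the loop above are the part most users trip over: the frame start must be UNBOUNDED PRECEDING, a RANGE frame must end at CURRENT ROW, and a ROWS frame must end at UNBOUNDED FOLLOWING. Here is a self-contained sketch (plain Java, not Hive's WindowFrameDef) of the same rules, with a few example frames in main; the enum and method names are hypothetical.

public class VectorPtfFrameRuleSketch {

  enum WindowType { RANGE, ROWS }
  enum Bound { UNBOUNDED, CURRENT_ROW, VALUE }

  // Returns true when the frame satisfies the vectorized-PTF rules checked above.
  static boolean isVectorizable(WindowType type, Bound start, Bound end) {
    if (start != Bound.UNBOUNDED) {
      return false;                        // start frame must be UNBOUNDED PRECEDING
    }
    switch (type) {
    case RANGE:
      return end == Bound.CURRENT_ROW;     // RANGE frame must end at CURRENT ROW
    case ROWS:
      return end == Bound.UNBOUNDED;       // ROWS frame must end at UNBOUNDED FOLLOWING
    default:
      throw new IllegalStateException("Unexpected window type " + type);
    }
  }

  public static void main(String[] args) {
    // ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING -> true
    System.out.println(isVectorizable(WindowType.ROWS, Bound.UNBOUNDED, Bound.UNBOUNDED));
    // RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -> true
    System.out.println(isVectorizable(WindowType.RANGE, Bound.UNBOUNDED, Bound.CURRENT_ROW));
    // ROWS BETWEEN 5 PRECEDING AND CURRENT ROW -> false (bounded start)
    System.out.println(isVectorizable(WindowType.ROWS, Bound.VALUE, Bound.CURRENT_ROW));
  }
}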