Use of org.apache.hadoop.hive.ql.plan.PTFDesc in project hive by apache.
Class PTFTranslator, method translate.
public PTFDesc translate(WindowingSpec wdwSpec, SemanticAnalyzer semAly, HiveConf hCfg, RowResolver inputRR, UnparseTranslator unparseT) throws SemanticException {
  init(semAly, hCfg, inputRR, unparseT);
  windowingSpec = wdwSpec;
  ptfDesc = new PTFDesc();
  ptfDesc.setCfg(hCfg);
  ptfDesc.setLlInfo(llInfo);
  WindowTableFunctionDef wdwTFnDef = new WindowTableFunctionDef();
  ptfDesc.setFuncDef(wdwTFnDef);
  PTFQueryInputSpec inpSpec = new PTFQueryInputSpec();
  inpSpec.setType(PTFQueryInputType.WINDOWING);
  wdwTFnDef.setInput(translate(inpSpec, 0));
  ShapeDetails inpShape = wdwTFnDef.getInput().getOutputShape();
  WindowingTableFunctionResolver tFn = (WindowingTableFunctionResolver) FunctionRegistry.getTableFunctionResolver(FunctionRegistry.WINDOWING_TABLE_FUNCTION);
  if (tFn == null) {
    throw new SemanticException(String.format("Internal Error: Unknown Table Function %s", FunctionRegistry.WINDOWING_TABLE_FUNCTION));
  }
  wdwTFnDef.setName(FunctionRegistry.WINDOWING_TABLE_FUNCTION);
  wdwTFnDef.setResolverClassName(tFn.getClass().getName());
  wdwTFnDef.setAlias("ptf_" + 1);
  wdwTFnDef.setExpressionTreeString(null);
  wdwTFnDef.setTransformsRawInput(false);
  tFn.initialize(hCfg, ptfDesc, wdwTFnDef);
  TableFunctionEvaluator tEval = tFn.getEvaluator();
  wdwTFnDef.setTFunction(tEval);
  wdwTFnDef.setCarryForwardNames(tFn.carryForwardNames());
  wdwTFnDef.setRawInputShape(inpShape);
  PartitioningSpec partiSpec = wdwSpec.getQueryPartitioningSpec();
  if (partiSpec == null) {
    throw new SemanticException("Invalid use of Windowing: there is no Partitioning associated with Windowing");
  }
  PartitionDef partDef = translate(inpShape, wdwSpec.getQueryPartitionSpec());
  OrderDef ordDef = translate(inpShape, wdwSpec.getQueryOrderSpec(), partDef);
  wdwTFnDef.setPartition(partDef);
  wdwTFnDef.setOrder(ordDef);
  /*
   * process Wdw functions
   */
  ArrayList<WindowFunctionDef> windowFunctions = new ArrayList<WindowFunctionDef>();
  if (wdwSpec.getWindowExpressions() != null) {
    for (WindowExpressionSpec expr : wdwSpec.getWindowExpressions()) {
      if (expr instanceof WindowFunctionSpec) {
        WindowFunctionDef wFnDef = translate(wdwTFnDef, (WindowFunctionSpec) expr);
        windowFunctions.add(wFnDef);
      }
    }
    wdwTFnDef.setWindowFunctions(windowFunctions);
  }
  /*
   * set outputFromWdwFnProcessing
   */
  ArrayList<String> aliases = new ArrayList<String>();
  ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
  for (WindowFunctionDef wFnDef : windowFunctions) {
    aliases.add(wFnDef.getAlias());
    if (wFnDef.isPivotResult()) {
      fieldOIs.add(((ListObjectInspector) wFnDef.getOI()).getListElementObjectInspector());
    } else {
      fieldOIs.add(wFnDef.getOI());
    }
  }
  PTFTranslator.addInputColumnsToList(inpShape, aliases, fieldOIs);
  StructObjectInspector wdwOutOI = ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);
  tFn.setWdwProcessingOutputOI(wdwOutOI);
  RowResolver wdwOutRR = buildRowResolverForWindowing(wdwTFnDef);
  ShapeDetails wdwOutShape = setupShape(wdwOutOI, null, wdwOutRR);
  wdwTFnDef.setOutputShape(wdwOutShape);
  tFn.setupOutputOI();
  PTFDeserializer.alterOutputOIForStreaming(ptfDesc);
  return ptfDesc;
}
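The method above packs the whole windowing translation into a single PTFDesc whose function definition is one WindowTableFunctionDef. A minimal sketch of reading that structure back, assuming the def class exposes getters mirroring the setters used above (only getFuncDef() and getOutputShape() appear verbatim in this listing; the other getter names are assumptions):

PTFDesc desc = ptfTranslator.translate(wdwSpec, semAly, hCfg, inputRR, unparseT);
// Assumed getters mirroring the setters above; not taken verbatim from this listing.
WindowTableFunctionDef fnDef = (WindowTableFunctionDef) desc.getFuncDef();
ShapeDetails outShape = fnDef.getOutputShape();            // struct OI of window fn aliases plus input columns
List<WindowFunctionDef> wFns = fnDef.getWindowFunctions(); // one entry per WindowFunctionSpec in wdwSpec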
Use of org.apache.hadoop.hive.ql.plan.PTFDesc in project hive by apache.
Class SemanticAnalyzer, method genPTFPlanForComponentQuery.
private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operator input) throws SemanticException {
  /*
   * 1. Create the PTFDesc from the Qspec attached to this QB.
   */
  RowResolver rr = opParseCtx.get(input).getRowResolver();
  PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
  /*
   * 2. build Map-side Op Graph. Graph template is either:
   * Input -> PTF_map -> ReduceSink
   * or
   * Input -> ReduceSink
   *
   * Here the ExprNodeDescriptors in the QueryDef are based on the Input Operator's RR.
   */
  {
    PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain();
    /*
     * a. add Map-side PTF Operator if needed
     */
    if (tabDef.isTransformsRawInput()) {
      RowResolver ptfMapRR = tabDef.getRawInputShape().getRr();
      ptfDesc.setMapSide(true);
      input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfMapRR.getColumnInfos()), input), ptfMapRR);
      rr = opParseCtx.get(input).getRowResolver();
    }
    /*
     * b. Build Reduce Sink Details (keyCols, valueCols, outColNames etc.) for this ptfDesc.
     */
    List<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
    List<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
    StringBuilder orderString = new StringBuilder();
    StringBuilder nullOrderString = new StringBuilder();
    /*
     * Use the input RR of TableScanOperator in case there is no map-side
     * reshape of input.
     * If the parent of ReduceSinkOperator is PTFOperator, use its
     * output RR.
     */
    buildPTFReduceSinkDetails(tabDef, partCols, orderCols, orderString, nullOrderString);
    input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), nullOrderString.toString(), -1, Operation.NOT_ACID, false);
  }
  /*
   * 3. build Reduce-side Op Graph
   */
  {
    /*
     * c. Rebuild the QueryDef.
     * Why?
     * - so that the ExprNodeDescriptors in the QueryDef are based on the
     *   Select Operator's RowResolver
     */
    rr = opParseCtx.get(input).getRowResolver();
    ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
    /*
     * d. Construct PTF Operator.
     */
    RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
    input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfOpRR.getColumnInfos()), input), ptfOpRR);
  }
  return input;
}
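The resulting operator chain takes one of two shapes, restated here from the method's own comments; nothing below is new Hive API, it only summarizes what the code above constructs:

// transformsRawInput == true:
//   Input -> PTFOperator (map side) -> ReduceSinkOperator -> PTFOperator (reduce side)
// transformsRawInput == false:
//   Input -> ReduceSinkOperator -> PTFOperator (reduce side)
//
// The PTFInvocationSpec is translated twice so that the reduce-side PTFOperator's
// ExprNodeDescs are resolved against the RowResolver available after the shuffle.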
Use of org.apache.hadoop.hive.ql.plan.PTFDesc in project hive by apache.
Class Vectorizer, method validatePTFOperator.
private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
  if (!isPtfVectorizationEnabled) {
    setNodeIssue("Vectorization of PTF is not enabled (" + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
    return false;
  }
  PTFDesc ptfDesc = op.getConf();
  boolean isMapSide = ptfDesc.isMapSide();
  if (isMapSide) {
    setOperatorIssue("PTF Mapper not supported");
    return false;
  }
  List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
  if (ptfParents != null && ptfParents.size() > 0) {
    Operator<? extends OperatorDesc> ptfParent = op.getParentOperators().get(0);
    if (!(ptfParent instanceof ReduceSinkOperator)) {
      boolean isReduceShufflePtf = false;
      if (ptfParent instanceof SelectOperator) {
        ptfParents = ptfParent.getParentOperators();
        if (ptfParents == null || ptfParents.size() == 0) {
          isReduceShufflePtf = true;
        } else {
          ptfParent = ptfParent.getParentOperators().get(0);
          isReduceShufflePtf = (ptfParent instanceof ReduceSinkOperator);
        }
      }
      if (!isReduceShufflePtf) {
        setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
        return false;
      }
    }
  }
  boolean forNoop = ptfDesc.forNoop();
  if (forNoop) {
    setOperatorIssue("NOOP not supported");
    return false;
  }
  boolean forWindowing = ptfDesc.forWindowing();
  if (!forWindowing) {
    setOperatorIssue("Windowing required");
    return false;
  }
  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef);
  if (!isWindowTableFunctionDef) {
    setOperatorIssue("Must be a WindowTableFunctionDef");
    return false;
  }
  try {
    createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc, vectorizedPTFMaxMemoryBufferingBatchCount);
  } catch (HiveException e) {
    setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
    return false;
  }
  // Output columns ok?
  String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
  TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
  final int outputCount = outputColumnNames.length;
  for (int i = 0; i < outputCount; i++) {
    String typeName = outputTypeInfos[i].getTypeName();
    boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ false);
    if (!ret) {
      setExpressionIssue("PTF Output Columns", "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
      return false;
    }
  }
  boolean[] distinctEvaluator = vectorPTFDesc.getEvaluatorsAreDistinct();
  String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
  final int count = evaluatorFunctionNames.length;
  WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
  for (int i = 0; i < count; i++) {
    String functionName = evaluatorFunctionNames[i];
    SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
    if (supportedFunctionType == null) {
      setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames);
      return false;
    }
    if (distinctEvaluator[i] && !supportedFunctionType.isSupportDistinct()) {
      setOperatorIssue(functionName + " distinct is not supported ");
      return false;
    }
    WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
    List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
    final boolean isSingleParameter = (exprNodeDescList != null && exprNodeDescList.size() == 1);
    final ExprNodeDesc singleExprNodeDesc = (isSingleParameter ? exprNodeDescList.get(0) : null);
    final TypeInfo singleTypeInfo = (isSingleParameter ? singleExprNodeDesc.getTypeInfo() : null);
    final PrimitiveCategory singlePrimitiveCategory = (singleTypeInfo instanceof PrimitiveTypeInfo ? ((PrimitiveTypeInfo) singleTypeInfo).getPrimitiveCategory() : null);
    switch (windowFrameDef.getWindowType()) {
      case RANGE:
        if (!windowFrameDef.getEnd().isCurrentRow()) {
          setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
          return false;
        }
        break;
      case ROWS:
        {
          boolean isRowEndCurrent = (windowFrameDef.getEnd().isCurrentRow() && (supportedFunctionType == SupportedFunctionType.AVG || supportedFunctionType == SupportedFunctionType.MAX || supportedFunctionType == SupportedFunctionType.MIN || supportedFunctionType == SupportedFunctionType.SUM) && isSingleParameter && singlePrimitiveCategory != null);
          if (!isRowEndCurrent && !windowFrameDef.isEndUnbounded()) {
            setOperatorIssue(functionName + " UNBOUNDED end frame is required for ROWS window type");
            return false;
          }
        }
        break;
      default:
        throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
    }
    // RANK/DENSE_RANK don't care about columns.
    if (supportedFunctionType != SupportedFunctionType.RANK && supportedFunctionType != SupportedFunctionType.DENSE_RANK) {
      if (exprNodeDescList != null) {
        // LEAD and LAG now support multiple arguments in vectorized mode
        if (exprNodeDescList.size() > 1 && supportedFunctionType != SupportedFunctionType.LAG && supportedFunctionType != SupportedFunctionType.LEAD) {
          setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
          return false;
        }
        ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
        if (containsLeadLag(exprNodeDesc)) {
          setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName);
          return false;
        }
        if (supportedFunctionType != SupportedFunctionType.COUNT) {
          // COUNT does not care about column types. The rest do.
          TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
          Category category = typeInfo.getCategory();
          boolean isSupportedType;
          if (category != Category.PRIMITIVE) {
            isSupportedType = false;
          } else {
            ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
            switch (colVecType) {
              case LONG:
              case DOUBLE:
              case DECIMAL:
                isSupportedType = true;
                break;
              default:
                isSupportedType = false;
                break;
            }
          }
          if (!isSupportedType) {
            setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName);
            return false;
          }
        }
      }
    }
    if (vectorPTFDesc.getOrderExprNodeDescs().length > 1) {
      /*
       * Currently, we need to rule out here all cases where a range boundary scanner can run,
       * basically: 1. bounded start 2. bounded end which is not current row
       */
      if (windowFrameDef.getWindowType() == WindowType.RANGE && (!windowFrameDef.isStartUnbounded() || !(windowFrameDef.getEnd().isCurrentRow() || windowFrameDef.isEndUnbounded()))) {
        setOperatorIssue("Multi-column ordered RANGE boundary scanner is not supported in vectorized mode (window: " + windowFrameDef + ")");
        return false;
      }
    }
  }
  return true;
}
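The window-frame checks in the middle of this method boil down to one rule per window type. A minimal sketch restating that rule; the helper name and boolean parameters are hypothetical, and only the WindowType enum is taken from the code above:

// Hypothetical helper, not part of Vectorizer: restates the end-frame rule enforced above.
static boolean isEndFrameVectorizable(WindowType windowType, boolean endIsCurrentRow,
    boolean endIsUnbounded, boolean isAvgMaxMinSumOnSinglePrimitive) {
  switch (windowType) {
    case RANGE:
      return endIsCurrentRow;                 // RANGE frames must end at CURRENT ROW
    case ROWS:
      // ROWS frames must end UNBOUNDED, except AVG/MAX/MIN/SUM over a single
      // primitive argument, which may also end at CURRENT ROW.
      return endIsUnbounded || (endIsCurrentRow && isAvgMaxMinSumOnSinglePrimitive);
    default:
      return false;                           // any other window type is rejected
  }
}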
Use of org.apache.hadoop.hive.ql.plan.PTFDesc in project hive by apache.
Class Vectorizer, method vectorizePTFOperator.
/*
 * NOTE: The VectorPTFDesc has already been allocated and populated.
 */
public static Operator<? extends OperatorDesc> vectorizePTFOperator(Operator<? extends OperatorDesc> ptfOp, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
  PTFDesc ptfDesc = (PTFDesc) ptfOp.getConf();
  VectorPTFInfo vectorPTFInfo = createVectorPTFInfo(ptfOp, ptfDesc, vContext, vectorPTFDesc);
  vectorPTFDesc.setVectorPTFInfo(vectorPTFInfo);
  Class<? extends Operator<?>> opClass = VectorPTFOperator.class;
  return OperatorFactory.getVectorOperator(opClass, ptfOp.getCompilationOpContext(), ptfOp.getConf(), vContext, vectorPTFDesc);
}
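Validation and vectorization are meant to be paired: the operator is only rewritten when validatePTFOperator accepted it. A sketch of that pairing using just the two methods in this listing; how Vectorizer actually splices the new operator into the plan is not shown here and is assumed:

// Sketch only: inside Vectorizer, after vectorPTFDesc has been allocated and populated.
if (validatePTFOperator(ptfOp, vContext, vectorPTFDesc)) {
  Operator<? extends OperatorDesc> vectorPtfOp =
      vectorizePTFOperator(ptfOp, vContext, vectorPTFDesc);
  // vectorPtfOp is a VectorPTFOperator built via OperatorFactory.getVectorOperator(...)
}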
Use of org.apache.hadoop.hive.ql.plan.PTFDesc in project hive by apache.
Class TestVectorPTFGroupBatches, method getFakeOperator.
private VectorPTFOperator getFakeOperator() throws HiveException {
  VectorPTFDesc vectorPTFDesc = new VectorPTFDesc();
  vectorPTFDesc.setVectorPTFInfo(new VectorPTFInfo());
  vectorPTFDesc.setOutputColumnNames(new String[0]);
  vectorPTFDesc.setEvaluatorFunctionNames(new String[0]);
  return new VectorPTFOperator(new CompilationOpContext(), new PTFDesc(), new VectorizationContext("fake"), vectorPTFDesc);
}