Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef in project hive by apache.
The class WindowingTableFunction, method iterator.
@SuppressWarnings("rawtypes")
@Override
public Iterator<Object> iterator(PTFPartitionIterator<Object> pItr) throws HiveException {
  WindowTableFunctionDef wTFnDef = (WindowTableFunctionDef) getTableDef();
  ArrayList<Object> output = new ArrayList<Object>();
  List<?>[] outputFromPivotFunctions = new List<?>[wTFnDef.getWindowFunctions().size()];
  ArrayList<Integer> wFnsWithWindows = new ArrayList<Integer>();
  PTFPartition iPart = pItr.getPartition();
  int i = 0;
  for (WindowFunctionDef wFn : wTFnDef.getWindowFunctions()) {
    boolean processWindow = processWindow(wFn.getWindowFrame());
    pItr.reset();
    if (!processWindow && !wFn.isPivotResult()) {
      Object out = evaluateFunctionOnPartition(wFn, iPart);
      output.add(out);
    } else if (wFn.isPivotResult()) {
      GenericUDAFEvaluator streamingEval = wFn.getWFnEval().getWindowingEvaluator(wFn.getWindowFrame());
      if (streamingEval != null && streamingEval instanceof ISupportStreamingModeForWindowing) {
        ISupportStreamingModeForWindowing strEval = (ISupportStreamingModeForWindowing) streamingEval;
        if (strEval.getRowsRemainingAfterTerminate() == 0) {
          wFn.setWFnEval(streamingEval);
          if (wFn.getOI() instanceof ListObjectInspector) {
            ListObjectInspector listOI = (ListObjectInspector) wFn.getOI();
            wFn.setOI(listOI.getListElementObjectInspector());
          }
          output.add(null);
          wFnsWithWindows.add(i);
        } else {
          outputFromPivotFunctions[i] = (List) evaluateFunctionOnPartition(wFn, iPart);
          output.add(null);
        }
      } else {
        outputFromPivotFunctions[i] = (List) evaluateFunctionOnPartition(wFn, iPart);
        output.add(null);
      }
    } else {
      output.add(null);
      wFnsWithWindows.add(i);
    }
    i++;
  }
  for (i = 0; i < iPart.getOutputOI().getAllStructFieldRefs().size(); i++) {
    output.add(null);
  }
  if (wTFnDef.getRankLimit() != -1) {
    rnkLimitDef = new RankLimit(wTFnDef.getRankLimit(), wTFnDef.getRankLimitFunction(), wTFnDef.getWindowFunctions());
  }
  return new WindowingIterator(iPart, output, outputFromPivotFunctions,
      ArrayUtils.toPrimitive(wFnsWithWindows.toArray(new Integer[wFnsWithWindows.size()])));
}
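The branching above turns on a single question per window function: can its evaluator emit results row by row (streaming), or must the whole partition be pivoted into a list first? Below is a minimal, self-contained sketch of that dispatch pattern; Evaluator and StreamingCapable are hypothetical stand-ins for GenericUDAFEvaluator and ISupportStreamingModeForWindowing, not Hive types.

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-ins for GenericUDAFEvaluator and ISupportStreamingModeForWindowing.
interface Evaluator {
  List<Object> evaluateOnPartition(List<Object> partition);
}

interface StreamingCapable extends Evaluator {
  int getRowsRemainingAfterTerminate();
}

public class StreamingDispatchSketch {
  // Mirrors the decision in iterator(): take the streaming path only when the
  // evaluator has emitted every row by the time the partition ends (0 rows
  // pending); otherwise pivot the whole partition up front.
  static boolean canStream(Evaluator eval) {
    return eval instanceof StreamingCapable
        && ((StreamingCapable) eval).getRowsRemainingAfterTerminate() == 0;
  }

  public static void main(String[] args) {
    Evaluator rowNumberLike = new StreamingCapable() {
      @Override
      public List<Object> evaluateOnPartition(List<Object> partition) {
        List<Object> out = new ArrayList<>();
        for (int r = 0; r < partition.size(); r++) {
          out.add(r + 1); // row_number-style: one value per input row
        }
        return out;
      }
      @Override
      public int getRowsRemainingAfterTerminate() {
        return 0; // emits each result as soon as the row is seen
      }
    };
    System.out.println(canStream(rowNumberLike)); // true -> streaming path
  }
}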
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef in project hive by apache.
The class Vectorizer, method createVectorPTFDesc.
/*
 * Update the VectorPTFDesc with data that is used during validation and that doesn't rely on
 * VectorizationContext to look up column names, etc.
 */
private static void createVectorPTFDesc(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc,
    VectorizationContext vContext, VectorPTFDesc vectorPTFDesc,
    int vectorizedPTFMaxMemoryBufferingBatchCount) throws HiveException {
  PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
  WindowTableFunctionDef windowTableFunctionDef = (WindowTableFunctionDef) funcDef;
  List<WindowFunctionDef> windowsFunctions = windowTableFunctionDef.getWindowFunctions();
  final int functionCount = windowsFunctions.size();
  ArrayList<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
  final int outputSize = outputSignature.size();
  /*
   * Output columns.
   */
  // Evaluator results are first.
  String[] outputColumnNames = new String[outputSize];
  TypeInfo[] outputTypeInfos = new TypeInfo[outputSize];
  for (int i = 0; i < functionCount; i++) {
    ColumnInfo colInfo = outputSignature.get(i);
    TypeInfo typeInfo = colInfo.getType();
    outputColumnNames[i] = colInfo.getInternalName();
    outputTypeInfos[i] = typeInfo;
  }
  // Followed by key and non-key input columns (some may be missing).
  for (int i = functionCount; i < outputSize; i++) {
    ColumnInfo colInfo = outputSignature.get(i);
    outputColumnNames[i] = colInfo.getInternalName();
    outputTypeInfos[i] = colInfo.getType();
  }
  List<PTFExpressionDef> partitionExpressions = funcDef.getPartition().getExpressions();
  final int partitionKeyCount = partitionExpressions.size();
  ExprNodeDesc[] partitionExprNodeDescs = getPartitionExprNodeDescs(partitionExpressions);
  List<OrderExpressionDef> orderExpressions = funcDef.getOrder().getExpressions();
  final int orderKeyCount = orderExpressions.size();
  ExprNodeDesc[] orderExprNodeDescs = getOrderExprNodeDescs(orderExpressions);
  // When both PARTITION BY and ORDER BY clauses are present, the partition expressions
  // may differ from the order expressions. Otherwise, there are only ORDER BY expressions.
  boolean isPartitionOrderBy = false;
  if (partitionKeyCount != orderKeyCount) {
    // Obviously different expressions.
    isPartitionOrderBy = true;
  } else {
    // Check each ExprNodeDesc.
    for (int i = 0; i < partitionKeyCount; i++) {
      final ExprNodeDescEqualityWrapper partitionExprEqualityWrapper =
          new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprNodeDescs[i]);
      final ExprNodeDescEqualityWrapper orderExprEqualityWrapper =
          new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprNodeDescs[i]);
      if (!partitionExprEqualityWrapper.equals(orderExprEqualityWrapper)) {
        isPartitionOrderBy = true;
        break;
      }
    }
  }
  String[] evaluatorFunctionNames = new String[functionCount];
  WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[functionCount];
  List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = (List<ExprNodeDesc>[]) new List<?>[functionCount];
  fillInPTFEvaluators(windowsFunctions, evaluatorFunctionNames, evaluatorWindowFrameDefs,
      evaluatorInputExprNodeDescLists);
  TypeInfo[] reducerBatchTypeInfos = vContext.getAllTypeInfos();
  vectorPTFDesc.setReducerBatchTypeInfos(reducerBatchTypeInfos);
  vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy);
  vectorPTFDesc.setOrderExprNodeDescs(orderExprNodeDescs);
  vectorPTFDesc.setPartitionExprNodeDescs(partitionExprNodeDescs);
  vectorPTFDesc.setEvaluatorFunctionNames(evaluatorFunctionNames);
  vectorPTFDesc.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs);
  vectorPTFDesc.setEvaluatorInputExprNodeDescLists(evaluatorInputExprNodeDescLists);
  vectorPTFDesc.setOutputColumnNames(outputColumnNames);
  vectorPTFDesc.setOutputTypeInfos(outputTypeInfos);
  vectorPTFDesc.setVectorizedPTFMaxMemoryBufferingBatchCount(vectorizedPTFMaxMemoryBufferingBatchCount);
}
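The isPartitionOrderBy computation above is an element-wise equality test between the partition and order expression lists; ExprNodeDescEqualityWrapper supplies the value-based equals()/hashCode() the comparison needs. A self-contained sketch of the same comparison, with a hypothetical Expr type and wrapper standing in for ExprNodeDesc and its wrapper:

import java.util.Arrays;
import java.util.List;
import java.util.Objects;

public class PartitionOrderCompareSketch {
  // Hypothetical stand-in for ExprNodeDesc: identity-based equals by default.
  static class Expr {
    final String column;
    Expr(String column) { this.column = column; }
  }

  // Mirrors the role of ExprNodeDescEqualityWrapper: adds the value-based
  // equals() that the wrapped type lacks.
  static class ExprWrapper {
    final Expr expr;
    ExprWrapper(Expr expr) { this.expr = expr; }
    @Override
    public boolean equals(Object o) {
      return o instanceof ExprWrapper
          && Objects.equals(expr.column, ((ExprWrapper) o).expr.column);
    }
    @Override
    public int hashCode() { return Objects.hashCode(expr.column); }
  }

  // True when the PARTITION BY and ORDER BY key lists differ, mirroring the
  // isPartitionOrderBy flag computed above.
  static boolean differentKeys(List<Expr> partitionKeys, List<Expr> orderKeys) {
    if (partitionKeys.size() != orderKeys.size()) {
      return true; // obviously different expressions
    }
    for (int i = 0; i < partitionKeys.size(); i++) {
      if (!new ExprWrapper(partitionKeys.get(i)).equals(new ExprWrapper(orderKeys.get(i)))) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    List<Expr> part = Arrays.asList(new Expr("deptno"));
    List<Expr> order = Arrays.asList(new Expr("salary"));
    System.out.println(differentKeys(part, order)); // true
  }
}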
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef in project hive by apache.
The class PTFTranslator, method translate.
public PTFDesc translate(WindowingSpec wdwSpec, SemanticAnalyzer semAly, HiveConf hCfg,
    RowResolver inputRR, UnparseTranslator unparseT) throws SemanticException {
  init(semAly, hCfg, inputRR, unparseT);
  windowingSpec = wdwSpec;
  ptfDesc = new PTFDesc();
  ptfDesc.setCfg(hCfg);
  ptfDesc.setLlInfo(llInfo);
  WindowTableFunctionDef wdwTFnDef = new WindowTableFunctionDef();
  ptfDesc.setFuncDef(wdwTFnDef);
  PTFQueryInputSpec inpSpec = new PTFQueryInputSpec();
  inpSpec.setType(PTFQueryInputType.WINDOWING);
  wdwTFnDef.setInput(translate(inpSpec, 0));
  ShapeDetails inpShape = wdwTFnDef.getInput().getOutputShape();
  WindowingTableFunctionResolver tFn = (WindowingTableFunctionResolver)
      FunctionRegistry.getTableFunctionResolver(FunctionRegistry.WINDOWING_TABLE_FUNCTION);
  if (tFn == null) {
    throw new SemanticException(String.format("Internal Error: Unknown Table Function %s",
        FunctionRegistry.WINDOWING_TABLE_FUNCTION));
  }
  wdwTFnDef.setName(FunctionRegistry.WINDOWING_TABLE_FUNCTION);
  wdwTFnDef.setResolverClassName(tFn.getClass().getName());
  wdwTFnDef.setAlias("ptf_" + 1);
  wdwTFnDef.setExpressionTreeString(null);
  wdwTFnDef.setTransformsRawInput(false);
  tFn.initialize(hCfg, ptfDesc, wdwTFnDef);
  TableFunctionEvaluator tEval = tFn.getEvaluator();
  wdwTFnDef.setTFunction(tEval);
  wdwTFnDef.setCarryForwardNames(tFn.carryForwardNames());
  wdwTFnDef.setRawInputShape(inpShape);
  PartitioningSpec partiSpec = wdwSpec.getQueryPartitioningSpec();
  if (partiSpec == null) {
    throw new SemanticException("Invalid use of Windowing: there is no Partitioning associated with Windowing");
  }
  PartitionDef partDef = translate(inpShape, wdwSpec.getQueryPartitionSpec());
  OrderDef ordDef = translate(inpShape, wdwSpec.getQueryOrderSpec(), partDef);
  wdwTFnDef.setPartition(partDef);
  wdwTFnDef.setOrder(ordDef);
  /*
   * Process the window functions.
   */
  ArrayList<WindowFunctionDef> windowFunctions = new ArrayList<WindowFunctionDef>();
  if (wdwSpec.getWindowExpressions() != null) {
    for (WindowExpressionSpec expr : wdwSpec.getWindowExpressions()) {
      if (expr instanceof WindowFunctionSpec) {
        WindowFunctionDef wFnDef = translate(wdwTFnDef, (WindowFunctionSpec) expr);
        windowFunctions.add(wFnDef);
      }
    }
    wdwTFnDef.setWindowFunctions(windowFunctions);
  }
  /*
   * Set the output shape of window-function processing.
   */
  ArrayList<String> aliases = new ArrayList<String>();
  ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
  for (WindowFunctionDef wFnDef : windowFunctions) {
    aliases.add(wFnDef.getAlias());
    if (wFnDef.isPivotResult()) {
      fieldOIs.add(((ListObjectInspector) wFnDef.getOI()).getListElementObjectInspector());
    } else {
      fieldOIs.add(wFnDef.getOI());
    }
  }
  PTFTranslator.addInputColumnsToList(inpShape, aliases, fieldOIs);
  StructObjectInspector wdwOutOI = ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);
  tFn.setWdwProcessingOutputOI(wdwOutOI);
  RowResolver wdwOutRR = buildRowResolverForWindowing(wdwTFnDef);
  ShapeDetails wdwOutShape = setupShape(wdwOutOI, null, wdwOutRR);
  wdwTFnDef.setOutputShape(wdwOutShape);
  tFn.setupOutputOI();
  PTFDeserializer.alterOutputOIForStreaming(ptfDesc);
  return ptfDesc;
}
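The windowing output shape assembled at the end of this method is a standard struct ObjectInspector over the function aliases followed by the carried-forward input columns. A small sketch of just that construction step in isolation, assuming the hive-serde artifact is on the classpath (the alias names and types here are made up for illustration):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class WindowingOutputOISketch {
  public static void main(String[] args) {
    // One alias/OI pair per window function, then the input columns,
    // mirroring the aliases/fieldOIs lists built in translate().
    List<String> aliases = new ArrayList<>();
    List<ObjectInspector> fieldOIs = new ArrayList<>();

    aliases.add("rank_col"); // hypothetical alias for a rank() column
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    aliases.add("salary");   // hypothetical carried-forward input column
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector);

    StructObjectInspector wdwOutOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(aliases, fieldOIs);
    System.out.println(wdwOutOI.getTypeName()); // struct<rank_col:int,salary:bigint>
  }
}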
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef in project hive by apache.
The class PTFTranslator, method translate.
private WindowFunctionDef translate(WindowTableFunctionDef wdwTFnDef, WindowFunctionSpec spec) throws SemanticException {
  WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(spec.getName());
  if (wFnInfo == null) {
    throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(spec.getName()));
  }
  WindowFunctionDef def = new WindowFunctionDef();
  def.setName(spec.getName());
  def.setAlias(spec.getAlias());
  def.setDistinct(spec.isDistinct());
  def.setExpressionTreeString(spec.getExpression().toStringTree());
  def.setStar(spec.isStar());
  def.setPivotResult(wFnInfo.isPivotResult());
  ShapeDetails inpShape = wdwTFnDef.getRawInputShape();
  /*
   * Translate the arguments.
   */
  ArrayList<ASTNode> args = spec.getArgs();
  if (args != null) {
    for (ASTNode expr : args) {
      PTFExpressionDef argDef = null;
      try {
        argDef = buildExpressionDef(inpShape, expr);
      } catch (HiveException he) {
        throw new SemanticException(he);
      }
      def.addArg(argDef);
    }
  }
  if (FunctionRegistry.isRankingFunction(spec.getName())) {
    setupRankingArgs(wdwTFnDef, def, spec);
  }
  WindowSpec wdwSpec = spec.getWindowSpec();
  if (wdwSpec != null) {
    String desc = spec.toString();
    WindowFrameDef wdwFrame = translate(spec.getName(), inpShape, wdwSpec);
    if (!wFnInfo.isSupportsWindow()) {
      BoundarySpec start = wdwSpec.getWindowFrame().getStart();
      if (start.getAmt() != BoundarySpec.UNBOUNDED_AMOUNT) {
        throw new SemanticException(String.format("Expecting left window frame boundary for "
            + "function %s to be unbounded. Found : %d", desc, start.getAmt()));
      }
      BoundarySpec end = wdwSpec.getWindowFrame().getEnd();
      if (end.getAmt() != BoundarySpec.UNBOUNDED_AMOUNT) {
        throw new SemanticException(String.format("Expecting right window frame boundary for "
            + "function %s to be unbounded. Found : %d", desc, end.getAmt()));
      }
    }
    def.setWindowFrame(wdwFrame);
  }
  try {
    setupWdwFnEvaluator(def);
  } catch (HiveException he) {
    throw new SemanticException(he);
  }
  return def;
}
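For a function whose WindowFunctionInfo reports isSupportsWindow() == false (rank-style functions, typically), the translator accepts an explicit frame only if both boundaries are unbounded. A minimal sketch of that validation rule, with a hypothetical UNBOUNDED sentinel and plain ints standing in for BoundarySpec:

public class FrameValidationSketch {
  // Hypothetical stand-in for BoundarySpec.UNBOUNDED_AMOUNT.
  static final int UNBOUNDED = Integer.MAX_VALUE;

  // Mirrors the check in translate(): a frame is acceptable for a
  // non-windowing function only if both boundaries are unbounded.
  static void requireUnbounded(String fn, int startAmt, int endAmt) {
    if (startAmt != UNBOUNDED) {
      throw new IllegalArgumentException(String.format(
          "Expecting left window frame boundary for function %s to be unbounded. Found : %d",
          fn, startAmt));
    }
    if (endAmt != UNBOUNDED) {
      throw new IllegalArgumentException(String.format(
          "Expecting right window frame boundary for function %s to be unbounded. Found : %d",
          fn, endAmt));
    }
  }

  public static void main(String[] args) {
    requireUnbounded("rank", UNBOUNDED, UNBOUNDED); // accepted
    try {
      requireUnbounded("rank", UNBOUNDED, 5);       // rejected: finite right boundary
    } catch (IllegalArgumentException e) {
      System.out.println(e.getMessage());
    }
  }
}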
Use of org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef in project hive by apache.
The class WindowingTableFunction, method initializeStreaming.
@Override
public void initializeStreaming(Configuration cfg, StructObjectInspector inputOI, boolean isMapSide) throws HiveException {
  int[] span = setCanAcceptInputAsStream(cfg);
  if (!canAcceptInputAsStream) {
    return;
  }
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) getTableDef();
  for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
    WindowFunctionDef wFnDef = tabDef.getWindowFunctions().get(i);
    WindowFrameDef wdwFrame = wFnDef.getWindowFrame();
    GenericUDAFEvaluator fnEval = wFnDef.getWFnEval();
    GenericUDAFEvaluator streamingEval = fnEval.getWindowingEvaluator(wdwFrame);
    if (streamingEval != null) {
      wFnDef.setWFnEval(streamingEval);
      if (wFnDef.isPivotResult()) {
        ListObjectInspector listOI = (ListObjectInspector) wFnDef.getOI();
        wFnDef.setOI(listOI.getListElementObjectInspector());
      }
    }
  }
  if (tabDef.getRankLimit() != -1) {
    rnkLimitDef = new RankLimit(tabDef.getRankLimit(), tabDef.getRankLimitFunction(), tabDef.getWindowFunctions());
  }
  streamingState = new StreamingState(cfg, inputOI, isMapSide, tabDef, span[0], span[1]);
}
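Substituting the streaming evaluator changes the shape of a pivot-result function's output from one list per partition to one value per row, which is why the ObjectInspector is unwrapped to its element inspector. A short sketch of just that unwrapping step against the serde ObjectInspector API, assuming hive-serde is on the classpath (the list OI here is constructed by hand):

import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class PivotResultOIUnwrapSketch {
  public static void main(String[] args) {
    // Batch mode: a pivot-result function yields a list of ints per partition.
    ObjectInspector batchOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector);

    // Streaming mode: the same function yields one int per row, so the OI
    // is unwrapped exactly as in initializeStreaming() above.
    ObjectInspector streamingOI;
    if (batchOI instanceof ListObjectInspector) {
      streamingOI = ((ListObjectInspector) batchOI).getListElementObjectInspector();
    } else {
      streamingOI = batchOI;
    }
    System.out.println(batchOI.getTypeName());     // array<int>
    System.out.println(streamingOI.getTypeName()); // int
  }
}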