Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
The class TestExecDriver, method populateMapRedPlan1.
@SuppressWarnings("unchecked")
private void populateMapRedPlan1(Table src) throws SemanticException {
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  // map-side work
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx,
      PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("key")),
          Utilities.makeList(getStringColumn("value")), outputColumns, true, -1, 1, -1,
          AcidUtils.Operation.NOT_ACID, NullOrdering.NULLS_LAST));
  addMapWork(mr, src, "a", op1);
  ReduceWork rWork = new ReduceWork();
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
  mr.setReduceWork(rWork);
  // reduce side work
  Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
      new FileSinkDesc(new Path(TMPDIR + File.separator + "mapredplan1.out"),
          Utilities.defaultTd, false));
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
  List<String> colNames = new ArrayList<String>();
  colNames.add(HiveConf.getColumnInternalName(2));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3);
  rWork.setReducer(op2);
}
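For reference, the SelectDesc used for op2 above simply pairs a list of column expressions with a parallel list of internal output names; the operator projects exactly those expressions under those names. A minimal standalone sketch of that pairing, assuming a string column "value" under a table alias "t" (the class name SelectDescSketch and all identifiers in it are illustrative, not taken from the test):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SelectDescSketch {
  public static SelectDesc projectSingleColumn() {
    // Project one string column "value" from table alias "t" under the
    // internal output name "_col0" (names here are illustrative).
    ExprNodeDesc valueCol =
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "value", "t", false);
    List<ExprNodeDesc> colList = Arrays.asList(valueCol);
    List<String> outputNames = Arrays.asList("_col0");
    return new SelectDesc(colList, outputNames);
  }
}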
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
The class TestExecDriver, method populateMapRedPlan4.
@SuppressWarnings("unchecked")
private void populateMapRedPlan4(Table src) throws SemanticException {
  // map-side work
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx,
      PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("tkey")),
          Utilities.makeList(getStringColumn("tkey"), getStringColumn("tvalue")),
          outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID,
          NullOrdering.NULLS_LAST));
  Operator<ScriptDesc> op0 = OperatorFactory.get(
      new ScriptDesc("cat",
          PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key,value"),
          TextRecordWriter.class,
          PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "tkey,tvalue"),
          TextRecordReader.class, TextRecordReader.class,
          PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key")), op1);
  Operator<SelectDesc> op4 = OperatorFactory.get(
      new SelectDesc(Utilities.makeList(getStringColumn("key"), getStringColumn("value")),
          outputColumns), op0);
  addMapWork(mr, src, "a", op4);
  ReduceWork rWork = new ReduceWork();
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
  rWork.setNumReduceTasks(Integer.valueOf(1));
  mr.setReduceWork(rWork);
  // reduce side work
  Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
      new FileSinkDesc(new Path(TMPDIR + File.separator + "mapredplan4.out"),
          Utilities.defaultTd, false));
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
  rWork.setReducer(op2);
}
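Note how the map-side chain here is assembled bottom-up: each OperatorFactory.get(desc, child) call creates an operator with its downstream child already attached, so op4 (Select) feeds op0 (Script), which feeds op1 (ReduceSink). A two-operator sketch of the same idiom, using only classes already seen above (ChainSketch and its column names are invented for illustration):

import java.util.Arrays;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ChainSketch {
  public static Operator<SelectDesc> buildChain() {
    CompilationOpContext ctx = new CompilationOpContext();
    ExprNodeDesc keyCol =
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", "t", false);
    // Downstream operator, created first with an explicit context.
    Operator<SelectDesc> child = OperatorFactory.get(ctx,
        new SelectDesc(Arrays.asList(keyCol), Arrays.asList("_col0")));
    // Upstream operator, created second; passing the child attaches it,
    // mirroring how op4 -> op0 -> op1 are chained above.
    return OperatorFactory.get(
        new SelectDesc(Arrays.asList(keyCol), Arrays.asList("_col0")), child);
  }
}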
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
The class MapJoinTestConfig, method addFullOuterIntercept.
public static CountCollectorTestOperator addFullOuterIntercept(
    MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc,
    RowTestObjectsMultiSet outputTestRowMultiSet, MapJoinTestData testData,
    MapJoinOperator mapJoinOperator, MapJoinTableContainer mapJoinTableContainer,
    MapJoinTableContainerSerDe mapJoinTableContainerSerDe)
    throws SerDeException, IOException, HiveException {
  MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf();
  // For FULL OUTER MapJoin, we require all Big Keys to be present in the output result.
  // The first N output columns are the Big Table key columns.
  Map<Byte, List<ExprNodeDesc>> keyMap = mapJoinDesc.getKeys();
  List<ExprNodeDesc> bigTableKeyExprs = keyMap.get((byte) 0);
  final int bigTableKeySize = bigTableKeyExprs.size();
  Map<Byte, List<Integer>> retainMap = mapJoinDesc.getRetainList();
  List<Integer> bigTableRetainList = retainMap.get((byte) 0);
  final int bigTableRetainSize = bigTableRetainList.size();
  List<String> outputColumnNameList = mapJoinDesc.getOutputColumnNames();
  String[] mapJoinOutputColumnNames = outputColumnNameList.toArray(new String[0]);
  // Use a utility method to get the MapJoin output TypeInfos.
  TypeInfo[] mapJoinOutputTypeInfos = VectorMapJoinBaseOperator.getOutputTypeInfos(mapJoinDesc);
  final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation);
  /*
   * Always create a row-mode SelectOperator. If we are vector-mode, we then reuse its
   * expressions and replace it with a VectorSelectOperator.
   */
  Operator<SelectDesc> selectOperator = makeInterceptSelectOperator(
      mapJoinOperator, bigTableKeySize, bigTableRetainSize,
      mapJoinOutputColumnNames, mapJoinOutputTypeInfos);
  List<String> selectOutputColumnNameList =
      ((SelectDesc) selectOperator.getConf()).getOutputColumnNames();
  String[] selectOutputColumnNames = selectOutputColumnNameList.toArray(new String[0]);
  if (isVectorOutput) {
    selectOperator = vectorizeInterceptSelectOperator(
        mapJoinOperator, bigTableKeySize, bigTableRetainSize, selectOperator);
  }
  /*
   * Create a test description just for the FULL OUTER INTERCEPT, with the intercept
   * Select's output column names and only the Big Table's retained column types.
   */
  MapJoinTestDescription interceptTestDesc = new MapJoinTestDescription(
      testDesc.hiveConf, testDesc.vectorMapJoinVariation, selectOutputColumnNames,
      Arrays.copyOf(mapJoinOutputTypeInfos, bigTableRetainSize),
      testDesc.bigTableKeyColumnNums, testDesc.smallTableValueTypeInfos,
      testDesc.smallTableRetainKeyColumnNums, testDesc.smallTableGenerationParameters,
      testDesc.mapJoinPlanVariation);
  MapJoinDesc intersectMapJoinDesc =
      createMapJoinDesc(interceptTestDesc, /* isFullOuterIntersect */ true);
  /*
   * Create the FULL OUTER INTERSECT MapJoin operator.
   */
  CreateMapJoinResult interceptCreateMapJoinResult = createMapJoinImplementation(
      mapJoinImplementation, interceptTestDesc, testData, intersectMapJoinDesc);
  MapJoinOperator intersectMapJoinOperator = interceptCreateMapJoinResult.mapJoinOperator;
  MapJoinTableContainer intersectMapJoinTableContainer =
      interceptCreateMapJoinResult.mapJoinTableContainer;
  MapJoinTableContainerSerDe interceptMapJoinTableContainerSerDe =
      interceptCreateMapJoinResult.mapJoinTableContainerSerDe;
  connectOperators(mapJoinOperator, selectOperator);
  connectOperators(selectOperator, intersectMapJoinOperator);
  CountCollectorTestOperator interceptTestCollectorOperator;
  if (!isVectorOutput) {
    interceptTestCollectorOperator = new TestMultiSetCollectorOperator(
        interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet);
  } else {
    VectorizationContext vContext =
        ((VectorizationContextRegion) intersectMapJoinOperator).getOutputVectorizationContext();
    int[] intersectProjectionColumns =
        ArrayUtils.toPrimitive(vContext.getProjectedColumns().toArray(new Integer[0]));
    interceptTestCollectorOperator = new TestMultiSetVectorCollectorOperator(
        intersectProjectionColumns, interceptTestDesc.outputTypeInfos,
        interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet);
  }
  connectOperators(intersectMapJoinOperator, interceptTestCollectorOperator);
  // Set up the FULL OUTER INTERSECT MapJoin's inputObjInspectors to include the Small Table, etc.
  intersectMapJoinOperator.setInputObjInspectors(interceptTestDesc.inputObjectInspectors);
  // Now, invoke the initializeOp methods from the root MapJoin operator down.
  mapJoinOperator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors);
  // Fix up the mapJoinTables container references to our test data.
  mapJoinOperator.setTestMapJoinTableContainer(1, mapJoinTableContainer, mapJoinTableContainerSerDe);
  intersectMapJoinOperator.setTestMapJoinTableContainer(
      1, intersectMapJoinTableContainer, interceptMapJoinTableContainerSerDe);
  return interceptTestCollectorOperator;
}
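connectOperators is a private helper of MapJoinTestConfig; what it must accomplish is the standard parent/child bookkeeping on Operator. A sketch of equivalent wiring through the public setChildOperators/setParentOperators API, assuming single-parent, single-child operators (the helper name wire is hypothetical):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

public class WiringSketch {
  // Make `child` the sole child of `parent` and `parent` the sole parent
  // of `child`, so rows flow parent -> child during operator execution.
  public static void wire(Operator<? extends OperatorDesc> parent,
      Operator<? extends OperatorDesc> child) {
    List<Operator<? extends OperatorDesc>> children = new ArrayList<>();
    children.add(child);
    parent.setChildOperators(children);
    List<Operator<? extends OperatorDesc>> parents = new ArrayList<>();
    parents.add(parent);
    child.setParentOperators(parents);
  }
}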
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
The class MapJoinTestConfig, method makeInterceptSelectOperator.
private static Operator<SelectDesc> makeInterceptSelectOperator(
    MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize,
    String[] outputColumnNames, TypeInfo[] outputTypeInfos) {
  MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf();
  List<ExprNodeDesc> selectExprList = new ArrayList<ExprNodeDesc>();
  List<String> selectOutputColumnNameList = new ArrayList<String>();
  for (int i = 0; i < bigTableRetainSize; i++) {
    String selectOutputColumnName = HiveConf.getColumnInternalName(i);
    selectOutputColumnNameList.add(selectOutputColumnName);
    TypeInfo outputTypeInfo = outputTypeInfos[i];
    if (i < bigTableKeySize) {
      // Big Table key.
      ExprNodeColumnDesc keyColumnExpr =
          new ExprNodeColumnDesc(outputTypeInfo, outputColumnNames[i], "test", false);
      selectExprList.add(keyColumnExpr);
    } else {
      // For row-mode, substitute a NULL constant for any non-key extra Big Table columns.
      ExprNodeConstantDesc nullExtraColumnExpr = new ExprNodeConstantDesc(outputTypeInfo, null);
      nullExtraColumnExpr.setFoldedFromCol(outputColumnNames[i]);
      selectExprList.add(nullExtraColumnExpr);
    }
  }
  SelectDesc selectDesc = new SelectDesc(selectExprList, selectOutputColumnNameList);
  Operator<SelectDesc> selectOperator =
      OperatorFactory.get(new CompilationOpContext(), selectDesc);
  return selectOperator;
}
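The essential trick in makeInterceptSelectOperator is that a SelectDesc expression list may freely mix ExprNodeColumnDesc references with typed constants, so non-key columns can be masked with NULL without changing the output schema. A minimal sketch of just that substitution (NullColumnSketch is a hypothetical name; the string TypeInfo is an arbitrary choice):

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class NullColumnSketch {
  // Build a typed NULL constant standing in for a column that downstream
  // operators should ignore; setFoldedFromCol records which column it replaced.
  public static ExprNodeDesc nullFor(String replacedColumnName) {
    ExprNodeConstantDesc nullExpr =
        new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null);
    nullExpr.setFoldedFromCol(replacedColumnName);
    return nullExpr;
  }
}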
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
The class MapJoinTestConfig, method vectorizeInterceptSelectOperator.
private static Operator<SelectDesc> vectorizeInterceptSelectOperator(
    MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize,
    Operator<SelectDesc> selectOperator) throws HiveException {
  MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf();
  VectorizationContext vOutContext =
      ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext();
  SelectDesc selectDesc = (SelectDesc) selectOperator.getConf();
  List<ExprNodeDesc> selectExprs = selectDesc.getColList();
  VectorExpression[] selectVectorExpr = new VectorExpression[bigTableRetainSize];
  for (int i = 0; i < bigTableRetainSize; i++) {
    TypeInfo typeInfo = selectExprs.get(i).getTypeInfo();
    if (i < bigTableKeySize) {
      // Big Table key.
      selectVectorExpr[i] = vOutContext.getVectorExpression(selectExprs.get(i));
    } else {
      // For vector-mode, for test purposes we substitute a NO-OP (we don't want to
      // modify the batch). FULL OUTER INTERCEPT does not look at non-key columns.
      NoOpExpression noOpExpression = new NoOpExpression(i);
      noOpExpression.setInputTypeInfos(typeInfo);
      noOpExpression.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
      noOpExpression.setOutputTypeInfo(typeInfo);
      noOpExpression.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
      selectVectorExpr[i] = noOpExpression;
    }
  }
  System.out.println("*BENCHMARK* VectorSelectOperator selectVectorExpr "
      + Arrays.toString(selectVectorExpr));
  int[] projectedColumns = ArrayUtils.toPrimitive(
      vOutContext.getProjectedColumns().subList(0, bigTableRetainSize).toArray(new Integer[0]));
  System.out.println("*BENCHMARK* VectorSelectOperator projectedColumns "
      + Arrays.toString(projectedColumns));
  VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
  vectorSelectDesc.setSelectExpressions(selectVectorExpr);
  vectorSelectDesc.setProjectedOutputColumns(projectedColumns);
  Operator<SelectDesc> vectorSelectOperator = OperatorFactory.getVectorOperator(
      selectOperator.getCompilationOpContext(), selectDesc, vOutContext, vectorSelectDesc);
  return vectorSelectOperator;
}
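Unlike a row-mode SelectDesc, a VectorSelectDesc is configured positionally: an array of VectorExpressions to evaluate plus the batch column numbers to project. A minimal sketch that leaves batch column 0 untouched and projects only it, reusing the NoOpExpression seen above (VectorSelectDescSketch is a hypothetical name):

import org.apache.hadoop.hive.ql.exec.vector.expressions.NoOpExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.plan.VectorSelectDesc;

public class VectorSelectDescSketch {
  public static VectorSelectDesc passThroughColumnZero() {
    // A no-op expression: the batch already holds the column; nothing to compute.
    VectorExpression noOp = new NoOpExpression(0);
    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
    vectorSelectDesc.setSelectExpressions(new VectorExpression[] { noOp });
    vectorSelectDesc.setProjectedOutputColumns(new int[] { 0 });
    return vectorSelectDesc;
  }
}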