Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
From the class TestTezTask, the method setUp:
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
  utils = mock(DagUtils.class);
  fs = mock(FileSystem.class);
  path = mock(Path.class);
  when(path.getFileSystem(any(Configuration.class))).thenReturn(fs);
  when(utils.getTezDir(any(Path.class))).thenReturn(path);
  // Stub vertex creation: return a bare Vertex named after the BaseWork argument.
  when(utils.createVertex(any(JobConf.class), any(BaseWork.class), any(Path.class),
      any(LocalResource.class), any(List.class), any(FileSystem.class), any(Context.class),
      anyBoolean(), any(TezWork.class), any(VertexType.class))).thenAnswer(new Answer<Vertex>() {
    @Override
    public Vertex answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      return Vertex.create(((BaseWork) args[1]).getName(), mock(ProcessorDescriptor.class), 0,
          mock(Resource.class));
    }
  });
  // Stub edge creation: connect the two Vertex arguments with a mocked EdgeProperty.
  when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(Vertex.class),
      any(TezEdgeProperty.class), any(VertexType.class))).thenAnswer(new Answer<Edge>() {
    @Override
    public Edge answer(InvocationOnMock invocation) throws Throwable {
      Object[] args = invocation.getArguments();
      return Edge.create((Vertex) args[1], (Vertex) args[2], mock(EdgeProperty.class));
    }
  });
  // Assemble a TezWork graph of two MapWorks and two ReduceWorks.
  work = new TezWork("", null);
  mws = new MapWork[] { new MapWork(), new MapWork() };
  rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() };
  work.addAll(mws);
  work.addAll(rws);
  int i = 0;
  for (BaseWork w : work.getAllWork()) {
    w.setName("Work " + (++i));
  }
  op = mock(Operator.class);
  LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
      new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
  map.put("foo", op);
  mws[0].setAliasToWork(map);
  mws[1].setAliasToWork(map);
  LinkedHashMap<Path, ArrayList<String>> pathMap = new LinkedHashMap<>();
  ArrayList<String> aliasList = new ArrayList<String>();
  aliasList.add("foo");
  pathMap.put(new Path("foo"), aliasList);
  mws[0].setPathToAliases(pathMap);
  mws[1].setPathToAliases(pathMap);
  rws[0].setReducer(op);
  rws[1].setReducer(op);
  // Both map works feed the first reduce work, which feeds the second.
  TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
  work.connect(mws[0], rws[0], edgeProp);
  work.connect(mws[1], rws[0], edgeProp);
  work.connect(rws[0], rws[1], edgeProp);
  task = new TezTask(utils);
  task.setWork(work);
  task.setConsole(mock(LogHelper.class));
  QueryPlan mockQueryPlan = mock(QueryPlan.class);
  doReturn(UUID.randomUUID().toString()).when(mockQueryPlan).getQueryId();
  task.setQueryPlan(mockQueryPlan);
  conf = new JobConf();
  appLr = mock(LocalResource.class);
  HiveConf hiveConf = new HiveConf();
  hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
      "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
  SessionState.start(hiveConf);
  session = mock(TezClient.class);
  sessionState = mock(TezSessionState.class);
  when(sessionState.getSession()).thenReturn(session);
  // First DAG submission fails with SessionNotRunning; the retry succeeds.
  when(session.submitDAG(any(DAG.class)))
      .thenThrow(new SessionNotRunning(""))
      .thenReturn(mock(DAGClient.class));
}
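Beneath the Mockito scaffolding, the setup above reduces to a small wiring pattern: ReduceWork instances are ordinary BaseWork vertices that TezWork links with connect. A minimal sketch of just that graph construction (the "q1" query id is an arbitrary placeholder; the calls themselves follow the test verbatim):

// The DAG shape built in setUp(), without the mocks:
//   map 1 ──┐
//           ├──> reduce 1 ──> reduce 2
//   map 2 ──┘
TezWork work = new TezWork("q1", null);
MapWork[] maps = { new MapWork(), new MapWork() };
ReduceWork[] reduces = { new ReduceWork(), new ReduceWork() };
work.addAll(maps);
work.addAll(reduces);
// SIMPLE_EDGE is an ordinary shuffle edge; reusing one TezEdgeProperty
// for every connection mirrors what the test does.
TezEdgeProperty shuffle = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
work.connect(maps[0], reduces[0], shuffle);
work.connect(maps[1], reduces[0], shuffle);
work.connect(reduces[0], reduces[1], shuffle);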
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
From the class TestExecDriver, the method populateMapRedPlan1:
@SuppressWarnings("unchecked")
private void populateMapRedPlan1(Table src) throws SemanticException {
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  // Map-side work: a ReduceSink keyed on "key" that forwards "value".
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(
      Utilities.makeList(getStringColumn("key")), Utilities.makeList(getStringColumn("value")),
      outputColumns, true, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
  addMapWork(mr, src, "a", op1);
  // The ReduceWork copies its key/value descriptors from the ReduceSink so the
  // reduce side deserializes rows exactly as the map side serialized them.
  ReduceWork rWork = new ReduceWork();
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
  mr.setReduceWork(rWork);
  // Reduce-side work: project the value column into a file sink.
  Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(
      new Path(tmpdir + File.separator + "mapredplan1.out"), Utilities.defaultTd, false));
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
  List<String> colNames = new ArrayList<String>();
  colNames.add(HiveConf.getColumnInternalName(2));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3);
  rWork.setReducer(op2);
}
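All four plan builders shown here repeat the same handshake between the map-side ReduceSink and the ReduceWork. A hedged distillation of that pattern (rsOp and reducerOp are placeholder names for the ReduceSinkOperator and the root of the reduce-side operator tree; mr is the surrounding MapredWork, as in the tests):

ReduceWork rWork = new ReduceWork();
// A single reducer keeps the test output deterministic.
rWork.setNumReduceTasks(Integer.valueOf(1));
// Key and value serialization descriptors must match the ReduceSink's,
// so both are copied straight out of its descriptor rather than rebuilt.
rWork.setKeyDesc(rsOp.getConf().getKeySerializeInfo());
// tagToValueDesc is a list indexed by input tag; with one input, one entry.
rWork.getTagToValueDesc().add(rsOp.getConf().getValueSerializeInfo());
// Attach the reduce-side operator tree and hand the ReduceWork to the plan.
rWork.setReducer(reducerOp);
mr.setReduceWork(rWork);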
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
From the class TestExecDriver, the method populateMapRedPlan5:
@SuppressWarnings("unchecked")
private void populateMapRedPlan5(Table src) throws SemanticException {
  // map-side work
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  Operator<ReduceSinkDesc> op0 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(
      Utilities.makeList(getStringColumn("0")),
      Utilities.makeList(getStringColumn("0"), getStringColumn("1")),
      outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
  Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(
      Utilities.makeList(getStringColumn("key"), getStringColumn("value")), outputColumns), op0);
  addMapWork(mr, src, "a", op4);
  ReduceWork rWork = new ReduceWork();
  mr.setReduceWork(rWork);
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op0.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());
  // reduce-side work
  Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(
      new Path(tmpdir + File.separator + "mapredplan5.out"), Utilities.defaultTd, false));
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
  rWork.setReducer(op2);
}
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
From the class TestExecDriver, the method populateMapRedPlan2:
@SuppressWarnings("unchecked")
private void populateMapRedPlan2(Table src) throws Exception {
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  // map-side work
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(
      Utilities.makeList(getStringColumn("key")),
      Utilities.makeList(getStringColumn("key"), getStringColumn("value")),
      outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
  addMapWork(mr, src, "a", op1);
  ReduceWork rWork = new ReduceWork();
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
  mr.setReduceWork(rWork);
  // reduce-side work
  Operator<FileSinkDesc> op4 = OperatorFactory.get(ctx, new FileSinkDesc(
      new Path(tmpdir + File.separator + "mapredplan2.out"), Utilities.defaultTd, false));
  Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
  rWork.setReducer(op2);
}
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
From the class TestExecDriver, the method populateMapRedPlan4:
@SuppressWarnings("unchecked")
private void populateMapRedPlan4(Table src) throws SemanticException {
  // map-side work
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(
      Utilities.makeList(getStringColumn("tkey")),
      Utilities.makeList(getStringColumn("tkey"), getStringColumn("tvalue")),
      outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
  Operator<ScriptDesc> op0 = OperatorFactory.get(new ScriptDesc("cat",
      PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key,value"), TextRecordWriter.class,
      PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "tkey,tvalue"), TextRecordReader.class,
      TextRecordReader.class, PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key")), op1);
  Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(
      Utilities.makeList(getStringColumn("key"), getStringColumn("value")), outputColumns), op0);
  addMapWork(mr, src, "a", op4);
  ReduceWork rWork = new ReduceWork();
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
  rWork.setNumReduceTasks(Integer.valueOf(1));
  mr.setReduceWork(rWork);
  // reduce-side work
  Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(
      new Path(tmpdir + File.separator + "mapredplan4.out"), Utilities.defaultTd, false));
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
  rWork.setReducer(op2);
}
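A detail shared by plans 2, 4, and 5 above: the reduce-side Select never references the source table's column names. After the shuffle, rows arrive as a synthetic key/value record, so expressions address them through Utilities.ReduceField. A minimal sketch of that addressing, assuming the test's own getStringColumn helper and a placeholder fileSink operator:

// The reduce side sees a synthetic record with two namespaces:
//   KEY.reducesinkkey0, KEY.reducesinkkey1, ...  -- the sort/partition keys
//   VALUE._col0, VALUE._col1, ...                -- the forwarded value columns
List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey0"));   // first key column
cols.add(getStringColumn(Utilities.ReduceField.VALUE + "._col1"));          // second value column
Operator<SelectDesc> select = OperatorFactory.get(new SelectDesc(cols, outputColumns), fileSink);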