
Example 1 with FileSinkDesc

Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in the apache/hive project.

From the class TestExecDriver, method populateMapRedPlan1:

@SuppressWarnings("unchecked")
private void populateMapRedPlan1(Table src) throws SemanticException {
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    // map-side work: shuffle on "key", carrying "value"
    Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx,
        PlanUtils.getReduceSinkDesc(
            Utilities.makeList(getStringColumn("key")),
            Utilities.makeList(getStringColumn("value")),
            outputColumns, true, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
    addMapWork(mr, src, "a", op1);
    ReduceWork rWork = new ReduceWork();
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    mr.setReduceWork(rWork);
    // reduce-side work: select the value column and write it to a file sink
    Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
        new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan1.out"),
            Utilities.defaultTd, false));
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "."
        + outputColumns.get(1)));
    List<String> colNames = new ArrayList<String>();
    colNames.add(HiveConf.getColumnInternalName(2));
    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3);
    rWork.setReducer(op2);
}
Also used: Path (org.apache.hadoop.fs.Path), FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc), ArrayList (java.util.ArrayList), ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc), ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
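
The getStringColumn helper these snippets lean on is defined elsewhere in TestExecDriver. A minimal sketch of what it plausibly looks like, assuming the standard four-argument ExprNodeColumnDesc constructor (type info, column name, table alias, is-partition-or-virtual-column flag):

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Builds a string-typed column reference. The empty table alias is what lets
// the examples address reduce-side fields such as "VALUE._col1" directly.
private static ExprNodeColumnDesc getStringColumn(String columnName) {
    return new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, columnName, "", false);
}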

Example 2 with FileSinkDesc

Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in the apache/hive project.

From the class TestExecDriver, method populateMapRedPlan5:

@SuppressWarnings("unchecked")
private void populateMapRedPlan5(Table src) throws SemanticException {
    // map-side work
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    // shuffle on column "0", carrying both projected columns as the value
    Operator<ReduceSinkDesc> op0 = OperatorFactory.get(ctx,
        PlanUtils.getReduceSinkDesc(
            Utilities.makeList(getStringColumn("0")),
            Utilities.makeList(getStringColumn("0"), getStringColumn("1")),
            outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
    // project (key, value) into (_col0, _col1) ahead of the reduce sink
    Operator<SelectDesc> op4 = OperatorFactory.get(
        new SelectDesc(Utilities.makeList(getStringColumn("key"), getStringColumn("value")),
            outputColumns), op0);
    addMapWork(mr, src, "a", op4);
    ReduceWork rWork = new ReduceWork();
    mr.setReduceWork(rWork);
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op0.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());
    // reduce-side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
        new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan5.out"),
            Utilities.defaultTd, false));
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "."
        + outputColumns.get(1)));
    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
    rWork.setReducer(op2);
}
Also used: Path (org.apache.hadoop.fs.Path), FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc), ArrayList (java.util.ArrayList), ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
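
addMapWork, also part of TestExecDriver, registers the source table on the map side of the MapredWork under the given alias. A plausible sketch, assuming MapWork exposes an addMapWork(path, alias, operator, partitionDesc) registration method as in the Hive 2.x line; treat the exact signatures as assumptions:

import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;

// Points the alias at the table's data location and hangs the given operator
// tree off it; the null partition spec marks the table as unpartitioned.
private void addMapWork(MapredWork mr, Table tbl, String alias, Operator<?> work) {
    mr.getMapWork().addMapWork(tbl.getDataLocation(), alias, work,
        new PartitionDesc(Utilities.getTableDesc(tbl), null));
}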

Example 3 with FileSinkDesc

Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in the apache/hive project.

From the class TestExecDriver, method populateMapPlan2:

@SuppressWarnings("unchecked")
private void populateMapPlan2(Table src) throws Exception {
    // map-only plan: filter -> script ("cat") -> file sink
    Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
        new FileSinkDesc(new Path(tmpdir + File.separator + "mapplan2.out"),
            Utilities.defaultTd, false));
    Operator<ScriptDesc> op2 = OperatorFactory.get(
        new ScriptDesc("cat",
            PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key,value"),
            TextRecordWriter.class,
            PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key,value"),
            TextRecordReader.class, TextRecordReader.class,
            PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key")), op3);
    Operator<FilterDesc> op1 = OperatorFactory.get(getTestFilterDesc("key"), op2);
    addMapWork(mr, src, "a", op1);
}
Also used: Path (org.apache.hadoop.fs.Path), ScriptDesc (org.apache.hadoop.hive.ql.plan.ScriptDesc), FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc), FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc)
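
getTestFilterDesc, used here and in Example 4, produces the predicate for the FilterOperator. A simplified, hypothetical version that builds a boolean (column < "100") expression through FunctionRegistry; the real test routes the comparison through a double cast, omitted here for brevity:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Predicate: column < "100". The boolean flag on FilterDesc marks whether the
// filter comes from table sampling; these plans do not sample.
private FilterDesc getTestFilterDesc(String column) throws Exception {
    List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
    args.add(getStringColumn(column));
    args.add(new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "100"));
    ExprNodeDesc lessThan = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo,
        FunctionRegistry.getFunctionInfo("<").getGenericUDF(), args);
    return new FilterDesc(lessThan, false);
}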

Example 4 with FileSinkDesc

Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in the apache/hive project.

From the class TestExecDriver, method populateMapRedPlan2:

@SuppressWarnings("unchecked")
private void populateMapRedPlan2(Table src) throws Exception {
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    // map-side work: shuffle on "key", carrying (key, value)
    Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx,
        PlanUtils.getReduceSinkDesc(
            Utilities.makeList(getStringColumn("key")),
            Utilities.makeList(getStringColumn("key"), getStringColumn("value")),
            outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
    addMapWork(mr, src, "a", op1);
    ReduceWork rWork = new ReduceWork();
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    mr.setReduceWork(rWork);
    // reduce-side work: select -> filter -> file sink
    Operator<FileSinkDesc> op4 = OperatorFactory.get(ctx,
        new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan2.out"),
            Utilities.defaultTd, false));
    Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "."
        + outputColumns.get(1)));
    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
    rWork.setReducer(op2);
}
Also used: Path (org.apache.hadoop.fs.Path), FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc), ArrayList (java.util.ArrayList), ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork), FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc), ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
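
Every plan above terminates in a FileSinkDesc created with the three-argument constructor; reading it as (output directory, table descriptor, compressed) makes the snippets easier to follow. A standalone illustration, assuming a local tmpdir string:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;

// Uncompressed output under tmpdir, laid out by Hive's default text
// TableDesc (Utilities.defaultTd).
FileSinkDesc sink = new FileSinkDesc(
    new Path(tmpdir, "mapredplan2.out"), // where the sink writes
    Utilities.defaultTd,                 // row layout / serde descriptor
    false);                              // compressed = false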

Example 5 with FileSinkDesc

Use of org.apache.hadoop.hive.ql.plan.FileSinkDesc in the apache/hive project.

From the class TestExecDriver, method populateMapRedPlan4:

@SuppressWarnings("unchecked")
private void populateMapRedPlan4(Table src) throws SemanticException {
    // map-side work
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    // shuffle on the script's transformed key "tkey", carrying (tkey, tvalue)
    Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx,
        PlanUtils.getReduceSinkDesc(
            Utilities.makeList(getStringColumn("tkey")),
            Utilities.makeList(getStringColumn("tkey"), getStringColumn("tvalue")),
            outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
    // pipe (key, value) through "cat", renaming the output to (tkey, tvalue)
    Operator<ScriptDesc> op0 = OperatorFactory.get(
        new ScriptDesc("cat",
            PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key,value"),
            TextRecordWriter.class,
            PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "tkey,tvalue"),
            TextRecordReader.class, TextRecordReader.class,
            PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key")), op1);
    Operator<SelectDesc> op4 = OperatorFactory.get(
        new SelectDesc(Utilities.makeList(getStringColumn("key"), getStringColumn("value")),
            outputColumns), op0);
    addMapWork(mr, src, "a", op4);
    ReduceWork rWork = new ReduceWork();
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    rWork.setNumReduceTasks(Integer.valueOf(1));
    mr.setReduceWork(rWork);
    // reduce-side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
        new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan4.out"),
            Utilities.defaultTd, false));
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "."
        + outputColumns.get(1)));
    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
    rWork.setReducer(op2);
}
Also used: ScriptDesc (org.apache.hadoop.hive.ql.plan.ScriptDesc), Path (org.apache.hadoop.fs.Path), FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc), ArrayList (java.util.ArrayList), ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
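
A final note on the reduce-side column references: the Select operators address their input through the Utilities.ReduceField enum, where key fields surface as reducesinkkey<N> and value fields keep the _col<N> names assigned on the map side. The strings below are exactly what the concatenations in these examples produce:

import org.apache.hadoop.hive.ql.exec.Utilities;

String keyRef = Utilities.ReduceField.KEY + ".reducesinkkey" + 0;         // "KEY.reducesinkkey0"
String valueRef = Utilities.ReduceField.VALUE.toString() + "." + "_col1"; // "VALUE._col1"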

Aggregations

FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc): 22 usages
Path (org.apache.hadoop.fs.Path): 13 usages
ArrayList (java.util.ArrayList): 11 usages
ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc): 8 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 6 usages
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 6 usages
SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 6 usages
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 4 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 4 usages
DynamicPartitionCtx (org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx): 4 usages
FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc): 4 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 3 usages
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 3 usages
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 3 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 3 usages
IOException (java.io.IOException): 2 usages
Serializable (java.io.Serializable): 2 usages
LinkedHashMap (java.util.LinkedHashMap): 2 usages
List (java.util.List): 2 usages
Context (org.apache.hadoop.hive.ql.Context): 2 usages