Example 1 with ScriptDesc

Use of org.apache.hadoop.hive.ql.plan.ScriptDesc in project hive by apache.

From the class TestExecDriver, method populateMapPlan2:

@SuppressWarnings("unchecked")
private void populateMapPlan2(Table src) throws Exception {
    // File sink: write results to mapplan2.out.
    Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
        new FileSinkDesc(new Path(tmpdir + File.separator + "mapplan2.out"), Utilities.defaultTd, false));
    // Script operator: pipe (key, value) rows through "cat" into the file sink.
    Operator<ScriptDesc> op2 = OperatorFactory.get(new ScriptDesc("cat",
        PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key,value"), TextRecordWriter.class,
        PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key,value"), TextRecordReader.class,
        TextRecordReader.class, PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key")), op3);
    // Filter on "key" feeds the script; this is the map-side root.
    Operator<FilterDesc> op1 = OperatorFactory.get(getTestFilterDesc("key"), op2);
    addMapWork(mr, src, "a", op1);
}
Also used : Path(org.apache.hadoop.fs.Path) ScriptDesc(org.apache.hadoop.hive.ql.plan.ScriptDesc) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)
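All five examples construct ScriptDesc through the same seven positional arguments, which are easy to misread. Matching the test call sites against genScriptPlan in Example 5 (which builds its arguments as inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), the parameter order appears to be the following; the annotations are inferred from those call sites, not taken from ScriptDesc's own documentation, and the TableDesc variable names are illustrative stand-ins:

// A minimal annotated sketch of the constructor used above (assumed order).
ScriptDesc sd = new ScriptDesc(
    "cat",                   // command to execute
    inputTableDesc,          // serialization of rows fed to the script's stdin
    TextRecordWriter.class,  // writer class pushing rows to stdin
    outputTableDesc,         // deserialization of rows read from the script's stdout
    TextRecordReader.class,  // reader class for stdout
    TextRecordReader.class,  // reader class for stderr
    errTableDesc);           // single-column layout for stderr
// inputTableDesc, outputTableDesc, and errTableDesc stand for the
// PlanUtils.getDefaultTableDesc(...) calls in the examples (hypothetical names).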

Example 2 with ScriptDesc

Use of org.apache.hadoop.hive.ql.plan.ScriptDesc in project hive by apache.

From the class TestExecDriver, method populateMapRedPlan4:

@SuppressWarnings("unchecked")
private void populateMapRedPlan4(Table src) throws SemanticException {
    // map-side work
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    // Reduce sink: partition and sort on tkey, emit (tkey, tvalue).
    Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(
        Utilities.makeList(getStringColumn("tkey")),
        Utilities.makeList(getStringColumn("tkey"), getStringColumn("tvalue")),
        outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
    // Script operator: "cat" is fed (key, value) and its output is read back as (tkey, tvalue).
    Operator<ScriptDesc> op0 = OperatorFactory.get(new ScriptDesc("cat",
        PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key,value"), TextRecordWriter.class,
        PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "tkey,tvalue"), TextRecordReader.class,
        TextRecordReader.class, PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key")), op1);
    // Select (key, value) from the source table; this is the map-side root.
    Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(
        Utilities.makeList(getStringColumn("key"), getStringColumn("value")), outputColumns), op0);
    addMapWork(mr, src, "a", op4);
    ReduceWork rWork = new ReduceWork();
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    rWork.setNumReduceTasks(Integer.valueOf(1));
    mr.setReduceWork(rWork);
    // reduce-side work
    // File sink: write reducer output to mapredplan4.out.
    Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
        new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan4.out"), Utilities.defaultTd, false));
    // Project the reduce-sink key and the second value column back out.
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
    rWork.setReducer(op2);
}
Also used : ScriptDesc(org.apache.hadoop.hive.ql.plan.ScriptDesc) Path(org.apache.hadoop.fs.Path) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
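The plan wired up above corresponds roughly to this operator DAG. This is a reading aid, not generated output, and it assumes addMapWork hangs the map-side root under a scan of src:

// Map side:    scan(src) -> op4 SELECT(key, value) -> op0 SCRIPT("cat") -> op1 REDUCESINK(tkey)
// Shuffle:     rows partitioned/sorted on tkey, one reducer (setNumReduceTasks(1))
// Reduce side: op2 SELECT(KEY.reducesinkkey0, VALUE._col1) -> op3 FILESINK(mapredplan4.out)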

Example 3 with ScriptDesc

Use of org.apache.hadoop.hive.ql.plan.ScriptDesc in project hive by apache.

From the class TestOperators, method testScriptOperator:

public void testScriptOperator() throws Throwable {
    try {
        System.out.println("Testing Script Operator");
        // First projected column: col1 as-is.
        ExprNodeDesc exprDesc1 = TestExecDriver.getStringColumn("col1");
        // Second projected column: concat(col0, "1").
        ExprNodeDesc expr1 = TestExecDriver.getStringColumn("col0");
        ExprNodeDesc expr2 = new ExprNodeConstantDesc("1");
        ExprNodeDesc exprDesc2 = TypeCheckProcFactory.DefaultExprProcessor
            .getFuncExprNodeDesc("concat", expr1, expr2);
        // Select operator to project these two columns.
        ArrayList<ExprNodeDesc> earr = new ArrayList<ExprNodeDesc>();
        earr.add(exprDesc1);
        earr.add(exprDesc2);
        ArrayList<String> outputCols = new ArrayList<String>();
        for (int i = 0; i < earr.size(); i++) {
            outputCols.add("_col" + i);
        }
        SelectDesc selectCtx = new SelectDesc(earr, outputCols);
        Operator<SelectDesc> op = OperatorFactory.get(new CompilationOpContext(), SelectDesc.class);
        op.setConf(selectCtx);
        // Script operator to echo the output of the select.
        // Note: both descriptors are identical here ("a,b"), so the original
        // test's swapped-looking variable names are harmless.
        TableDesc scriptOutput = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "a,b");
        TableDesc scriptInput = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "a,b");
        ScriptDesc sd = new ScriptDesc("cat", scriptOutput, TextRecordWriter.class,
            scriptInput, TextRecordReader.class, TextRecordReader.class,
            PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key"));
        Operator<ScriptDesc> sop = OperatorFactory.getAndMakeChild(sd, op);
        // Collect operator to observe the output of the script.
        CollectDesc cd = new CollectDesc(Integer.valueOf(10));
        CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, sop);
        op.initialize(new JobConf(TestOperators.class), new ObjectInspector[] { r[0].oi });
        // Push five test rows through select -> script -> collect.
        for (int i = 0; i < 5; i++) {
            op.process(r[i].o, 0);
        }
        op.close(false);
        InspectableObject io = new InspectableObject();
        for (int i = 0; i < 5; i++) {
            cdop.retrieve(io);
            System.out.println("[" + i + "] io.o=" + io.o);
            System.out.println("[" + i + "] io.oi=" + io.oi);
            StructObjectInspector soi = (StructObjectInspector) io.oi;
            assert (soi != null);
            StructField a = soi.getStructFieldRef("a");
            StructField b = soi.getStructFieldRef("b");
            // Since the script is just "cat", row i should come back unchanged:
            // a = col1 of row i, b = concat(col0, "1").
            assertEquals("" + (i + 1), ((PrimitiveObjectInspector) a.getFieldObjectInspector())
                .getPrimitiveJavaObject(soi.getStructFieldData(io.o, a)));
            assertEquals((i) + "1", ((PrimitiveObjectInspector) b.getFieldObjectInspector())
                .getPrimitiveJavaObject(soi.getStructFieldData(io.o, b)));
        }
        System.out.println("Script Operator ok");
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ScriptDesc(org.apache.hadoop.hive.ql.plan.ScriptDesc) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) CollectDesc(org.apache.hadoop.hive.ql.plan.CollectDesc) ArrayList(java.util.ArrayList) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) JobConf(org.apache.hadoop.mapred.JobConf) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
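For a single row, the flow through the three operators looks roughly like this. The fixture values are inferred from the assertions (col0 = i, col1 = i + 1), not read from the test setup:

// row i = 0: {col0 = "0", col1 = "1"}                 (inferred fixture values)
//   SELECT  -> ("1", concat("0", "1")) = ("1", "01")
//   SCRIPT  -> "1<TAB>01" piped through cat, read back as struct {a = "1", b = "01"}
//   COLLECT -> buffered, handed back by cdop.retrieve(io)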

Example 4 with ScriptDesc

Use of org.apache.hadoop.hive.ql.plan.ScriptDesc in project hive by apache.

From the class TestExecDriver, method populateMapRedPlan6:

@SuppressWarnings("unchecked")
private void populateMapRedPlan6(Table src) throws Exception {
    // map-side work
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    // Reduce sink: partition and sort on tkey, emit (tkey, tvalue).
    Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(
        Utilities.makeList(getStringColumn("tkey")),
        Utilities.makeList(getStringColumn("tkey"), getStringColumn("tvalue")),
        outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
    // Script operator: the command string carries literal single quotes ('cat');
    // both sides use the (tkey, tvalue) schema.
    Operator<ScriptDesc> op0 = OperatorFactory.get(new ScriptDesc("'cat'",
        PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "tkey,tvalue"), TextRecordWriter.class,
        PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "tkey,tvalue"), TextRecordReader.class,
        TextRecordReader.class, PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key")), op1);
    // Select (key, value) from the source table; this is the map-side root.
    Operator<SelectDesc> op4 = OperatorFactory.get(new SelectDesc(
        Utilities.makeList(getStringColumn("key"), getStringColumn("value")), outputColumns), op0);
    addMapWork(mr, src, "a", op4);
    ReduceWork rWork = new ReduceWork();
    mr.setReduceWork(rWork);
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    // reduce-side work
    // File sink: write reducer output to mapredplan6.out.
    Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
        new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan6.out"), Utilities.defaultTd, false));
    // Filter, then project the reduce-sink key and second value column.
    Operator<FilterDesc> op2 = OperatorFactory.get(getTestFilterDesc("0"), op3);
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
    Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op2);
    rWork.setReducer(op5);
}
Also used : ScriptDesc(org.apache.hadoop.hive.ql.plan.ScriptDesc) Path(org.apache.hadoop.fs.Path) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
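Structurally this is the same pipeline as Example 2; the differences are the quoted script command and a reduce-side filter between the projection and the sink:

// Reduce side: op5 SELECT(KEY.reducesinkkey0, VALUE._col1)
//                -> op2 FILTER("0") -> op3 FILESINK(mapredplan6.out)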

Example 5 with ScriptDesc

Use of org.apache.hadoop.hive.ql.plan.ScriptDesc in project hive by apache.

From the class SemanticAnalyzer, method genScriptPlan:

@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) throws SemanticException {
    // If there is no "AS" clause, the output schema will be "key,value"
    ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
    int inputSerDeNum = 1, inputRecordWriterNum = 2;
    int outputSerDeNum = 4, outputRecordReaderNum = 5;
    int outputColsNum = 6;
    boolean outputColNames = false, outputColSchemas = false;
    int execPos = 3;
    boolean defaultOutputCols = false;
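    // Implied child layout of the TOK_TRANSFORM node, read off the indices
    // above (child 0 is presumably the input expression list):
    //   1 = input SerDe           2 = input RecordWriter
    //   3 = script command        4 = output SerDe
    //   5 = output RecordReader   6 = output column list (optional)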
    // Go over all the children
    if (trfm.getChildCount() > outputColsNum) {
        ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
        if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
            outputColNames = true;
        } else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
            outputColSchemas = true;
        }
    }
    // No AS clause: default the output schema to two string columns, key and value.
    if (!outputColNames && !outputColSchemas) {
        String intName = getColumnInternalName(0);
        ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
        colInfo.setAlias("key");
        outputCols.add(colInfo);
        intName = getColumnInternalName(1);
        colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
        colInfo.setAlias("value");
        outputCols.add(colInfo);
        defaultOutputCols = true;
    } else {
        ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
        int ccount = collist.getChildCount();
        Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
        if (outputColNames) {
            for (int i = 0; i < ccount; ++i) {
                String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i)).getText());
                failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
                String intName = getColumnInternalName(i);
                ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null, false);
                colInfo.setAlias(colAlias);
                outputCols.add(colInfo);
            }
        } else {
            for (int i = 0; i < ccount; ++i) {
                ASTNode child = (ASTNode) collist.getChild(i);
                assert child.getType() == HiveParser.TOK_TABCOL;
                String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0)).getText());
                failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
                String intName = getColumnInternalName(i);
                ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child.getChild(1))), null, false);
                colInfo.setAlias(colAlias);
                outputCols.add(colInfo);
            }
        }
    }
    RowResolver out_rwsch = new RowResolver();
    StringBuilder columns = new StringBuilder();
    StringBuilder columnTypes = new StringBuilder();
    for (int i = 0; i < outputCols.size(); ++i) {
        if (i != 0) {
            columns.append(",");
            columnTypes.append(",");
        }
        columns.append(outputCols.get(i).getInternalName());
        columnTypes.append(outputCols.get(i).getType().getTypeName());
        out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(), outputCols.get(i));
    }
    StringBuilder inpColumns = new StringBuilder();
    StringBuilder inpColumnTypes = new StringBuilder();
    ArrayList<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver().getColumnInfos();
    for (int i = 0; i < inputSchema.size(); ++i) {
        if (i != 0) {
            inpColumns.append(",");
            inpColumnTypes.append(",");
        }
        inpColumns.append(inputSchema.get(i).getInternalName());
        inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
    }
    TableDesc outInfo;
    TableDesc errInfo;
    TableDesc inInfo;
    String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
    Class<? extends Deserializer> serde;
    try {
        serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName, true, Utilities.getSessionSpecifiedClassLoader());
    } catch (ClassNotFoundException e) {
        throw new SemanticException(e);
    }
    int fieldSeparator = Utilities.tabCode;
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
        fieldSeparator = Utilities.ctrlaCode;
    }
    // Input and output SerDes
    if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
        inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(inputSerDeNum))).getChild(0),
            inpColumns.toString(), inpColumnTypes.toString(), false);
    } else {
        inInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator),
            inpColumns.toString(), inpColumnTypes.toString(), false, true);
    }
    if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
        outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm.getChild(outputSerDeNum))).getChild(0),
            columns.toString(), columnTypes.toString(), false);
    // This is for backward compatibility. If the user did not specify the
    // output column list, we assume that there are 2 columns: key and value.
    // However, if the script outputs col1, col2, col3 separated by TAB, the
    // requirement is: key is col1 and value is (col2 TAB col3).
    } else {
        outInfo = PlanUtils.getTableDesc(serde, Integer.toString(fieldSeparator),
            columns.toString(), columnTypes.toString(), defaultOutputCols);
    }
    // Error stream always uses the default serde with a single column
    errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
    // Record readers and writer for the script's stdout, stdin, and stderr
    Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm.getChild(outputRecordReaderNum));
    Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm.getChild(inputRecordWriterNum));
    Class<? extends RecordReader> errRecordReader = getDefaultRecordReader();
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new ScriptDesc(fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())),
            inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo),
        new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
    // disable backtracking
    output.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
    // Add a URI entity for the transform script; the script is assumed to be local unless it is downloadable.
    if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) {
        String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText()));
        getInputs().add(new ReadEntity(new Path(scriptCmd), ResourceDownloader.isFileUri(scriptCmd)));
    }
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ScriptDesc(org.apache.hadoop.hive.ql.plan.ScriptDesc) Path(org.apache.hadoop.fs.Path) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc) InsertTableDesc(org.apache.hadoop.hive.ql.plan.InsertTableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) AlterTableDesc(org.apache.hadoop.hive.ql.plan.AlterTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
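genScriptPlan is the compilation entry point for HiveQL's TRANSFORM clause. A query shaped like the one below (illustrative only; the table and column names are not from the Hive test suite) is the kind of input that reaches this method, with the USING string sitting at execPos and the AS list at outputColsNum:

SELECT TRANSFORM (key, value)
       USING 'cat'
       AS (tkey STRING, tvalue STRING)
FROM src;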

Aggregations

ScriptDesc (org.apache.hadoop.hive.ql.plan.ScriptDesc)5 ArrayList (java.util.ArrayList)4 Path (org.apache.hadoop.fs.Path)4 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)4 FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc)3 SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)3 FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc)2 ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)2 ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork)2 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)2 HashSet (java.util.HashSet)1 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)1 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)1 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)1 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)1 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)1 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)1 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)1 Operator (org.apache.hadoop.hive.ql.exec.Operator)1 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)1