
Example 11 with FetchWork

use of org.apache.hadoop.hive.ql.plan.FetchWork in project hive by apache.

the class MapJoinProcessor method genMapJoinLocalWork.

/**
   * Generate the MapRed local work for the given map-join operator.
   *
   * @param newWork
   *          MapredWork into which the generated local work is set
   * @param mapJoinOp
   *          map-join operator for which local work needs to be generated
   * @param bigTablePos
   *          position of the big (streamed) table among the join inputs
   * @throws SemanticException
   */
private static void genMapJoinLocalWork(MapredWork newWork, MapJoinOperator mapJoinOp, int bigTablePos) throws SemanticException {
    // collect the small-table aliases first, to avoid a ConcurrentModificationException later
    ArrayList<String> smallTableAliasList = new ArrayList<String>();
    // create a new MapredLocalWork
    MapredLocalWork newLocalWork = new MapredLocalWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(), new LinkedHashMap<String, FetchWork>());
    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : newWork.getMapWork().getAliasToWork().entrySet()) {
        String alias = entry.getKey();
        Operator<? extends OperatorDesc> op = entry.getValue();
        // if this table scan feeds the big table, skip it;
        // trace down the operator tree from the table scan operator
        Operator<? extends OperatorDesc> parentOp = op;
        Operator<? extends OperatorDesc> childOp = op.getChildOperators().get(0);
        while ((childOp != null) && (!childOp.equals(mapJoinOp))) {
            parentOp = childOp;
            assert parentOp.getChildOperators().size() == 1;
            childOp = parentOp.getChildOperators().get(0);
        }
        if (childOp == null) {
            throw new SemanticException("Cannot find join op by tracing down the table scan operator tree");
        }
        // skip the big table pos
        int i = childOp.getParentOperators().indexOf(parentOp);
        if (i == bigTablePos) {
            continue;
        }
        // add the alias's operator tree to the local work and record it in smallTableAliasList
        newLocalWork.getAliasToWork().put(alias, op);
        smallTableAliasList.add(alias);
        // get the input paths and remove this alias from pathToAliases,
        // because these files will be read by a fetch operator
        LinkedHashMap<Path, ArrayList<String>> pathToAliases = newWork.getMapWork().getPathToAliases();
        // record all the input paths for this alias
        HashSet<Path> pathSet = new HashSet<>();
        HashSet<Path> emptyPath = new HashSet<>();
        for (Map.Entry<Path, ArrayList<String>> entry2 : pathToAliases.entrySet()) {
            Path path = entry2.getKey();
            ArrayList<String> list = entry2.getValue();
            if (list.contains(alias)) {
                // add to path set
                pathSet.add(path);
                // remove this alias from the alias list
                list.remove(alias);
                if (list.size() == 0) {
                    emptyPath.add(path);
                }
            }
        }
        // remove paths that no longer have any alias associated with them
        for (Path path : emptyPath) {
            newWork.getMapWork().removePathToAlias(path);
        }
        // create fetch work
        FetchWork fetchWork = null;
        List<Path> partDir = new ArrayList<Path>();
        List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
        for (Path tablePath : pathSet) {
            PartitionDesc partitionDesc = newWork.getMapWork().getPathToPartitionInfo().get(tablePath);
            // create FetchWork for a non-partitioned table
            if (partitionDesc.getPartSpec() == null || partitionDesc.getPartSpec().size() == 0) {
                fetchWork = new FetchWork(tablePath, partitionDesc.getTableDesc());
                break;
            }
            // if the table is partitioned, add partDir and partitionDesc
            partDir.add(tablePath);
            partDesc.add(partitionDesc);
        }
        // create FetchWork for a partitioned table
        if (fetchWork == null) {
            TableDesc table = newWork.getMapWork().getAliasToPartnInfo().get(alias).getTableDesc();
            fetchWork = new FetchWork(partDir, partDesc, table);
        }
        // map the alias to its FetchWork
        newLocalWork.getAliasToFetchWork().put(alias, fetchWork);
    }
    // remove the small-table aliases from aliasToWork; done after the loop to avoid concurrent modification
    for (String alias : smallTableAliasList) {
        newWork.getMapWork().getAliasToWork().remove(alias);
    }
    // set up local work
    newWork.getMapWork().setMapRedLocalWork(newLocalWork);
    // remove reducer
    newWork.setReduceWork(null);
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HashSet(java.util.HashSet)
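
The two FetchWork shapes built above differ only in whether partition metadata travels with the paths. A minimal sketch of that branch, assuming the Hive planner classes are on the classpath (fetchWorkFor is a hypothetical helper, not part of MapJoinProcessor):

import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

// Hypothetical helper mirroring the branch in genMapJoinLocalWork.
static FetchWork fetchWorkFor(Path tablePath, PartitionDesc partitionDesc,
        List<Path> partDirs, List<PartitionDesc> partDescs, TableDesc table) {
    if (partitionDesc.getPartSpec() == null || partitionDesc.getPartSpec().isEmpty()) {
        // non-partitioned: a single path plus the table descriptor suffices
        return new FetchWork(tablePath, partitionDesc.getTableDesc());
    }
    // partitioned: parallel lists pair each partition directory with its
    // PartitionDesc; the shared TableDesc rides alongside
    return new FetchWork(partDirs, partDescs, table);
}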

Example 12 with FetchWork

use of org.apache.hadoop.hive.ql.plan.FetchWork in project hive by apache.

the class GenMapRedUtils method setTaskPlan.

/**
   * Set up the given alias and its input path in the map work (or, when
   * local, in its MapredLocalWork).
   *
   * @param path
   *          input path for the alias
   * @param alias
   *          current alias
   * @param topOp
   *          the top operator of the stack
   * @param plan
   *          current plan
   * @param local
   *          whether to add to the map-reduce work or to the local work
   * @param tt_desc
   *          table descriptor
   * @throws SemanticException
   */
public static void setTaskPlan(Path path, String alias, Operator<? extends OperatorDesc> topOp, MapWork plan, boolean local, TableDesc tt_desc) throws SemanticException {
    if (path == null || alias == null) {
        return;
    }
    if (topOp instanceof TableScanOperator) {
        try {
            Utilities.addSchemaEvolutionToTableScanOperator((StructObjectInspector) tt_desc.getDeserializer().getObjectInspector(), (TableScanOperator) topOp);
        } catch (Exception e) {
            throw new SemanticException(e);
        }
    }
    if (!local) {
        plan.addPathToAlias(path, alias);
        plan.addPathToPartitionInfo(path, new PartitionDesc(tt_desc, null));
        plan.getAliasToWork().put(alias, topOp);
    } else {
        // populate local work if needed
        MapredLocalWork localPlan = plan.getMapRedLocalWork();
        if (localPlan == null) {
            localPlan = new MapredLocalWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(), new LinkedHashMap<String, FetchWork>());
        }
        assert localPlan.getAliasToWork().get(alias) == null;
        assert localPlan.getAliasToFetchWork().get(alias) == null;
        localPlan.getAliasToWork().put(alias, topOp);
        localPlan.getAliasToFetchWork().put(alias, new FetchWork(new Path(alias), tt_desc));
        plan.setMapRedLocalWork(localPlan);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) LinkedHashMap(java.util.LinkedHashMap)
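
A hedged usage sketch of the local branch (tmpDir, scanOp, mapWork and tmpTableDesc are illustrative names, not from this snippet): passing local = true routes the alias into MapredLocalWork, so a FetchOperator rather than a mapper reads it.

// hypothetical intermediate directory produced by an earlier stage
Path tmpDir = new Path("/tmp/hive-stage-1");
GenMapRedUtils.setTaskPlan(tmpDir, "stage1_out", scanOp, mapWork,
        /* local */ true, tmpTableDesc);
// mapWork.getMapRedLocalWork() now maps "stage1_out" to both the operator
// tree (scanOp) and a FetchWork rooted at new Path("stage1_out")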

Example 13 with FetchWork

use of org.apache.hadoop.hive.ql.plan.FetchWork in project hive by apache.

the class SMBMapJoinOperator method initializeMapredLocalWork.

public void initializeMapredLocalWork(MapJoinDesc mjConf, Configuration hconf, MapredLocalWork localWork, Logger l4j) throws HiveException {
    if (localWork == null || localWorkInited) {
        return;
    }
    localWorkInited = true;
    this.localWork = localWork;
    aliasToMergeQueue = new HashMap<String, MergeQueue>();
    // create map local operators
    Map<String, FetchWork> aliasToFetchWork = localWork.getAliasToFetchWork();
    Map<String, Operator<? extends OperatorDesc>> aliasToWork = localWork.getAliasToWork();
    Map<String, DummyStoreOperator> aliasToSinkWork = conf.getAliasToSink();
    // Look at comments in DummyStoreOperator for additional explanation.
    for (Map.Entry<String, FetchWork> entry : aliasToFetchWork.entrySet()) {
        String alias = entry.getKey();
        FetchWork fetchWork = entry.getValue();
        JobConf jobClone = new JobConf(hconf);
        if (UserGroupInformation.isSecurityEnabled()) {
            String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
            if (hadoopAuthToken != null) {
                jobClone.set("mapreduce.job.credentials.binary", hadoopAuthToken);
            }
        }
        TableScanOperator ts = (TableScanOperator) aliasToWork.get(alias);
        // push down projections
        ColumnProjectionUtils.appendReadColumns(jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
        // push down filters
        HiveInputFormat.pushFilters(jobClone, ts);
        AcidUtils.setTransactionalTableScan(jobClone, ts.getConf().isAcidTable());
        AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().getAcidOperationalProperties());
        ts.passExecContext(getExecContext());
        FetchOperator fetchOp = new FetchOperator(fetchWork, jobClone);
        ts.initialize(jobClone, new ObjectInspector[] { fetchOp.getOutputObjectInspector() });
        fetchOp.clearFetchContext();
        DummyStoreOperator sinkOp = aliasToSinkWork.get(alias);
        MergeQueue mergeQueue = new MergeQueue(alias, fetchWork, jobClone, ts, sinkOp);
        aliasToMergeQueue.put(alias, mergeQueue);
        l4j.info("fetch operators for " + alias + " initialized");
    }
}
Also used : FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) HashMap(java.util.HashMap) Map(java.util.Map) JobConf(org.apache.hadoop.mapred.JobConf)
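
Once initialized, each MergeQueue drives its FetchOperator internally; the basic fetch loop looks roughly like this sketch (assuming Hive 2.x APIs; fetchWork, hconf and the process consumer are illustrative, and IOException handling is elided):

import org.apache.hadoop.hive.ql.exec.FetchOperator;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.mapred.JobConf;

FetchOperator fetchOp = new FetchOperator(fetchWork, new JobConf(hconf));
InspectableObject row;
while ((row = fetchOp.getNextRow()) != null) {
    // row.o is the deserialized record, row.oi its ObjectInspector
    process(row.o, row.oi); // hypothetical consumer
}
fetchOp.clearFetchContext(); // release readers, as the snippet above does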

Example 14 with FetchWork

use of org.apache.hadoop.hive.ql.plan.FetchWork in project hive by apache.

the class BaseSemanticAnalyzer method createFetchTask.

/**
   * Create a FetchTask for a given schema.
   *
   * @param schema
   *          the result schema, encoded as "column.names#column.types"
   */
protected FetchTask createFetchTask(String schema) {
    Properties prop = new Properties();
    // Sets delimiter to tab (ascii 9)
    prop.setProperty(serdeConstants.SERIALIZATION_FORMAT, Integer.toString(Utilities.tabCode));
    prop.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, " ");
    String[] colTypes = schema.split("#");
    prop.setProperty("columns", colTypes[0]);
    prop.setProperty("columns.types", colTypes[1]);
    prop.setProperty(serdeConstants.SERIALIZATION_LIB, LazySimpleSerDe.class.getName());
    FetchWork fetch = new FetchWork(ctx.getResFile(), new TableDesc(TextInputFormat.class, IgnoreKeyTextOutputFormat.class, prop), -1);
    fetch.setSerializationNullFormat(" ");
    return (FetchTask) TaskFactory.get(fetch, conf);
}
Also used : TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) QueryProperties(org.apache.hadoop.hive.ql.QueryProperties) Properties(java.util.Properties) IgnoreKeyTextOutputFormat(org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
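
The schema argument packs column names and types into a single string separated by '#', each side comma-delimited; the split on "#" above relies on that shape. A hedged usage sketch (the schema string is illustrative; createFetchTask is protected, so this runs inside an analyzer subclass):

FetchTask task = createFetchTask("tab_name,comment#string,string");
// -> prop "columns" = "tab_name,comment", "columns.types" = "string,string"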

Example 15 with FetchWork

use of org.apache.hadoop.hive.ql.plan.FetchWork in project SQLWindowing by hbutani.

the class QueryOutputPrinter method setupFetchOperator.

FetchOperator setupFetchOperator(QueryDef qry, TableDesc tDesc, JobConf jcfg) {
    FetchWork fW = new FetchWork(qry.getOutput().getSpec().getPath(), tDesc);
    FetchOperator fOp = new FetchOperator(fW, jcfg);
    return fOp;
}
Also used : FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) FetchOperator(org.apache.hadoop.hive.ql.exec.FetchOperator)

Aggregations

FetchWork (org.apache.hadoop.hive.ql.plan.FetchWork)15 Path (org.apache.hadoop.fs.Path)9 PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc)7 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)7 ArrayList (java.util.ArrayList)6 HashMap (java.util.HashMap)5 LinkedHashMap (java.util.LinkedHashMap)5 Map (java.util.Map)5 FetchTask (org.apache.hadoop.hive.ql.exec.FetchTask)5 MapredLocalWork (org.apache.hadoop.hive.ql.plan.MapredLocalWork)5 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)5 Operator (org.apache.hadoop.hive.ql.exec.Operator)4 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)4 HiveConf (org.apache.hadoop.hive.conf.HiveConf)3 FetchOperator (org.apache.hadoop.hive.ql.exec.FetchOperator)3 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)3 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)3 LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc)3 LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe)3 IOException (java.io.IOException)2