Example 16 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class RemoteHiveSparkClient method submit.

private SparkJobRef submit(final DriverContext driverContext, final SparkWork sparkWork) throws Exception {
    final Context ctx = driverContext.getCtx();
    final HiveConf hiveConf = (HiveConf) ctx.getConf();
    refreshLocalResources(sparkWork, hiveConf);
    final JobConf jobConf = new JobConf(hiveConf);
    //update the credential provider location in the jobConf
    HiveConfUtil.updateJobCredentialProviders(jobConf);
    // Create temporary scratch dir
    final Path emptyScratchDir = ctx.getMRTmpPath();
    FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
    fs.mkdirs(emptyScratchDir);
    byte[] jobConfBytes = KryoSerializer.serializeJobConf(jobConf);
    byte[] scratchDirBytes = KryoSerializer.serialize(emptyScratchDir);
    byte[] sparkWorkBytes = KryoSerializer.serialize(sparkWork);
    JobStatusJob job = new JobStatusJob(jobConfBytes, scratchDirBytes, sparkWorkBytes);
    if (driverContext.isShutdown()) {
        throw new HiveException("Operation is cancelled.");
    }
    JobHandle<Serializable> jobHandle = remoteClient.submit(job);
    RemoteSparkJobStatus sparkJobStatus = new RemoteSparkJobStatus(remoteClient, jobHandle, sparkClientTimtout);
    return new RemoteSparkJobRef(hiveConf, jobHandle, sparkJobStatus);
}
Also used : Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) JobContext(org.apache.hive.spark.client.JobContext) Path(org.apache.hadoop.fs.Path) Serializable(java.io.Serializable) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) RemoteSparkJobRef(org.apache.hadoop.hive.ql.exec.spark.status.impl.RemoteSparkJobRef) FileSystem(org.apache.hadoop.fs.FileSystem) RemoteSparkJobStatus(org.apache.hadoop.hive.ql.exec.spark.status.impl.RemoteSparkJobStatus) HiveConf(org.apache.hadoop.hive.conf.HiveConf) JobConf(org.apache.hadoop.mapred.JobConf)
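
The essential Context usage in this example is the scratch-directory setup. Below is a minimal sketch of just that step, assuming an already-populated HiveConf; the wrapper class and method names are illustrative and not part of Hive.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;

public class ScratchDirSketch {
    public static Path createMRScratchDir(HiveConf conf) throws Exception {
        // Context(Configuration) may throw IOException while preparing temp dirs
        Context ctx = new Context(conf);
        // ask the Context for a per-query temporary path, as submit() does above
        Path scratchDir = ctx.getMRTmpPath();
        // materialize the directory on whatever file system the path resolves to
        FileSystem fs = scratchDir.getFileSystem(conf);
        fs.mkdirs(scratchDir);
        return scratchDir;
    }
}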

Example 17 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class GenMapRedUtils method splitTasks.

@SuppressWarnings("nls")
private static /**
   * Split two tasks by creating a temporary file between them.
   *
   * @param op reduce sink operator being processed
   * @param parentTask the parent task
   * @param childTask the child task
   * @param opProcCtx context
   **/
void splitTasks(ReduceSinkOperator op, Task<? extends Serializable> parentTask, Task<? extends Serializable> childTask, GenMRProcContext opProcCtx) throws SemanticException {
    if (op.getNumParent() != 1) {
        throw new IllegalStateException("Expecting operator " + op + " to have one parent. " + "But found multiple parents : " + op.getParentOperators());
    }
    ParseContext parseCtx = opProcCtx.getParseCtx();
    parentTask.addDependentTask(childTask);
    // Root Task cannot depend on any other task, therefore childTask cannot be
    // a root Task
    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
    if (rootTasks.contains(childTask)) {
        rootTasks.remove(childTask);
    }
    // Generate the temporary file name
    Context baseCtx = parseCtx.getContext();
    Path taskTmpDir = baseCtx.getMRTmpPath();
    Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
    TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
    // Create the temporary file, its corresponding FileSinkOperator, and
    // its corresponding TableScanOperator.
    TableScanOperator tableScanOp = createTemporaryFile(parent, op, taskTmpDir, tt_desc, parseCtx);
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    mapCurrCtx.put(tableScanOp, new GenMapRedCtx(childTask, null));
    String streamDesc = taskTmpDir.toUri().toString();
    MapredWork cplan = (MapredWork) childTask.getWork();
    if (needsTagging(cplan.getReduceWork())) {
        Operator<? extends OperatorDesc> reducerOp = cplan.getReduceWork().getReducer();
        String id = null;
        if (reducerOp instanceof JoinOperator) {
            if (parseCtx.getJoinOps().contains(reducerOp)) {
                id = ((JoinOperator) reducerOp).getConf().getId();
            }
        } else if (reducerOp instanceof MapJoinOperator) {
            if (parseCtx.getMapJoinOps().contains(reducerOp)) {
                id = ((MapJoinOperator) reducerOp).getConf().getId();
            }
        } else if (reducerOp instanceof SMBMapJoinOperator) {
            if (parseCtx.getSmbMapJoinOps().contains(reducerOp)) {
                id = ((SMBMapJoinOperator) reducerOp).getConf().getId();
            }
        }
        if (id != null) {
            streamDesc = id + ":$INTNAME";
        } else {
            streamDesc = "$INTNAME";
        }
        String origStreamDesc = streamDesc;
        int pos = 0;
        while (cplan.getMapWork().getAliasToWork().get(streamDesc) != null) {
            streamDesc = origStreamDesc.concat(String.valueOf(++pos));
        }
        // TODO: Allocate work to remove the temporary files and make that
        // dependent on the redTask
        cplan.getReduceWork().setNeedsTagging(true);
    }
    // Add the path to alias mapping
    setTaskPlan(taskTmpDir, streamDesc, tableScanOp, cplan.getMapWork(), false, tt_desc);
    opProcCtx.setCurrTopOp(null);
    opProcCtx.setCurrAliasId(null);
    opProcCtx.setCurrTask(childTask);
    opProcCtx.addRootIfPossible(parentTask);
}
Also used : ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) Path(org.apache.hadoop.fs.Path) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) DemuxOperator(org.apache.hadoop.hive.ql.exec.DemuxOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) DependencyCollectionTask(org.apache.hadoop.hive.ql.exec.DependencyCollectionTask) Serializable(java.io.Serializable) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) GenMapRedCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
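
One piece of splitTasks worth isolating is the loop that makes streamDesc unique among the aliases already registered in the map work. A standalone, Hive-independent sketch of that de-duplication logic follows; the class and method names are hypothetical.

import java.util.Map;

public final class AliasSketch {

    // Mirror of the streamDesc loop above: append 1, 2, 3, ... to the original
    // name until the resulting alias is not yet present in the alias map.
    public static String uniqueAlias(String base, Map<String, ?> aliasToWork) {
        String alias = base;
        int pos = 0;
        while (aliasToWork.containsKey(alias)) {
            alias = base + (++pos);
        }
        return alias;
    }
}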

Example 18 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class GlobalLimitOptimizer method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    Context ctx = pctx.getContext();
    Map<String, TableScanOperator> topOps = pctx.getTopOps();
    GlobalLimitCtx globalLimitCtx = pctx.getGlobalLimitCtx();
    Map<String, SplitSample> nameToSplitSample = pctx.getNameToSplitSample();
    // The query only qualifies for the reduced-input-size optimization for
    // LIMIT when there is exactly one top operator, no transform or UDTF,
    // and no split sampling is used.
    if (ctx.getTryCount() == 0 && topOps.size() == 1 && !globalLimitCtx.ifHasTransformOrUDTF() && nameToSplitSample.isEmpty()) {
        // Here we recursively check:
        // 1. whether there are exact one LIMIT in the query
        // 2. whether there is no aggregation, group-by, distinct, sort by,
        //    distributed by, or table sampling in any of the sub-query.
        // The query only qualifies if both conditions are satisfied.
        //
        // Example qualified queries:
        //    CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
        //    INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
        //                               FROM ... LIMIT...
        //    SELECT * FROM (SELECT col1 AS col2 FROM (SELECT * FROM ...) t1 LIMIT ...) t2;
        //
        TableScanOperator ts = topOps.values().iterator().next();
        LimitOperator tempGlobalLimit = checkQbpForGlobalLimit(ts);
        // the query qualifies for the optimization
        if (tempGlobalLimit != null) {
            LimitDesc tempGlobalLimitDesc = tempGlobalLimit.getConf();
            Table tab = ts.getConf().getTableMetadata();
            Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);
            if (!tab.isPartitioned()) {
                if (filterOps.size() == 0) {
                    Integer tempOffset = tempGlobalLimitDesc.getOffset();
                    globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
                }
            } else {
                // check if the pruner only contains partition columns
                if (onlyContainsPartnCols(tab, filterOps)) {
                    String alias = (String) topOps.keySet().toArray()[0];
                    PrunedPartitionList partsList = pctx.getPrunedPartitions(alias, ts);
                    // Only enable the optimization if partition pruning left no
                    // unknown partitions; otherwise the filter still has to run
                    // for the pruning to be correct.
                    if (!partsList.hasUnknownPartitions()) {
                        Integer tempOffset = tempGlobalLimitDesc.getOffset();
                        globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
                    }
                }
            }
            if (globalLimitCtx.isEnable()) {
                LOG.info("Qualify the optimize that reduces input size for 'offset' for offset " + globalLimitCtx.getGlobalOffset());
                LOG.info("Qualify the optimize that reduces input size for 'limit' for limit " + globalLimitCtx.getGlobalLimit());
            }
        }
    }
    return pctx;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) SplitSample(org.apache.hadoop.hive.ql.parse.SplitSample) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) GlobalLimitCtx(org.apache.hadoop.hive.ql.parse.GlobalLimitCtx)

Example 19 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class RewriteParseContextGenerator method generateOperatorTree.

/**
   * Parse the input {@link String} command and generate an operator tree.
   * @param queryState the query state carrying the configuration
   * @param command the command to parse
   * @return the generated operator tree
   * @throws SemanticException
   */
public static Operator<? extends OperatorDesc> generateOperatorTree(QueryState queryState, String command) throws SemanticException {
    Operator<? extends OperatorDesc> operatorTree;
    try {
        Context ctx = new Context(queryState.getConf());
        ASTNode tree = ParseUtils.parse(command, ctx);
        BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
        assert (sem instanceof SemanticAnalyzer);
        operatorTree = doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx);
        LOG.info("Sub-query Semantic Analysis Completed");
    } catch (IOException e) {
        LOG.error("IOException in generating the operator " + "tree for input command - " + command + " ", e);
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    } catch (ParseException e) {
        LOG.error("ParseException in generating the operator " + "tree for input command - " + command + " ", e);
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    } catch (SemanticException e) {
        LOG.error("SemanticException in generating the operator " + "tree for input command - " + command + " ", e);
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
    return operatorTree;
}
Also used : ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) IOException(java.io.IOException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 20 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class ColumnStatsSemanticAnalyzer method genRewrittenTree.

private ASTNode genRewrittenTree(String rewrittenQuery) throws SemanticException {
    ASTNode rewrittenTree;
    // Parse the rewritten query string
    try {
        ctx = new Context(conf);
    } catch (IOException e) {
        throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_IO_ERROR.getMsg());
    }
    ctx.setCmd(rewrittenQuery);
    try {
        rewrittenTree = ParseUtils.parse(rewrittenQuery, ctx);
    } catch (ParseException e) {
        throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_PARSE_ERROR.getMsg());
    }
    return rewrittenTree;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) IOException(java.io.IOException)
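
Examples 19 and 20 share the same parse pattern: create a Context, attach the command text, and let ParseUtils produce the ASTNode. The sketch below condenses that pattern into one hedged helper; the wrapper class and method names are illustrative only.

import java.io.IOException;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class ParseSketch {
    public static ASTNode parseCommand(HiveConf conf, String command) throws SemanticException {
        try {
            Context ctx = new Context(conf);
            // record the command on the context so later phases can report it
            ctx.setCmd(command);
            return ParseUtils.parse(command, ctx);
        } catch (IOException | ParseException e) {
            // surface both setup and parse failures as SemanticException, as above
            throw new SemanticException(e.getMessage(), e);
        }
    }
}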

Aggregations

Context (org.apache.hadoop.hive.ql.Context): 47
Path (org.apache.hadoop.fs.Path): 19
IOException (java.io.IOException): 15
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 15
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 13
FileSystem (org.apache.hadoop.fs.FileSystem): 9
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 9
Serializable (java.io.Serializable): 8
Task (org.apache.hadoop.hive.ql.exec.Task): 7
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 5
Table (org.apache.hadoop.hive.ql.metadata.Table): 5
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 5
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 5
ArrayList (java.util.ArrayList): 4
EnvironmentContext (org.apache.hadoop.hive.metastore.api.EnvironmentContext): 4
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 4
HiveTxnManager (org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager): 4
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 4
JobClient (org.apache.hadoop.mapred.JobClient): 4
JobConf (org.apache.hadoop.mapred.JobConf): 4