Example 21 with FetchTask

use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

the class Compiler method setSchema.

/**
 * Get a Schema with fields represented with native Hive types.
 */
private void setSchema(BaseSemanticAnalyzer sem) {
    Schema schema = new Schema();
    // If we have a plan, prefer its logical result schema if it's available;
    // otherwise, try digging out a fetch task; failing that, give up.
    if (sem == null) {
        LOG.info("No semantic analyzer, using empty schema.");
    } else if (sem.getResultSchema() != null) {
        List<FieldSchema> lst = sem.getResultSchema();
        schema = new Schema(lst, null);
    } else if (sem.getFetchTask() != null) {
        FetchTask ft = sem.getFetchTask();
        TableDesc td = ft.getTblDesc();
        // Partitioned tables don't have a table desc set on the fetch task. Instead they have a list
        // of PartitionDesc objects, each with a table desc. Let's try to fetch the desc for the first
        // partition and use its deserializer.
        if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
            if (ft.getWork().getPartDesc().size() > 0) {
                td = ft.getWork().getPartDesc().get(0).getTableDesc();
            }
        }
        if (td == null) {
            LOG.info("No returning schema, using empty schema");
        } else {
            String tableName = "result";
            List<FieldSchema> lst = null;
            try {
                lst = HiveMetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer(driverContext.getConf()));
            } catch (Exception e) {
                LOG.warn("Error getting schema", e);
            }
            if (lst != null) {
                schema = new Schema(lst, null);
            }
        }
    }
    LOG.info("Created Hive schema: " + schema);
    driverContext.setSchema(schema);
}
Also used : Schema(org.apache.hadoop.hive.metastore.api.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) List(java.util.List) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) ReCompileException(org.apache.hadoop.hive.ql.reexec.ReCompileException) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) TException(org.apache.thrift.TException) IOException(java.io.IOException) CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
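
The fallback order above matters: the analyzer's result schema wins, then the fetch task's table desc, then the first partition's desc, and only then the empty schema. A minimal, self-contained sketch of that precedence, using hypothetical stand-in types rather than the real Hive classes:

import java.util.Collections;
import java.util.List;

public class SchemaFallbackSketch {

    // Hypothetical stand-ins for BaseSemanticAnalyzer and FetchTask.
    interface Analyzer {
        // may be null
        List<String> resultSchema();
        // may be null
        Fetch fetchTask();
    }

    interface Fetch {
        // null for partitioned tables, which carry per-partition descs instead
        List<String> tableColumns();
        List<List<String>> partitionColumns();
    }

    // Same precedence as Compiler.setSchema: result schema, then the fetch
    // task's table desc, then the first partition's desc, else empty.
    static List<String> deriveSchema(Analyzer sem) {
        if (sem == null) {
            return Collections.emptyList();
        }
        if (sem.resultSchema() != null) {
            return sem.resultSchema();
        }
        Fetch ft = sem.fetchTask();
        if (ft == null) {
            return Collections.emptyList();
        }
        List<String> cols = ft.tableColumns();
        if (cols == null && ft.partitionColumns() != null && !ft.partitionColumns().isEmpty()) {
            cols = ft.partitionColumns().get(0);
        }
        return cols != null ? cols : Collections.emptyList();
    }
}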

Example 22 with FetchTask

use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

the class Executor method useFetchFromCache.

private void useFetchFromCache(CacheEntry cacheEntry) {
    // Change query FetchTask to use new location specified in results cache.
    FetchTask fetchTaskFromCache = (FetchTask) TaskFactory.get(cacheEntry.getFetchWork());
    fetchTaskFromCache.initialize(driverContext.getQueryState(), driverContext.getPlan(), null, context);
    driverContext.getPlan().setFetchTask(fetchTaskFromCache);
    driverContext.setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.QUERY_USING_CACHE, cacheEntry));
}
Also used : FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) CacheUsage(org.apache.hadoop.hive.ql.cache.results.CacheUsage)
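
For context, the swap above is the cache-hit half of the results-cache flow: on a hit, the plan's FetchTask is redirected at the cached result location instead of re-executing the query. A hypothetical plain-Java sketch of that lookup-or-execute shape (the names here are illustrative, not Hive API):

import java.util.HashMap;
import java.util.Map;
import java.util.function.Supplier;

public class ResultsCacheSketch {

    // query text -> location of previously materialized results
    private final Map<String, String> cache = new HashMap<>();

    // On a hit, fetch from the cached location (analogous to useFetchFromCache);
    // otherwise execute the query and remember where the results landed.
    String resultLocation(String query, Supplier<String> execute) {
        String cached = cache.get(query);
        if (cached != null) {
            return cached;
        }
        String location = execute.get();
        cache.put(query, location);
        return location;
    }
}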

Example 23 with FetchTask

use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

the class GenTezUtils method processFileSink.

public static void processFileSink(GenTezProcContext context, FileSinkOperator fileSink) throws SemanticException {
    ParseContext parseContext = context.parseContext;
    // is INSERT OVERWRITE TABLE
    boolean isInsertTable = GenMapRedUtils.isInsertInto(parseContext, fileSink);
    HiveConf hconf = parseContext.getConf();
    boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask, hconf, fileSink, context.currentTask, isInsertTable);
    Path finalName = GenMapRedUtils.createMoveTask(context.currentTask, chDir, fileSink, parseContext, context.moveTask, hconf, context.dependencyTask);
    if (chDir) {
        // Merge the files in the destination table/partitions by creating Map-only merge job
        // If underlying data is RCFile or OrcFile, RCFileBlockMerge task or
        // OrcFileStripeMerge task would be created.
        LOG.info("using CombineHiveInputformat for the merge job");
        Utilities.FILE_OP_LOGGER.debug("will generate MR work for merging files from " + fileSink.getConf().getDirName() + " to " + finalName);
        GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName, context.dependencyTask, context.moveTask, hconf, context.currentTask, parseContext.getQueryState().getLineageState());
    }
    FetchTask fetchTask = parseContext.getFetchTask();
    if (fetchTask != null && context.currentTask.getNumChild() == 0) {
        if (fetchTask.isFetchFrom(fileSink.getConf())) {
            context.currentTask.setFetchSource(true);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveConf(org.apache.hadoop.hive.conf.HiveConf) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
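
Stripped of the Tez plumbing, processFileSink makes two independent decisions: schedule a merge job when one is required, and mark a leaf task as the fetch source when the plan's fetch reads directly from this sink. A hypothetical sketch of just that control flow:

public class FileSinkWiringSketch {

    // mergeRequired ~ GenMapRedUtils.isMergeRequired(...)
    // fetchReadsThisSink ~ fetchTask.isFetchFrom(fileSink.getConf())
    static void wire(boolean mergeRequired, boolean fetchReadsThisSink, int numChildTasks,
                     Runnable scheduleMergeJob, Runnable markFetchSource) {
        if (mergeRequired) {
            // analogous to createMRWorkForMergingFiles: a map-only job compacts
            // the small files left in the destination directory
            scheduleMergeJob.run();
        }
        if (fetchReadsThisSink && numChildTasks == 0) {
            // analogous to context.currentTask.setFetchSource(true)
            markFetchSource.run();
        }
    }
}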

Example 24 with FetchTask

use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

the class BaseSemanticAnalyzer method createFetchTask.

/**
 * Create a FetchTask for a given schema.
 */
protected FetchTask createFetchTask(String tableSchema) {
    String schema = "json".equals(conf.get(HiveConf.ConfVars.HIVE_DDL_OUTPUT_FORMAT.varname, "text")) ? "json#string" : tableSchema;
    Properties prop = new Properties();
    // Sets delimiter to tab (ascii 9)
    prop.setProperty(serdeConstants.SERIALIZATION_FORMAT, Integer.toString(Utilities.tabCode));
    prop.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, " ");
    String[] colTypes = schema.split("#");
    prop.setProperty("columns", colTypes[0]);
    prop.setProperty("columns.types", colTypes[1]);
    prop.setProperty(serdeConstants.SERIALIZATION_LIB, LazySimpleSerDe.class.getName());
    prop.setProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION, "-1");
    FetchWork fetch = new FetchWork(ctx.getResFile(), new TableDesc(TextInputFormat.class, IgnoreKeyTextOutputFormat.class, prop), -1);
    fetch.setSerializationNullFormat(" ");
    return (FetchTask) TaskFactory.get(fetch);
}
Also used : TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) QueryProperties(org.apache.hadoop.hive.ql.QueryProperties) Properties(java.util.Properties) IgnoreKeyTextOutputFormat(org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
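
The "columns#columns.types" convention used above is easy to miss: everything before the '#' is the column list and everything after is the matching type list, and in JSON output mode the whole schema collapses to a single string column named json. A runnable illustration of the split:

import java.util.Properties;

public class SchemaStringSketch {
    public static void main(String[] args) {
        // one string column named "json", as used when the DDL output format is json
        String schema = "json#string";
        String[] colTypes = schema.split("#");
        Properties prop = new Properties();
        // "json"
        prop.setProperty("columns", colTypes[0]);
        // "string"
        prop.setProperty("columns.types", colTypes[1]);
        System.out.println(prop);
    }
}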

Example 25 with FetchTask

use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

the class ExplainSemanticAnalyzer method analyzeInternal.

@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
    final int childCount = ast.getChildCount();
    // Skip TOK_QUERY.
    int i = 1;
    while (i < childCount) {
        int explainOptions = ast.getChild(i).getType();
        if (explainOptions == HiveParser.KW_FORMATTED) {
            config.setFormatted(true);
        } else if (explainOptions == HiveParser.KW_EXTENDED) {
            config.setExtended(true);
        } else if (explainOptions == HiveParser.KW_DEPENDENCY) {
            config.setDependency(true);
        } else if (explainOptions == HiveParser.KW_CBO) {
            config.setCbo(true);
        } else if (explainOptions == HiveParser.KW_COST) {
            config.setCboCost(true);
        } else if (explainOptions == HiveParser.KW_JOINCOST) {
            config.setCboJoinCost(true);
        } else if (explainOptions == HiveParser.KW_LOGICAL) {
            config.setLogical(true);
        } else if (explainOptions == HiveParser.KW_AUTHORIZATION) {
            config.setAuthorize(true);
        } else if (explainOptions == HiveParser.KW_ANALYZE) {
            config.setAnalyze(AnalyzeState.RUNNING);
            config.setExplainRootPath(ctx.getMRTmpPath());
        } else if (explainOptions == HiveParser.KW_VECTORIZATION) {
            config.setVectorization(true);
            if (i + 1 < childCount) {
                int vectorizationOption = ast.getChild(i + 1).getType();
                // [ONLY]
                if (vectorizationOption == HiveParser.TOK_ONLY) {
                    config.setVectorizationOnly(true);
                    i++;
                    if (i + 1 >= childCount) {
                        break;
                    }
                    vectorizationOption = ast.getChild(i + 1).getType();
                }
                // [SUMMARY|OPERATOR|EXPRESSION|DETAIL]
                if (vectorizationOption == HiveParser.TOK_SUMMARY) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY);
                    i++;
                } else if (vectorizationOption == HiveParser.TOK_OPERATOR) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.OPERATOR);
                    i++;
                } else if (vectorizationOption == HiveParser.TOK_EXPRESSION) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.EXPRESSION);
                    i++;
                } else if (vectorizationOption == HiveParser.TOK_DETAIL) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.DETAIL);
                    i++;
                }
            }
        } else if (explainOptions == HiveParser.KW_LOCKS) {
            config.setLocks(true);
        } else if (explainOptions == HiveParser.KW_AST) {
            config.setAst(true);
        } else if (explainOptions == HiveParser.KW_DEBUG) {
            config.setDebug(true);
        } else if (explainOptions == HiveParser.KW_DDL) {
            config.setDDL(true);
            config.setCbo(true);
            config.setVectorization(true);
        } else {
        // UNDONE: UNKNOWN OPTION?
        }
        i++;
    }
    ctx.setExplainConfig(config);
    ctx.setExplainPlan(true);
    ASTNode input = (ASTNode) ast.getChild(0);
    // Explain analyze is composed of two steps:
    // step 1 (ANALYZE_STATE.RUNNING), run the query and collect the runtime #rows;
    // step 2 (ANALYZE_STATE.ANALYZING), explain the query and provide the runtime #rows collected.
    if (config.getAnalyze() == AnalyzeState.RUNNING) {
        String query = ctx.getTokenRewriteStream().toString(input.getTokenStartIndex(), input.getTokenStopIndex());
        LOG.info("Explain analyze (running phase) for query " + query);
        conf.unset(ValidTxnList.VALID_TXNS_KEY);
        conf.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
        Context runCtx = null;
        try {
            runCtx = new Context(conf);
            // runCtx and ctx share the configuration, but not isExplainPlan()
            runCtx.setExplainConfig(config);
            try (Driver driver = new Driver(conf, runCtx, queryState.getLineageState())) {
                driver.run(query);
                while (driver.getResults(new ArrayList<String>())) {
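                    // Drain and discard the fetched rows; only the collected runtime stats matter here.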
                }
            } catch (CommandProcessorException e) {
                if (e.getCause() instanceof ReCompileException) {
                    throw (ReCompileException) e.getCause();
                } else {
                    throw new SemanticException(e.getMessage(), e);
                }
            }
            config.setOpIdToRuntimeNumRows(aggregateStats(config.getExplainRootPath()));
        } catch (IOException e1) {
            throw new SemanticException(e1);
        }
        ctx.resetOpContext();
        ctx.resetStream();
        TaskFactory.resetId();
        LOG.info("Explain analyze (analyzing phase) for query " + query);
        config.setAnalyze(AnalyzeState.ANALYZING);
    }
    // Creating new QueryState unfortunately causes all .q.out to change - do this in a separate ticket
    // Sharing QueryState between generating the plan and executing the query seems bad
    // BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(new QueryState(queryState.getConf()), input);
    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, input);
    sem.analyze(input, ctx);
    sem.validate();
    inputs = sem.getInputs();
    outputs = sem.getOutputs();
    ctx.setResFile(ctx.getLocalTmpPath());
    List<Task<?>> tasks = sem.getAllRootTasks();
    if (tasks == null) {
        tasks = Collections.emptyList();
    }
    FetchTask fetchTask = sem.getFetchTask();
    if (fetchTask != null) {
        // Initialize fetch work such that operator tree will be constructed.
        fetchTask.getWork().initializeForFetch(ctx.getOpContext());
    }
    if (sem instanceof SemanticAnalyzer) {
        pCtx = sem.getParseContext();
    }
    config.setUserLevelExplain(!config.isExtended() && !config.isFormatted() && !config.isDependency() && !config.isCbo() && !config.isLogical() && !config.isVectorization() && !config.isAuthorize() && ((HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_EXPLAIN_USER) && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) || (HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_SPARK_EXPLAIN_USER) && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark"))));
    ExplainWork work = new ExplainWork(ctx.getResFile(), pCtx, tasks, fetchTask, input, sem, config, ctx.getCboInfo(), ctx.getOptimizedSql(), ctx.getCalcitePlan());
    work.setAppendTaskType(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES));
    ExplainTask explTask = (ExplainTask) TaskFactory.get(work);
    fieldList = ExplainTask.getResultSchema();
    rootTasks.add(explTask);
}
Also used : StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) Context(org.apache.hadoop.hive.ql.Context) Task(org.apache.hadoop.hive.ql.exec.Task) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) ExplainTask(org.apache.hadoop.hive.ql.exec.ExplainTask) CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) Driver(org.apache.hadoop.hive.ql.Driver) ExplainWork(org.apache.hadoop.hive.ql.plan.ExplainWork) ReCompileException(org.apache.hadoop.hive.ql.reexec.ReCompileException) IOException(java.io.IOException)
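
The option loop above follows one pattern throughout: walk the sibling tokens left to right, and for EXPLAIN VECTORIZATION consume the optional [ONLY] and detail-level tokens with an extra i++ so they are not re-read as top-level options. A small standalone sketch of that lookahead-and-consume loop, with plain strings in place of the parser token types:

import java.util.List;

public class OptionWalkSketch {
    public static void main(String[] args) {
        List<String> tokens = List.of("VECTORIZATION", "ONLY", "DETAIL", "LOCKS");
        List<String> levels = List.of("SUMMARY", "OPERATOR", "EXPRESSION", "DETAIL");
        for (int i = 0; i < tokens.size(); i++) {
            String opt = tokens.get(i);
            if (opt.equals("VECTORIZATION")) {
                // optional [ONLY]: consume it so the outer loop skips it
                if (i + 1 < tokens.size() && tokens.get(i + 1).equals("ONLY")) {
                    System.out.println("vectorization: only");
                    i++;
                }
                // optional [SUMMARY|OPERATOR|EXPRESSION|DETAIL]
                if (i + 1 < tokens.size() && levels.contains(tokens.get(i + 1))) {
                    System.out.println("detail level: " + tokens.get(i + 1));
                    i++;
                }
            } else {
                System.out.println("flag: " + opt);
            }
        }
    }
}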

Aggregations

FetchTask (org.apache.hadoop.hive.ql.exec.FetchTask)27 ArrayList (java.util.ArrayList)11 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)8 List (java.util.List)7 ValidTxnList (org.apache.hadoop.hive.common.ValidTxnList)7 Path (org.apache.hadoop.fs.Path)6 FetchWork (org.apache.hadoop.hive.ql.plan.FetchWork)6 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)6 Test (org.junit.Test)6 IOException (java.io.IOException)5 HiveConf (org.apache.hadoop.hive.conf.HiveConf)5 Context (org.apache.hadoop.hive.ql.Context)4 CacheUsage (org.apache.hadoop.hive.ql.cache.results.CacheUsage)4 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)4 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)4 Operator (org.apache.hadoop.hive.ql.exec.Operator)3 Task (org.apache.hadoop.hive.ql.exec.Task)3 CommandProcessorException (org.apache.hadoop.hive.ql.processors.CommandProcessorException)3 LinkedHashMap (java.util.LinkedHashMap)2 LinkedHashSet (java.util.LinkedHashSet)2