Example 16 with FetchTask

Use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

The class SemanticAnalyzer, method analyzeInternal.

void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
    // 1. Generate Resolved Parse tree from syntax tree
    LOG.info("Starting Semantic Analysis");
    // Position aliases are processed here (this step was moved to this point in the flow)
    processPositionAlias(ast);
    if (!genResolvedParseTree(ast, plannerCtx)) {
        return;
    }
    // 2. Gen OP Tree from resolved Parse Tree
    Operator sinkOp = genOPTree(ast, plannerCtx);
    if (!unparseTranslator.isEnabled() && tableMask.isEnabled()) {
        // Rewrite the AST to expand * and apply masking/filtering on masked tables
        ASTNode tree = rewriteASTWithMaskAndFilter(ast);
        if (tree != ast) {
            ctx.setSkipTableMasking(true);
            init(true);
            // Process position aliases again on the rewritten tree
            processPositionAlias(tree);
            genResolvedParseTree(tree, plannerCtx);
            if (this instanceof CalcitePlanner) {
                ((CalcitePlanner) this).resetCalciteConfiguration();
            }
            sinkOp = genOPTree(tree, plannerCtx);
        }
    }
    // 3. Deduce Resultset Schema
    if (createVwDesc != null && !this.ctx.isCboSucceeded()) {
        resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
    } else {
        // resultSchema is non-null here only when CBO (the new Calcite return
        // path) succeeds; otherwise derive it from the sink operator's row resolver.
        if (resultSchema == null) {
            resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
        }
    }
    // 4. Generate Parse Context for Optimizer & Physical compiler
    copyInfoToQueryProperties(queryProperties);
    ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps,
        new HashSet<JoinOperator>(joinContext.keySet()),
        new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()),
        loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap,
        destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject,
        opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
        opToPartToSkewedPruner, viewAliasToInput,
        reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc,
        createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
    // 5. Take care of view creation
    if (createVwDesc != null) {
        if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
            return;
        }
        if (!ctx.isCboSucceeded()) {
            saveViewDefinition();
        }
        // Validate the create view statement; at this point createVwDesc has
        // all the information needed for the semantic check.
        validateCreateView();
        if (!createVwDesc.isMaterialized()) {
            // Since we're only creating a view (not executing it), we don't need to
            // optimize or translate the plan (and in fact, those procedures can
            // interfere with the view creation). So skip the rest of this method.
            ctx.setResDir(null);
            ctx.setResFile(null);
            try {
                PlanUtils.addInputsForView(pCtx);
            } catch (HiveException e) {
                throw new SemanticException(e);
            }
            // Generate lineage info for create view statements
            // if LineageLogger hook is configured.
            // Add the transformation that computes the lineage information.
            Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
            if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
                ArrayList<Transform> transformations = new ArrayList<Transform>();
                transformations.add(new HiveOpConverterPostProc());
                transformations.add(new Generator());
                for (Transform t : transformations) {
                    pCtx = t.transform(pCtx);
                }
                // A view has no result directory, so the view name is used as the location.
                SessionState.get().getLineageState().mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
            }
            return;
        }
    }
    // 6. Generate table access stats if required
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
        TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
        setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
    }
    // 7. Perform Logical optimization
    if (LOG.isDebugEnabled()) {
        LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    Optimizer optm = new Optimizer();
    optm.setPctx(pCtx);
    optm.initialize(conf);
    pCtx = optm.optimize();
    if (pCtx.getColumnAccessInfo() != null) {
        // set ColumnAccessInfo for view column authorization
        setColumnAccessInfo(pCtx.getColumnAccessInfo());
    }
    FetchTask origFetchTask = pCtx.getFetchTask();
    if (LOG.isDebugEnabled()) {
        LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    // 8. Generate column access stats if required - wait until column pruning
    // takes place during optimization
    boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
    if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
        // view column access info is carried by this.getColumnAccessInfo().
        setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
    }
    // 9. Optimize physical op tree & translate to target execution engine (MR, TEZ, ...)
    if (!ctx.getExplainLogical()) {
        TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
        compiler.init(queryState, console, db);
        compiler.compile(pCtx, rootTasks, inputs, outputs);
        fetchTask = pCtx.getFetchTask();
    }
    LOG.info("Completed plan generation");
    // 10. put accessed columns to readEntity
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
    }
    // 11. if desired check we're not going over partition scan limits
    if (!ctx.isExplainSkipExecution()) {
        enforceScanLimits(pCtx, origFetchTask);
    }
    return;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Optimizer(org.apache.hadoop.hive.ql.optimizer.Optimizer) ArrayList(java.util.ArrayList) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) HiveOpConverterPostProc(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc) Transform(org.apache.hadoop.hive.ql.optimizer.Transform) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Generator(org.apache.hadoop.hive.ql.optimizer.lineage.Generator)
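
Note how the method keeps two handles on the fetch task: origFetchTask is captured before task compilation so enforceScanLimits can inspect the unoptimized fetch, while the fetchTask field is set from the compiled plan. Below is a minimal sketch of how a caller might drain that compiled FetchTask afterwards; the helper is hypothetical, and it assumes the task was initialized elsewhere with a QueryState and Context and that fetch(...) keeps returning true while more rows may be available:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;

public class FetchTaskDrain {

    // Hypothetical helper: collect all rows from the FetchTask an analyzer produced.
    public static List<String> drainFetchTask(BaseSemanticAnalyzer sem) throws Exception {
        List<String> rows = new ArrayList<>();
        FetchTask ft = sem.getFetchTask();
        if (ft == null) {
            // e.g. a DDL statement, or a plan whose results go to a directory
            return rows;
        }
        List<String> batch = new ArrayList<>();
        // fetch(...) appends a batch of rows per call; loop until it reports
        // that the underlying operator tree is exhausted.
        while (ft.fetch(batch)) {
            rows.addAll(batch);
            batch.clear();
        }
        rows.addAll(batch); // defensive: pick up a final partial batch, if any
        return rows;
    }
}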

Example 17 with FetchTask

Use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

The class Driver, method getSchema.

/**
 * Get a Schema with fields represented with native Hive types
 */
private static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) {
    Schema schema = null;
    // If we have a plan, prefer its logical result schema if it's available;
    // otherwise, try digging out a fetch task; failing that, give up.
    if (sem == null) {
    // can't get any info without a plan
    } else if (sem.getResultSchema() != null) {
        List<FieldSchema> lst = sem.getResultSchema();
        schema = new Schema(lst, null);
    } else if (sem.getFetchTask() != null) {
        FetchTask ft = sem.getFetchTask();
        TableDesc td = ft.getTblDesc();
        // Partitioned tables don't have a tableDesc set on the FetchWork; instead
        // they carry a list of PartitionDesc objects, each with a table desc. Try
        // the desc of the first partition and use its deserializer.
        if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
            if (ft.getWork().getPartDesc().size() > 0) {
                td = ft.getWork().getPartDesc().get(0).getTableDesc();
            }
        }
        if (td == null) {
            LOG.info("No returning schema.");
        } else {
            String tableName = "result";
            List<FieldSchema> lst = null;
            try {
                lst = HiveMetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer(conf));
            } catch (Exception e) {
                LOG.warn("Error getting schema: " + org.apache.hadoop.util.StringUtils.stringifyException(e));
            }
            if (lst != null) {
                schema = new Schema(lst, null);
            }
        }
    }
    if (schema == null) {
        schema = new Schema();
    }
    LOG.info("Returning Hive schema: " + schema);
    return schema;
}
Also used : Schema(org.apache.hadoop.hive.metastore.api.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) List(java.util.List) LinkedList(java.util.LinkedList) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
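
The heart of the fallback chain is turning a TableDesc into FieldSchemas via its deserializer. Here is a compact, hedged sketch of just that conversion, mirroring the method above (the "result" table name is the same placeholder the original uses):

import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Schema;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public class SchemaFromDesc {

    // Sketch: derive a Schema from a TableDesc, falling back to an empty Schema,
    // just as getSchema above does when deserialization fails.
    static Schema schemaFromTableDesc(TableDesc td, HiveConf conf) {
        try {
            List<FieldSchema> fields =
                HiveMetaStoreUtils.getFieldsFromDeserializer("result", td.getDeserializer(conf));
            return fields != null ? new Schema(fields, null) : new Schema();
        } catch (Exception e) {
            // Log-and-fall-back rather than failing the query, as above.
            return new Schema();
        }
    }
}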

Example 18 with FetchTask

Use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

The class TestHCatMultiOutputFormat, method getTableData.

/**
 * Method to fetch table data
 *
 * @param table table name
 * @param database database name
 * @return list of rows, with columns comma-separated
 * @throws Exception if any error occurs
 */
private List<String> getTableData(String table, String database) throws Exception {
    QueryState queryState = new QueryState.Builder().build();
    HiveConf conf = queryState.getConf();
    conf.addResource("hive-site.xml");
    ArrayList<String> results = new ArrayList<String>();
    ArrayList<String> temp = new ArrayList<String>();
    Hive hive = Hive.get(conf);
    org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
    FetchWork work;
    if (!tbl.getPartCols().isEmpty()) {
        List<Partition> partitions = hive.getPartitions(tbl);
        List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
        List<Path> partLocs = new ArrayList<Path>();
        TableDesc tableDesc = Utilities.getTableDesc(tbl);
        for (Partition part : partitions) {
            partLocs.add(part.getDataLocation());
            partDesc.add(Utilities.getPartitionDescFromTableDesc(tableDesc, part, true));
        }
        work = new FetchWork(partLocs, partDesc, tableDesc);
        work.setLimit(100);
    } else {
        work = new FetchWork(tbl.getDataLocation(), Utilities.getTableDesc(tbl));
    }
    FetchTask task = new FetchTask();
    task.setWork(work);
    conf.set("_hive.hdfs.session.path", "path");
    conf.set("_hive.local.session.path", "path");
    task.initialize(queryState, null, null, new org.apache.hadoop.hive.ql.Context(conf));
    task.fetch(temp);
    for (String str : temp) {
        results.add(str.replace("\t", ","));
    }
    return results;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) ArrayList(java.util.ArrayList) QueryState(org.apache.hadoop.hive.ql.QueryState) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) Hive(org.apache.hadoop.hive.ql.metadata.Hive) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) HiveConf(org.apache.hadoop.hive.conf.HiveConf) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
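
Outside a test harness, the unpartitioned branch of the wiring above reduces to a few lines. A hedged sketch, assuming a resolved Table and HiveConf as in the test; QueryState.Builder#withHiveConf is assumed to exist on this Hive version, and the session-path properties mirror the test's workaround:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.FetchWork;

public class SimpleFetch {

    // Sketch: fetch up to `limit` rows from an unpartitioned table.
    static List<String> fetchRows(Table tbl, HiveConf conf, int limit) throws Exception {
        FetchWork work = new FetchWork(tbl.getDataLocation(), Utilities.getTableDesc(tbl));
        work.setLimit(limit);
        FetchTask task = new FetchTask();
        task.setWork(work);
        // The test above sets these session paths before initialize(); kept here
        // since task initialization resolves them from the conf.
        conf.set("_hive.hdfs.session.path", "path");
        conf.set("_hive.local.session.path", "path");
        QueryState queryState = new QueryState.Builder().withHiveConf(conf).build();
        task.initialize(queryState, null, null, new org.apache.hadoop.hive.ql.Context(conf));
        List<String> rows = new ArrayList<>();
        task.fetch(rows);
        return rows;
    }
}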

Example 19 with FetchTask

Use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

The class SQLOperation, method getNextRowSet.

@Override
public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException {
    validateDefaultFetchOrientation(orientation);
    assertState(Collections.singleton(OperationState.FINISHED));
    FetchTask fetchTask = driver.getFetchTask();
    boolean isBlobBased = false;
    if (fetchTask != null && fetchTask.getWork().isUsingThriftJDBCBinarySerDe()) {
        // Just fetch one blob if we've serialized thrift objects in final tasks
        maxRows = 1;
        isBlobBased = true;
    }
    RowSet rowSet = RowSetFactory.create(getResultSetSchema(), getProtocolVersion(), isBlobBased);
    try {
        /* If the client is requesting fetch-from-start and it's not the first time
         * reading from this operation, then reset the fetch position to the beginning.
         */
        if (orientation.equals(FetchOrientation.FETCH_FIRST) && fetchStarted) {
            driver.resetFetch();
        }
        fetchStarted = true;
        final int capacity = Math.toIntExact(maxRows);
        convey.ensureCapacity(capacity);
        driver.setMaxRows(capacity);
        if (driver.getResults(convey)) {
            if (convey.size() == capacity) {
                log.info("Result set buffer filled to capacity [{}]", capacity);
            }
            return decode(convey, rowSet);
        }
        return rowSet;
    } catch (Exception e) {
        throw new HiveSQLException("Unable to get the next row set with exception: " + e.getMessage(), e);
    } finally {
        convey.clear();
    }
}
Also used : HiveSQLException(org.apache.hive.service.cli.HiveSQLException) RowSet(org.apache.hive.service.cli.RowSet) CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SQLException(java.sql.SQLException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) HiveSQLException(org.apache.hive.service.cli.HiveSQLException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
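
Seen from the caller's side, the contract in getNextRowSet is: bound the batch with setMaxRows, call getResults until it returns false, and clear the buffer between batches. A hedged sketch of that paging loop against the same IDriver API used above:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.IDriver;

public class ResultPager {

    // Sketch: run a query and page through all results, batchSize rows at a time.
    static int countAllRows(IDriver driver, String query, int batchSize) throws Exception {
        driver.run(query);
        driver.setMaxRows(batchSize);
        List<String> batch = new ArrayList<>();
        int total = 0;
        // getResults returns false once the result set is exhausted.
        while (driver.getResults(batch)) {
            total += batch.size();
            batch.clear();
        }
        return total;
    }
}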

Example 20 with FetchTask

Use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

The class TestScheduledQueryService, method getNumRowsReturned.

private int getNumRowsReturned(IDriver driver, String query) throws Exception {
    driver.run(query);
    FetchTask ft = driver.getFetchTask();
    List<?> res = new ArrayList<>();
    if (ft == null) {
        return 0;
    }
    ft.fetch(res);
    return res.size();
}
Also used : ArrayList(java.util.ArrayList) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
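
A typical call site for this helper; the driver comes from the test fixture, and the table name and expected count here are illustrative only, not from the original test class:

import static org.junit.Assert.assertEquals;

// Hypothetical test method; `driver` is the fixture's IDriver.
public void checkScheduledQueryOutput() throws Exception {
    int rows = getNumRowsReturned(driver, "select * from tu");
    assertEquals(5, rows);
}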

Aggregations

FetchTask (org.apache.hadoop.hive.ql.exec.FetchTask): 27
ArrayList (java.util.ArrayList): 11
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8
List (java.util.List): 7
ValidTxnList (org.apache.hadoop.hive.common.ValidTxnList): 7
Path (org.apache.hadoop.fs.Path): 6
FetchWork (org.apache.hadoop.hive.ql.plan.FetchWork): 6
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 6
Test (org.junit.Test): 6
IOException (java.io.IOException): 5
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 5
Context (org.apache.hadoop.hive.ql.Context): 4
CacheUsage (org.apache.hadoop.hive.ql.cache.results.CacheUsage): 4
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 4
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 4
Operator (org.apache.hadoop.hive.ql.exec.Operator): 3
Task (org.apache.hadoop.hive.ql.exec.Task): 3
CommandProcessorException (org.apache.hadoop.hive.ql.processors.CommandProcessorException): 3
LinkedHashMap (java.util.LinkedHashMap): 2
LinkedHashSet (java.util.LinkedHashSet): 2