
Example 16 with QueryPlan

use of org.apache.hadoop.hive.ql.QueryPlan in project hive by apache.

the class PostExecOrcFileDump method run.

@Override
public void run(HookContext hookContext) throws Exception {
    assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);
    HiveConf conf = hookContext.getConf();
    LOG.info("Executing post execution hook to print orc file dump..");
    QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null) {
        return;
    }
    FetchTask fetchTask = plan.getFetchTask();
    if (fetchTask != null) {
        SessionState ss = SessionState.get();
        SessionState.LogHelper console = ss.getConsole();
        // file dump should write to session state console's error stream
        PrintStream old = System.out;
        System.setOut(console.getErrStream());
        FetchWork fetchWork = fetchTask.getWork();
        boolean partitionedTable = fetchWork.isPartitioned();
        List<Path> directories;
        if (partitionedTable) {
            LOG.info("Printing orc file dump for files from partitioned directory..");
            directories = fetchWork.getPartDir();
        } else {
            LOG.info("Printing orc file dump for files from table directory..");
            directories = Lists.newArrayList();
            directories.add(fetchWork.getTblDir());
        }
        for (Path dir : directories) {
            FileSystem fs = dir.getFileSystem(conf);
            List<FileStatus> fileList = HdfsUtils.listLocatedStatus(fs, dir, hiddenFileFilter);
            for (FileStatus fileStatus : fileList) {
                LOG.info("Printing orc file dump for " + fileStatus.getPath());
                if (fileStatus.getLen() > 0) {
                    try {
                        // just creating an ORC reader performs the sanity checks that verify this is a valid ORC file
                        OrcFile.createReader(fs, fileStatus.getPath());
                        console.printError("-- BEGIN ORC FILE DUMP --");
                        FileDump.main(new String[] { fileStatus.getPath().toString(), "--rowindex=*" });
                        console.printError("-- END ORC FILE DUMP --");
                    } catch (FileFormatException e) {
                        LOG.warn("File " + fileStatus.getPath() + " is not ORC. Skip printing orc file dump");
                    } catch (IOException e) {
                        LOG.warn("Skip printing orc file dump. Exception: " + e.getMessage());
                    }
                } else {
                    LOG.warn("Zero length file encountered. Skip printing orc file dump.");
                }
            }
        }
        // restore the old out stream
        System.out.flush();
        System.setOut(old);
    }
}
Also used: Path(org.apache.hadoop.fs.Path), SessionState(org.apache.hadoop.hive.ql.session.SessionState), PrintStream(java.io.PrintStream), FileStatus(org.apache.hadoop.fs.FileStatus), FileFormatException(org.apache.orc.FileFormatException), IOException(java.io.IOException), QueryPlan(org.apache.hadoop.hive.ql.QueryPlan), FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask), FileSystem(org.apache.hadoop.fs.FileSystem), FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork), HiveConf(org.apache.hadoop.hive.conf.HiveConf)
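The heart of this hook is the validate-then-dump step: constructing an ORC Reader doubles as a format check, and FileDump prints file metadata and row indexes to standard output, which is why the hook temporarily redirects System.out above. A minimal standalone sketch of that step follows; the helper name dumpIfOrc is illustrative, and the import paths for OrcFile and FileDump vary across Hive/ORC versions, so the ones below are an assumption matching the Hive 2.x layout.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.orc.FileFormatException;
import org.apache.orc.tools.FileDump;

public class OrcDumpSketch {

    // Validate that a file is ORC, then print its metadata and row indexes.
    public static void dumpIfOrc(Configuration conf, Path path) throws Exception {
        FileSystem fs = path.getFileSystem(conf);
        try {
            // Creating the reader runs the format sanity checks; it throws
            // FileFormatException for non-ORC files.
            OrcFile.createReader(fs, path);
            FileDump.main(new String[] { path.toString(), "--rowindex=*" });
        } catch (FileFormatException e) {
            System.err.println(path + " is not an ORC file, skipping dump");
        }
    }
}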

Example 17 with QueryPlan

use of org.apache.hadoop.hive.ql.QueryPlan in project hive by apache.

the class PostExecTezSummaryPrinter method run.

@Override
public void run(HookContext hookContext) throws Exception {
    assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);
    HiveConf conf = hookContext.getConf();
    if (!"tez".equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE))) {
        return;
    }
    LOG.info("Executing post execution hook to print tez summary..");
    SessionState ss = SessionState.get();
    SessionState.LogHelper console = ss.getConsole();
    QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null) {
        return;
    }
    List<TezTask> rootTasks = Utilities.getTezTasks(plan.getRootTasks());
    for (TezTask tezTask : rootTasks) {
        LOG.info("Printing summary for tez task: " + tezTask.getName());
        TezCounters counters = tezTask.getTezCounters();
        if (counters != null) {
            String hiveCountersGroup = HiveConf.getVar(conf, HiveConf.ConfVars.HIVECOUNTERGROUP);
            for (CounterGroup group : counters) {
                if (hiveCountersGroup.equals(group.getDisplayName())) {
                    console.printError(tezTask.getId() + " HIVE COUNTERS:");
                    for (TezCounter counter : group) {
                        console.printError("   " + counter.getDisplayName() + ": " + counter.getValue());
                    }
                } else if (group.getName().equals(FileSystemCounter.class.getName())) {
                    console.printError(tezTask.getId() + " FILE SYSTEM COUNTERS:");
                    for (TezCounter counter : group) {
                        // print only HDFS counters; local file system counters are skipped
                        if (counter.getName().contains("HDFS")) {
                            console.printError("   " + counter.getDisplayName() + ": " + counter.getValue());
                        }
                    }
                } else if (group.getName().equals(LlapIOCounters.class.getName())) {
                    console.printError(tezTask.getId() + " LLAP IO COUNTERS:");
                    List<String> testSafeCounters = LlapIOCounters.testSafeCounterNames();
                    for (TezCounter counter : group) {
                        if (testSafeCounters.contains(counter.getDisplayName())) {
                            console.printError("   " + counter.getDisplayName() + ": " + counter.getValue());
                        }
                    }
                }
            }
        }
    }
}
Also used: SessionState(org.apache.hadoop.hive.ql.session.SessionState), LlapIOCounters(org.apache.hadoop.hive.llap.counters.LlapIOCounters), CounterGroup(org.apache.tez.common.counters.CounterGroup), HiveConf(org.apache.hadoop.hive.conf.HiveConf), TezCounter(org.apache.tez.common.counters.TezCounter), QueryPlan(org.apache.hadoop.hive.ql.QueryPlan), TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask), TezCounters(org.apache.tez.common.counters.TezCounters)
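When a caller only needs one value, TezCounters can be queried directly instead of iterating every group. A minimal sketch, assuming a TezTask obtained as above; findCounter(group, name) mirrors the Hadoop counters API, and the counter name "HDFS_BYTES_READ" is illustrative rather than taken from the hook.

import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.tez.common.counters.FileSystemCounter;
import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.common.counters.TezCounters;

public class CounterLookupSketch {

    // Look up a single counter by group and name rather than scanning all groups.
    public static long hdfsBytesRead(TezTask tezTask) {
        TezCounters counters = tezTask.getTezCounters();
        if (counters == null) {
            // Counters are unavailable before the task has run.
            return 0L;
        }
        TezCounter counter =
            counters.findCounter(FileSystemCounter.class.getName(), "HDFS_BYTES_READ");
        return counter == null ? 0L : counter.getValue();
    }
}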

Example 18 with QueryPlan

use of org.apache.hadoop.hive.ql.QueryPlan in project hive by apache.

the class GenericUDTFGetSplits method createPlanFragment.

public PlanFragment createPlanFragment(String query, int num) throws HiveException {
    HiveConf conf = new HiveConf(SessionState.get().getConf());
    HiveConf.setVar(conf, ConfVars.HIVEFETCHTASKCONVERSION, "none");
    HiveConf.setVar(conf, ConfVars.HIVEQUERYRESULTFILEFORMAT, PlanUtils.LLAP_OUTPUT_FORMAT_KEY);
    String originalMode = HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_MODE);
    HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
    HiveConf.setBoolVar(conf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS, true);
    HiveConf.setBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS, true);
    conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_NODE_LOCAL_ONLY, true);
    // Tez/LLAP requires RPC query plan
    HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
    try {
        jc = DagUtils.getInstance().createConfiguration(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    Driver driver = new Driver(conf);
    try {
        CommandProcessorResponse cpr = driver.compileAndRespond(query);
        if (cpr.getResponseCode() != 0) {
            throw new HiveException("Failed to compile query: " + cpr.getException());
        }
        QueryPlan plan = driver.getPlan();
        List<Task<?>> roots = plan.getRootTasks();
        Schema schema = convertSchema(plan.getResultSchema());
        if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
            throw new HiveException("Was expecting a single TezTask.");
        }
        TezWork tezWork = ((TezTask) roots.get(0)).getWork();
        if (tezWork.getAllWork().size() != 1) {
            String tableName = "table_" + UUID.randomUUID().toString().replaceAll("[^A-Za-z0-9 ]", "");
            String ctas = "create temporary table " + tableName + " as " + query;
            LOG.info("Materializing the query for LLAPIF; CTAS: " + ctas);
            try {
                driver.resetQueryState();
                HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, originalMode);
                cpr = driver.run(ctas, false);
            } catch (CommandNeedRetryException e) {
                throw new HiveException(e);
            }
            if (cpr.getResponseCode() != 0) {
                throw new HiveException("Failed to create temp table: " + cpr.getException());
            }
            HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
            query = "select * from " + tableName;
            cpr = driver.compileAndRespond(query);
            if (cpr.getResponseCode() != 0) {
                throw new HiveException("Failed to create temp table: " + cpr.getException());
            }
            plan = driver.getPlan();
            roots = plan.getRootTasks();
            schema = convertSchema(plan.getResultSchema());
            if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
                throw new HiveException("Was expecting a single TezTask.");
            }
            tezWork = ((TezTask) roots.get(0)).getWork();
        }
        return new PlanFragment(tezWork, schema, jc);
    } finally {
        driver.close();
        driver.destroy();
    }
}
Also used: Task(org.apache.hadoop.hive.ql.exec.Task), TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), Schema(org.apache.hadoop.hive.llap.Schema), Driver(org.apache.hadoop.hive.ql.Driver), IOException(java.io.IOException), QueryPlan(org.apache.hadoop.hive.ql.QueryPlan), CommandNeedRetryException(org.apache.hadoop.hive.ql.CommandNeedRetryException), HiveConf(org.apache.hadoop.hive.conf.HiveConf), TezWork(org.apache.hadoop.hive.ql.plan.TezWork)
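createPlanFragment runs the same compile-and-validate sequence twice: once for the original query and, after the CTAS fallback, once more for the select over the temp table. That sequence could be factored into a helper along these lines; compileToTezWork is a hypothetical name, and the Driver calls are the ones already used above.

// Hypothetical helper: compile a query and return the work of its single
// root TezTask, failing the same way createPlanFragment does.
private TezWork compileToTezWork(Driver driver, String query) throws HiveException {
    CommandProcessorResponse cpr = driver.compileAndRespond(query);
    if (cpr.getResponseCode() != 0) {
        throw new HiveException("Failed to compile query: " + cpr.getException());
    }
    List<Task<?>> roots = driver.getPlan().getRootTasks();
    if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) {
        throw new HiveException("Was expecting a single TezTask.");
    }
    return ((TezTask) roots.get(0)).getWork();
}

The result schema would still be read from driver.getPlan().getResultSchema() after each call, as the original does.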

Example 19 with QueryPlan

use of org.apache.hadoop.hive.ql.QueryPlan in project hive by apache.

the class TestHiveDecimalParse method getColumnType.

private String getColumnType(String query) {
    Driver driver = createDriver();
    int rc = driver.compile(query);
    if (rc != 0) {
        return null;
    }
    QueryPlan plan = driver.getPlan();
    DDLTask task = (DDLTask) plan.getRootTasks().get(0);
    DDLWork work = task.getWork();
    CreateTableDesc spec = work.getCreateTblDesc();
    FieldSchema fs = spec.getCols().get(0);
    return fs.getType();
}
Also used: CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc), DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork), DDLTask(org.apache.hadoop.hive.ql.exec.DDLTask), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), Driver(org.apache.hadoop.hive.ql.Driver), QueryPlan(org.apache.hadoop.hive.ql.QueryPlan)
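A typical caller pairs this helper with an assertion on the parsed type string. A sketch of such a test, assuming JUnit's @Test and assertEquals are available as in the rest of the test class; the exact queries and assertions in TestHiveDecimalParse may differ.

@Test
public void testDecimalPrecisionScale() throws Exception {
    // getColumnType compiles the DDL and reads the first column's type
    // from the CreateTableDesc inside the query plan.
    String type = getColumnType("create table dec_test (d decimal(10,2))");
    assertEquals("decimal(10,2)", type);
}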

Example 20 with QueryPlan

use of org.apache.hadoop.hive.ql.QueryPlan in project hive by apache.

the class MapJoinCounterHook method run.

public void run(HookContext hookContext) {
    HiveConf conf = hookContext.getConf();
    boolean enableConvert = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECONVERTJOIN);
    if (!enableConvert) {
        return;
    }
    QueryPlan plan = hookContext.getQueryPlan();
    String queryID = plan.getQueryId();
    int commonJoin = 0;
    int hintedMapJoin = 0;
    int convertedMapJoin = 0;
    int hintedMapJoinLocal = 0;
    int convertedMapJoinLocal = 0;
    int backupCommonJoin = 0;
    List<TaskRunner> list = hookContext.getCompleteTaskList();
    for (TaskRunner tskRunner : list) {
        Task tsk = tskRunner.getTask();
        int tag = tsk.getTaskTag();
        switch(tag) {
            case Task.COMMON_JOIN:
                commonJoin++;
                break;
            case Task.HINTED_MAPJOIN:
                hintedMapJoin++;
                break;
            case Task.HINTED_MAPJOIN_LOCAL:
                hintedMapJoinLocal++;
                break;
            case Task.CONVERTED_MAPJOIN:
                convertedMapJoin++;
                break;
            case Task.CONVERTED_MAPJOIN_LOCAL:
                convertedMapJoinLocal++;
                break;
            case Task.BACKUP_COMMON_JOIN:
                backupCommonJoin++;
                break;
        }
    }
    LogHelper console = SessionState.getConsole();
    console.printError("[MapJoinCounter PostHook] COMMON_JOIN: " + commonJoin + " HINTED_MAPJOIN: " + hintedMapJoin + " HINTED_MAPJOIN_LOCAL: " + hintedMapJoinLocal + " CONVERTED_MAPJOIN: " + convertedMapJoin + " CONVERTED_MAPJOIN_LOCAL: " + convertedMapJoinLocal + " BACKUP_COMMON_JOIN: " + backupCommonJoin);
}
Also used: Task(org.apache.hadoop.hive.ql.exec.Task), LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper), HiveConf(org.apache.hadoop.hive.conf.HiveConf), QueryPlan(org.apache.hadoop.hive.ql.QueryPlan), TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner)
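Like the other hooks on this page, MapJoinCounterHook only fires if it is registered as a post-execution hook, and it returns early unless map-join conversion is enabled. A minimal configuration sketch, assuming the hook lives in org.apache.hadoop.hive.ql.hooks like the other post-exec hooks above, and using HiveConf's POSTEXECHOOKS ("hive.exec.post.hooks") and HIVECONVERTJOIN ("hive.auto.convert.join") variables.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.hooks.MapJoinCounterHook;

public class HookRegistrationSketch {

    public static HiveConf withMapJoinCounterHook() {
        HiveConf conf = new HiveConf();
        // Enable automatic map-join conversion so the hook does not return early.
        conf.setBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN, true);
        // hive.exec.post.hooks takes a comma-separated list of hook class names.
        conf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, MapJoinCounterHook.class.getName());
        return conf;
    }
}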

Aggregations

QueryPlan (org.apache.hadoop.hive.ql.QueryPlan): 34 uses
Test (org.junit.Test): 21 uses
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 11 uses
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 10 uses
List (java.util.List): 7 uses
Driver (org.apache.hadoop.hive.ql.Driver): 6 uses
IOException (java.io.IOException): 4 uses
LinkedHashMap (java.util.LinkedHashMap): 4 uses
Table (org.apache.hadoop.hive.ql.metadata.Table): 4 uses
SessionState (org.apache.hadoop.hive.ql.session.SessionState): 4 uses
LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper): 4 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 3 uses
Path (org.apache.hadoop.fs.Path): 3 uses
Context (org.apache.hadoop.hive.ql.Context): 3 uses
TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask): 3 uses
ArrayList (java.util.ArrayList): 2 uses
HashMap (java.util.HashMap): 2 uses
Map (java.util.Map): 2 uses
LlapIOCounters (org.apache.hadoop.hive.llap.counters.LlapIOCounters): 2 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 2 uses