Search in sources:

Example 21 with QueryPlan

use of org.apache.hadoop.hive.ql.QueryPlan in project hive by apache.

The run method of the class CheckColumnAccessHook.

@Override
public void run(HookContext hookContext) {
    // Post-execution hook: prints, per table, the set of columns the query
    // actually accessed. Active only when hive.stats.collect.scancols is on.
    HiveConf conf = hookContext.getConf();
    if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        return;
    }
    QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null) {
        return;
    }
    // Reuse the plan reference fetched above rather than re-querying the hook context.
    ColumnAccessInfo columnAccessInfo = plan.getColumnAccessInfo();
    if (columnAccessInfo == null) {
        return;
    }
    LogHelper console = SessionState.getConsole();
    Map<String, List<String>> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap();
    // Must be deterministic order map for consistent test output across Java versions
    Map<String, String> outputOrderedMap = new LinkedHashMap<String, String>();
    for (Map.Entry<String, List<String>> tableAccess : tableToColumnAccessMap.entrySet()) {
        StringBuilder perTableInfo = new StringBuilder();
        perTableInfo.append("Table:").append(tableAccess.getKey()).append("\n");
        // Sort columns to make output deterministic
        String[] columns = tableAccess.getValue().toArray(new String[0]);
        Arrays.sort(columns);
        perTableInfo.append("Columns:").append(StringUtils.join(columns, ',')).append("\n");
        outputOrderedMap.put(tableAccess.getKey(), perTableInfo.toString());
    }
    for (String perTableInfo : outputOrderedMap.values()) {
        console.printError(perTableInfo);
    }
}
Also used : LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo) LinkedHashMap(java.util.LinkedHashMap) HiveConf(org.apache.hadoop.hive.conf.HiveConf) List(java.util.List) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 22 with QueryPlan

use of org.apache.hadoop.hive.ql.QueryPlan in project hive by apache.

The run method of the class LineageLogger.

@Override
public void run(HookContext hookContext) {
    // Post-execution hook: serializes the query's lineage graph to JSON and
    // emits it to the console (test mode) or to the lineage log (normal mode).
    assert (hookContext.getHookType() == HookType.POST_EXEC_HOOK);
    QueryPlan plan = hookContext.getQueryPlan();
    Index index = hookContext.getIndex();
    SessionState ss = SessionState.get();
    // Only log for whitelisted operation types, and never for EXPLAIN plans.
    if (ss != null && index != null && OPERATION_NAMES.contains(plan.getOperationName()) && !plan.isExplain()) {
        try {
            StringBuilderWriter out = new StringBuilderWriter(1024);
            JsonWriter writer = new JsonWriter(out);
            String queryStr = plan.getQueryStr().trim();
            writer.beginObject();
            writer.name("version").value(FORMAT_VERSION);
            HiveConf conf = ss.getConf();
            boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST);
            if (!testMode) {
                // Don't emit user/timestamp info in test mode,
                // so that the test golden output file is fixed.
                long queryTime = plan.getQueryStartTime().longValue();
                if (queryTime == 0)
                    queryTime = System.currentTimeMillis();
                long duration = System.currentTimeMillis() - queryTime;
                writer.name("user").value(hookContext.getUgi().getUserName());
                // Timestamp is emitted in seconds, not milliseconds.
                writer.name("timestamp").value(queryTime / 1000);
                writer.name("duration").value(duration);
                // jobIds: one entry per completed task that produced a job id.
                writer.name("jobIds");
                writer.beginArray();
                List<TaskRunner> tasks = hookContext.getCompleteTaskList();
                if (tasks != null && !tasks.isEmpty()) {
                    for (TaskRunner task : tasks) {
                        String jobId = task.getTask().getJobID();
                        if (jobId != null) {
                            writer.value(jobId);
                        }
                    }
                }
                writer.endArray();
            }
            writer.name("engine").value(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE));
            writer.name("database").value(ss.getCurrentDatabase());
            writer.name("hash").value(getQueryHash(queryStr));
            writer.name("queryText").value(queryStr);
            // Edges are derived from the lineage index; vertices from the edges.
            List<Edge> edges = getEdges(plan, index);
            Set<Vertex> vertices = getVertices(edges);
            writeEdges(writer, edges);
            writeVertices(writer, vertices);
            writer.endObject();
            writer.close();
            // Logger the lineage info
            String lineage = out.toString();
            if (testMode) {
                // Logger to console
                log(lineage);
            } else {
                // In non-test mode, emit to a log file,
                // which can be different from the normal hive.log.
                // For example, using NoDeleteRollingFileAppender to
                // log to some file with different rolling policy.
                LOG.info(lineage);
            }
        } catch (Throwable t) {
            // Don't fail the query just because of any lineage issue.
            log("Failed to log lineage graph, query is not affected\n" + org.apache.hadoop.util.StringUtils.stringifyException(t));
        }
    }
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) StringBuilderWriter(org.apache.commons.io.output.StringBuilderWriter) Index(org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) JsonWriter(com.google.gson.stream.JsonWriter) TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner) HiveConf(org.apache.hadoop.hive.conf.HiveConf)

Example 23 with QueryPlan

use of org.apache.hadoop.hive.ql.QueryPlan in project hive by apache.

The run method of the class PostExecOrcRowGroupCountPrinter.

@Override
public void run(HookContext hookContext) throws Exception {
    // Post-execution hook (Tez only): prints the LLAP SELECTED_ROWGROUPS
    // counter for each root Tez task of the finished query.
    assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);
    HiveConf conf = hookContext.getConf();
    if (!"tez".equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE))) {
        return;
    }
    LOG.info("Executing post execution hook to print ORC row groups read counter..");
    QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null) {
        return;
    }
    // SessionState.get() is thread-local and can be null outside a session;
    // guard it before dereferencing to avoid an NPE.
    SessionState ss = SessionState.get();
    if (ss == null) {
        return;
    }
    SessionState.LogHelper console = ss.getConsole();
    List<TezTask> rootTasks = Utilities.getTezTasks(plan.getRootTasks());
    for (TezTask tezTask : rootTasks) {
        LOG.info("Printing ORC row group counter for tez task: " + tezTask.getName());
        TezCounters counters = tezTask.getTezCounters();
        if (counters != null) {
            for (CounterGroup group : counters) {
                // Only the LLAP IO counter group is of interest here.
                if (group.getName().equals(LlapIOCounters.class.getName())) {
                    console.printError(tezTask.getId() + " LLAP IO COUNTERS:");
                    for (TezCounter counter : group) {
                        if (counter.getDisplayName().equals(LlapIOCounters.SELECTED_ROWGROUPS.name())) {
                            console.printError("   " + counter.getDisplayName() + ": " + counter.getValue());
                        }
                    }
                }
            }
        }
    }
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) LlapIOCounters(org.apache.hadoop.hive.llap.counters.LlapIOCounters) CounterGroup(org.apache.tez.common.counters.CounterGroup) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TezCounter(org.apache.tez.common.counters.TezCounter) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 24 with QueryPlan

use of org.apache.hadoop.hive.ql.QueryPlan in project hive by apache.

The testSingleReadPartition method of the class TestDbTxnManager.

@Test
public void testSingleReadPartition() throws Exception {
    // Reading a single partition should acquire exactly one lock
    // consisting of exactly one lock component.
    addPartitionInput(newTable(true));
    QueryPlan plan = new MockQueryPlan(this);
    txnMgr.acquireLocks(plan, ctx, null);
    List<HiveLock> acquired = ctx.getHiveLocks();
    Assert.assertEquals(1, acquired.size());
    DbLockManager.DbHiveLock dbLock = (DbLockManager.DbHiveLock) acquired.get(0);
    Assert.assertEquals(1, TxnDbUtil.countLockComponents(dbLock.lockId));
    // After releasing the lock, no locks should remain in the lock manager.
    txnMgr.getLockManager().unlock(acquired.get(0));
    Assert.assertEquals(0, txnMgr.getLockManager().getLocks(false, false).size());
}
Also used : QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) Test(org.junit.Test)

Example 25 with QueryPlan

use of org.apache.hadoop.hive.ql.QueryPlan in project hive by apache.

The testJoinTable1AndTable2 method of the class TestColumnAccess.

@Test
public void testJoinTable1AndTable2() throws ParseException {
    // Compiles a two-table join and verifies the accessed columns reported
    // both by ColumnAccessInfo and by the plan's read entities.
    String query = "select * from t1 join t2 on (t1.id1 = t2.id1)";
    Driver driver = createDriver();
    int rc = driver.compile(query);
    Assert.assertEquals("Checking command success", 0, rc);
    QueryPlan plan = driver.getPlan();
    // check access columns from ColumnAccessInfo
    ColumnAccessInfo columnAccessInfo = plan.getColumnAccessInfo();
    assertTableColumns(columnAccessInfo.getTableToColumnAccessMap().get("default@t1"), "id1", "name1");
    assertTableColumns(columnAccessInfo.getTableToColumnAccessMap().get("default@t2"), "id2", "id1", "name1");
    // check access columns from readEntity
    Map<String, List<String>> tableColsMap = getColsFromReadEntity(plan.getInputs());
    assertTableColumns(tableColsMap.get("default@t1"), "id1", "name1");
    assertTableColumns(tableColsMap.get("default@t2"), "id2", "id1", "name1");
}

/**
 * Asserts that {@code cols} is non-null, has exactly {@code expected.length}
 * entries, and contains each expected column name.
 */
private static void assertTableColumns(List<String> cols, String... expected) {
    Assert.assertNotNull(cols);
    Assert.assertEquals(expected.length, cols.size());
    for (String col : expected) {
        // The original used Assert.assertNotNull(cols.contains(col)): the boolean
        // result is autoboxed to a non-null Boolean, so that assertion always
        // passed even when the column was absent. assertTrue checks membership.
        Assert.assertTrue(cols.contains(col));
    }
}
Also used : Driver(org.apache.hadoop.hive.ql.Driver) List(java.util.List) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) Test(org.junit.Test)

Aggregations

QueryPlan (org.apache.hadoop.hive.ql.QueryPlan)34 Test (org.junit.Test)21 HiveConf (org.apache.hadoop.hive.conf.HiveConf)11 WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)10 List (java.util.List)7 Driver (org.apache.hadoop.hive.ql.Driver)6 IOException (java.io.IOException)4 LinkedHashMap (java.util.LinkedHashMap)4 Table (org.apache.hadoop.hive.ql.metadata.Table)4 SessionState (org.apache.hadoop.hive.ql.session.SessionState)4 LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper)4 FileSystem (org.apache.hadoop.fs.FileSystem)3 Path (org.apache.hadoop.fs.Path)3 Context (org.apache.hadoop.hive.ql.Context)3 TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 LlapIOCounters (org.apache.hadoop.hive.llap.counters.LlapIOCounters)2 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)2