
Example 1 with TaskQueue

use of org.apache.hadoop.hive.ql.TaskQueue in project hive by apache.

the class TruncateTableOperation method execute.

@Override
public int execute() throws HiveException {
    if (desc.getColumnIndexes() != null) {
        ColumnTruncateWork truncateWork = new ColumnTruncateWork(desc.getColumnIndexes(), desc.getInputDir(), desc.getOutputDir());
        truncateWork.setListBucketingCtx(desc.getLbCtx());
        truncateWork.setMapperCannotSpanPartns(true);
        TaskQueue taskQueue = new TaskQueue();
        ColumnTruncateTask taskExec = new ColumnTruncateTask();
        taskExec.initialize(context.getQueryState(), null, taskQueue, null);
        taskExec.setWork(truncateWork);
        taskExec.setQueryPlan(context.getQueryPlan());
        int ret = taskExec.execute();
        if (taskExec.getException() != null) {
            context.getTask().setException(taskExec.getException());
        }
        return ret;
    }
    String tableName = desc.getTableName();
    Map<String, String> partSpec = desc.getPartSpec();
    ReplicationSpec replicationSpec = desc.getReplicationSpec();
    if (!DDLUtils.allowOperationInReplicationScope(context.getDb(), tableName, partSpec, replicationSpec)) {
        // no truncate, the table is missing either due to drop/rename which follows the truncate.
        // or the existing table is newer than our update.
        LOG.debug("DDLTask: Truncate Table/Partition is skipped as table {} / partition {} is newer than update", tableName, (partSpec == null) ? "null" : FileUtils.makePartName(new ArrayList<>(partSpec.keySet()), new ArrayList<>(partSpec.values())));
        return 0;
    }
    try {
        context.getDb().truncateTable(tableName, partSpec, replicationSpec != null && replicationSpec.isInReplicationScope() ? desc.getWriteId() : 0L);
    } catch (Exception e) {
        throw new HiveException(e, ErrorMsg.GENERIC_ERROR);
    }
    return 0;
}
Also used : ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ColumnTruncateTask(org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask) TaskQueue(org.apache.hadoop.hive.ql.TaskQueue) ColumnTruncateWork(org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork)

Example 2 with TaskQueue

use of org.apache.hadoop.hive.ql.TaskQueue in project hive by apache.

the class TestReplicationScenarios method getReplLoadRootTask.

private Task getReplLoadRootTask(String sourceDb, String replicadb, boolean isIncrementalDump, Tuple tuple) throws Throwable {
    HiveConf confTemp = driverMirror.getConf();
    Path loadPath = new Path(tuple.dumpLocation, ReplUtils.REPL_HIVE_BASE_DIR);
    ReplicationMetricCollector metricCollector;
    if (isIncrementalDump) {
        metricCollector = new IncrementalLoadMetricCollector(replicadb, tuple.dumpLocation, 0, confTemp);
    } else {
        metricCollector = new BootstrapLoadMetricCollector(replicadb, tuple.dumpLocation, 0, confTemp);
    }
    /* When 'hive.repl.retain.custom.db.locations.on.target' is enabled, the first iteration of repl load
       runs only the database-creation task, and the remaining tasks are executed only in the next iteration
       of the Repl Load Task. Hence this is disabled here so the test can exercise task optimization. */
    confTemp.setBoolVar(HiveConf.ConfVars.REPL_RETAIN_CUSTOM_LOCATIONS_FOR_DB_ON_TARGET, false);
    ReplLoadWork replLoadWork = new ReplLoadWork(confTemp, loadPath.toString(), sourceDb, replicadb, null, null, isIncrementalDump, Long.valueOf(tuple.lastReplId), 0L, metricCollector, false);
    Task replLoadTask = TaskFactory.get(replLoadWork, confTemp);
    replLoadTask.initialize(null, null, new TaskQueue(driver.getContext()), driver.getContext());
    replLoadTask.executeTask(null);
    Hive.closeCurrent();
    return replLoadWork.getRootTask();
}
Also used : Path(org.apache.hadoop.fs.Path) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) ReplLoadWork(org.apache.hadoop.hive.ql.exec.repl.ReplLoadWork) TaskQueue(org.apache.hadoop.hive.ql.TaskQueue) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ReplicationMetricCollector(org.apache.hadoop.hive.ql.parse.repl.metric.ReplicationMetricCollector) IncrementalLoadMetricCollector(org.apache.hadoop.hive.ql.parse.repl.load.metric.IncrementalLoadMetricCollector) BootstrapLoadMetricCollector(org.apache.hadoop.hive.ql.parse.repl.load.metric.BootstrapLoadMetricCollector)

Example 3 with TaskQueue

use of org.apache.hadoop.hive.ql.TaskQueue in project hive by apache.

the class TestSparkStatistics method testSparkStatistics.

@Test
public void testSparkStatistics() throws MalformedURLException, CommandProcessorException {
    String confDir = "../../data/conf/spark/standalone/hive-site.xml";
    HiveConf.setHiveSiteLocation(new File(confDir).toURI().toURL());
    HiveConf conf = new HiveConf();
    conf.set("spark.local.dir", Paths.get(System.getProperty("test.tmp.dir"), "TestSparkStatistics-local-dir").toString());
    SessionState.start(conf);
    Driver driver = null;
    try {
        driver = new Driver(new QueryState.Builder().withGenerateNewQueryId(true).withHiveConf(conf).build(), null);
        driver.run("create table test (col int)");
        Assert.assertEquals(0, driver.compile("select * from test order by col", true));
        List<SparkTask> sparkTasks = Utilities.getSparkTasks(driver.getPlan().getRootTasks());
        Assert.assertEquals(1, sparkTasks.size());
        SparkTask sparkTask = sparkTasks.get(0);
        TaskQueue taskQueue = new TaskQueue(driver.getContext());
        taskQueue.prepare(driver.getPlan());
        sparkTask.initialize(driver.getQueryState(), driver.getPlan(), taskQueue, driver.getContext());
        Assert.assertEquals(0, sparkTask.execute());
        Assert.assertNotNull(sparkTask.getSparkStatistics());
        List<SparkStatistic> sparkStats = Lists.newArrayList(sparkTask.getSparkStatistics().getStatisticGroup(SparkStatisticsNames.SPARK_GROUP_NAME).getStatistics());
        Assert.assertEquals(26, sparkStats.size());
        Map<String, String> statsMap = sparkStats.stream().collect(Collectors.toMap(SparkStatistic::getName, SparkStatistic::getValue));
        Assert.assertTrue(Long.parseLong(statsMap.get(SparkStatisticsNames.TASK_DURATION_TIME)) > 0);
        Assert.assertTrue(Long.parseLong(statsMap.get(SparkStatisticsNames.EXECUTOR_CPU_TIME)) > 0);
        Assert.assertTrue(Long.parseLong(statsMap.get(SparkStatisticsNames.EXECUTOR_DESERIALIZE_CPU_TIME)) > 0);
        Assert.assertTrue(Long.parseLong(statsMap.get(SparkStatisticsNames.EXECUTOR_DESERIALIZE_TIME)) > 0);
        Assert.assertTrue(Long.parseLong(statsMap.get(SparkStatisticsNames.EXECUTOR_RUN_TIME)) > 0);
    } finally {
        if (driver != null) {
            driver.run("drop table if exists test");
            driver.destroy();
        }
    }
}
Also used : Driver(org.apache.hadoop.hive.ql.Driver) TaskQueue(org.apache.hadoop.hive.ql.TaskQueue) HiveConf(org.apache.hadoop.hive.conf.HiveConf) SparkStatistic(org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistic) QueryState(org.apache.hadoop.hive.ql.QueryState) File(java.io.File) Test(org.junit.Test)
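
Unlike the standalone `new TaskQueue()` seen in the DDL examples on this page, the test above ties the queue to a running Driver: it is constructed with the driver's Context and primed with the compiled plan via prepare() before the single task is initialized. A minimal sketch of that wiring as a reusable test helper, where runSingleTaskFromDriver is a hypothetical name and driver and task come from the caller:

// Hypothetical helper distilled from the test above: run one task out of a
// Driver's compiled plan, with a TaskQueue bound to that driver's Context.
private static int runSingleTaskFromDriver(Driver driver, Task<?> task) {
    TaskQueue taskQueue = new TaskQueue(driver.getContext());
    // prime the queue with the compiled QueryPlan before the task is initialized
    taskQueue.prepare(driver.getPlan());
    task.initialize(driver.getQueryState(), driver.getPlan(), taskQueue, driver.getContext());
    // 0 indicates success, as asserted in the test above
    return task.execute();
}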

Example 4 with TaskQueue

use of org.apache.hadoop.hive.ql.TaskQueue in project hive by apache.

the class AlterTableConcatenateOperation method executeTask.

private int executeTask(Context generalContext, Task<?> task) {
    TaskQueue taskQueue = new TaskQueue();
    task.initialize(context.getQueryState(), context.getQueryPlan(), taskQueue, generalContext);
    int ret = task.execute();
    if (task.getException() != null) {
        context.getTask().setException(task.getException());
    }
    return ret;
}
Also used : TaskQueue(org.apache.hadoop.hive.ql.TaskQueue)
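
Examples 1 and 4 follow the same inline-execution pattern: a throwaway TaskQueue is created only so Task.initialize() can be called, the child task runs synchronously, and any exception is copied onto the calling task. A minimal sketch of that pattern, using the hypothetical helper name runChildTask; the QueryState, QueryPlan and Context arguments stand in for whatever the surrounding operation provides:

// Hypothetical helper illustrating the pattern shared by Examples 1 and 4.
private int runChildTask(Task<?> childTask, Task<?> callingTask, QueryState queryState,
        QueryPlan queryPlan, Context generalContext) {
    // a fresh TaskQueue exists only to satisfy Task.initialize(); nothing else is queued on it
    TaskQueue taskQueue = new TaskQueue();
    childTask.initialize(queryState, queryPlan, taskQueue, generalContext);
    // run synchronously and surface the result to the caller
    int ret = childTask.execute();
    if (childTask.getException() != null) {
        // propagate the child failure to the task that triggered it
        callingTask.setException(childTask.getException());
    }
    return ret;
}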

Example 5 with TaskQueue

use of org.apache.hadoop.hive.ql.TaskQueue in project hive by apache.

the class AcidExportSemanticAnalyzer method analyzeAcidExport.

/**
 * See {@link #isAcidExport(ASTNode)}
 * 1. create the temp table T
 * 2. compile 'insert into T select * from acidTable'
 * 3. compile 'export acidTable'  (acidTable will be replaced with T during execution)
 * 4. create task to drop T
 *
 * Using a true temp (session level) table means it should not affect replication and the table
 * is not visible outside the Session that created it, for security.
 */
private void analyzeAcidExport(ASTNode ast) throws SemanticException {
    assert ast != null && ast.getToken() != null && ast.getToken().getType() == HiveParser.TOK_EXPORT;
    ASTNode tableTree = (ASTNode) ast.getChild(0);
    assert tableTree != null && tableTree.getType() == HiveParser.TOK_TAB;
    ASTNode tokRefOrNameExportTable = (ASTNode) tableTree.getChild(0);
    Table exportTable = getTargetTable(tokRefOrNameExportTable);
    if (exportTable != null && (exportTable.isView() || exportTable.isMaterializedView())) {
        throw new SemanticException("Views and Materialized Views can not be exported.");
    }
    assert AcidUtils.isFullAcidTable(exportTable);
    // need to create the table "manually" rather than creating a task since it has to exist to
    // compile the insert into T...
    // this is db.table
    final String newTableName = getTmptTableNameForExport(exportTable);
    final TableName newTableNameRef = HiveTableName.of(newTableName);
    Map<String, String> tblProps = new HashMap<>();
    tblProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, Boolean.FALSE.toString());
    String location;
    // it has the same life cycle as the tmp table
    try {
        // Generate a unique ID for temp table path.
        // This path will be fixed for the life of the temp table.
        Path path = new Path(SessionState.getTempTableSpace(conf), UUID.randomUUID().toString());
        path = Warehouse.getDnsPath(path, conf);
        location = path.toString();
    } catch (MetaException err) {
        throw new SemanticException("Error while generating temp table path:", err);
    }
    CreateTableLikeDesc ctlt = new CreateTableLikeDesc(newTableName, false, true, null, null, location, null, null, tblProps,
            // important so we get an exception on name collision
            true, Warehouse.getQualifiedName(exportTable.getTTable()), false);
    Table newTable;
    try {
        ReadEntity dbForTmpTable = new ReadEntity(db.getDatabase(exportTable.getDbName()));
        // so the plan knows we are 'reading' this db - locks, security...
        inputs.add(dbForTmpTable);
        DDLTask createTableTask = (DDLTask) TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), ctlt), conf);
        // above get() doesn't set it
        createTableTask.setConf(conf);
        Context context = new Context(conf);
        createTableTask.initialize(null, null, new TaskQueue(context), context);
        createTableTask.execute();
        newTable = db.getTable(newTableName);
    } catch (HiveException ex) {
        throw new SemanticException(ex);
    }
    // now generate insert statement
    // insert into newTableName select * from ts <where partition spec>
    StringBuilder rewrittenQueryStr = generateExportQuery(newTable.getPartCols(), tokRefOrNameExportTable, tableTree, newTableName);
    ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
    Context rewrittenCtx = rr.rewrittenCtx;
    // it's set in parseRewrittenQuery()
    rewrittenCtx.setIsUpdateDeleteMerge(false);
    ASTNode rewrittenTree = rr.rewrittenTree;
    try {
        useSuper = true;
        // newTable has to exist at this point to compile
        super.analyze(rewrittenTree, rewrittenCtx);
    } finally {
        useSuper = false;
    }
    // now we have the rootTasks set up for Insert ... Select
    removeStatsTasks(rootTasks);
    // now make an ExportTask from temp table
    /* analyzeExport() creates a TableSpec, which in turn tries to build
      "public List<Partition> partitions" by looking in the metastore for Partitions matching
      the partition spec in the Export command. These of course don't exist yet, since we've not
      run the insert stmt yet. */
    Task<ExportWork> exportTask = ExportSemanticAnalyzer.analyzeExport(ast, newTableName, db, conf, inputs, outputs);
    // Add an alter table task to set transactional props
    // do it after populating temp table so that it's written as non-transactional table but
    // update props before export so that export archive metadata has these props.  This way when
    // IMPORT is done for this archive and target table doesn't exist, it will be created as Acid.
    Map<String, String> mapProps = new HashMap<>();
    mapProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, Boolean.TRUE.toString());
    AlterTableSetPropertiesDesc alterTblDesc = new AlterTableSetPropertiesDesc(newTableNameRef, null, null, false, mapProps, false, false, null);
    addExportTask(rootTasks, exportTask, TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc)));
    // Now make a task to drop temp table
    // {@link DropTableAnalyzer#analyzeInternal(ASTNode ast)}
    ReplicationSpec replicationSpec = new ReplicationSpec();
    DropTableDesc dropTblDesc = new DropTableDesc(newTableName, false, true, replicationSpec);
    Task<DDLWork> dropTask = TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), dropTblDesc), conf);
    exportTask.addDependentTask(dropTask);
    markReadEntityForUpdate();
    if (ctx.isExplainPlan()) {
        try {
            // so that "explain" doesn't "leak" tmp tables
            // TODO: catalog
            db.dropTable(newTable.getDbName(), newTable.getTableName(), true, true, true);
        } catch (HiveException ex) {
            LOG.warn("Unable to drop " + newTableName + " due to: " + ex.getMessage(), ex);
        }
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMap(java.util.HashMap) ExportWork(org.apache.hadoop.hive.ql.plan.ExportWork) CreateTableLikeDesc(org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc) TaskQueue(org.apache.hadoop.hive.ql.TaskQueue) AlterTableSetPropertiesDesc(org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableSetPropertiesDesc) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) Context(org.apache.hadoop.hive.ql.Context) Table(org.apache.hadoop.hive.ql.metadata.Table) DropTableDesc(org.apache.hadoop.hive.ql.ddl.table.drop.DropTableDesc) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) TableName(org.apache.hadoop.hive.common.TableName) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask)

Aggregations

TaskQueue (org.apache.hadoop.hive.ql.TaskQueue) 10
Path (org.apache.hadoop.fs.Path) 4
HiveConf (org.apache.hadoop.hive.conf.HiveConf) 4
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 4
QueryState (org.apache.hadoop.hive.ql.QueryState) 3
Test (org.junit.Test) 3
Context (org.apache.hadoop.hive.ql.Context) 2
DDLTask (org.apache.hadoop.hive.ql.ddl.DDLTask) 2
Task (org.apache.hadoop.hive.ql.exec.Task) 2
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork) 2
File (java.io.File) 1
IOException (java.io.IOException) 1
InputStream (java.io.InputStream) 1
HashMap (java.util.HashMap) 1
HashSet (java.util.HashSet) 1
FileSystem (org.apache.hadoop.fs.FileSystem) 1
LogInitializationException (org.apache.hadoop.hive.common.LogUtils.LogInitializationException) 1
TableName (org.apache.hadoop.hive.common.TableName) 1
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 1
Driver (org.apache.hadoop.hive.ql.Driver) 1