
Example 1 with DDLTask

Use of org.apache.hadoop.hive.ql.ddl.DDLTask in project hive by apache.

From the class CreateTableHook, method postAnalyze:

@Override
public void postAnalyze(HiveSemanticAnalyzerHookContext context, List<Task<?>> rootTasks) throws SemanticException {
    if (rootTasks.isEmpty()) {
        // No DDL task is created in the CREATE TABLE IF NOT EXISTS case.
        return;
    }
    Task<?> t = rootTasks.get(rootTasks.size() - 1);
    if (!(t instanceof DDLTask)) {
        return;
    }
    DDLTask task = (DDLTask) t;
    DDLDesc d = task.getWork().getDDLDesc();
    if (!(d instanceof CreateTableDesc)) {
        return;
    }
    CreateTableDesc desc = (CreateTableDesc) d;
    Map<String, String> tblProps = desc.getTblProps();
    if (tblProps == null) {
        // tblProps will be null if the user did not specify TBLPROPERTIES in the CREATE TABLE command.
        tblProps = new HashMap<String, String>();
    }
    // first check if we will allow the user to create table.
    String storageHandler = desc.getStorageHandler();
    if (StringUtils.isNotEmpty(storageHandler)) {
        try {
            HiveStorageHandler storageHandlerInst = HCatUtil.getStorageHandler(context.getConf(), desc.getStorageHandler(), desc.getSerName(), desc.getInputFormat(), desc.getOutputFormat());
        // Authorization checks are performed by the storageHandler.getAuthorizationProvider(), if
        // StorageDelegationAuthorizationProvider is used.
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }
    try {
        Table table = context.getHive().newTable(desc.getDbTableName());
        if (desc.getLocation() != null) {
            table.setDataLocation(new Path(desc.getLocation()));
        }
        if (desc.getStorageHandler() != null) {
            table.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, desc.getStorageHandler());
        }
        for (Map.Entry<String, String> prop : tblProps.entrySet()) {
            table.setProperty(prop.getKey(), prop.getValue());
        }
        for (Map.Entry<String, String> prop : desc.getSerdeProps().entrySet()) {
            table.setSerdeParam(prop.getKey(), prop.getValue());
        }
        if (HCatAuthUtil.isAuthorizationEnabled(context.getConf())) {
            authorize(table, Privilege.CREATE);
        }
    } catch (HiveException ex) {
        throw new SemanticException(ex);
    }
    desc.setTblProps(tblProps);
    // tableName is an instance field of this hook, populated earlier in preAnalyze().
    context.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, tableName);
}
Also used : Path(org.apache.hadoop.fs.Path) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask) DDLDesc(org.apache.hadoop.hive.ql.ddl.DDLDesc) HashMap(java.util.HashMap) Map(java.util.Map) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
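
The guard sequence at the top of the hook (take the last root task, check that it is a DDLTask, then check the type of its DDLDesc) is a reusable pattern. Below is a minimal sketch of a generic extraction helper built only from the accessors shown above (Task, DDLTask.getWork(), DDLWork.getDDLDesc()); the class and method names are hypothetical, not part of Hive.

import java.util.List;

import org.apache.hadoop.hive.ql.ddl.DDLDesc;
import org.apache.hadoop.hive.ql.ddl.DDLTask;
import org.apache.hadoop.hive.ql.exec.Task;

public final class DdlDescSupport {

    private DdlDescSupport() {
    }

    /**
     * Returns the DDLDesc of the requested type carried by the last root task,
     * or null if the last task is not a DDLTask or carries a different desc.
     */
    public static <T extends DDLDesc> T lastDescOfType(List<Task<?>> rootTasks, Class<T> descClass) {
        if (rootTasks == null || rootTasks.isEmpty()) {
            return null;
        }
        Task<?> last = rootTasks.get(rootTasks.size() - 1);
        if (!(last instanceof DDLTask)) {
            return null;
        }
        DDLDesc desc = ((DDLTask) last).getWork().getDDLDesc();
        return descClass.isInstance(desc) ? descClass.cast(desc) : null;
    }
}

With such a helper, postAnalyze could start with CreateTableDesc desc = DdlDescSupport.lastDescOfType(rootTasks, CreateTableDesc.class) and return early when the result is null.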

Example 2 with DDLTask

Use of org.apache.hadoop.hive.ql.ddl.DDLTask in project hive by apache.

From the class DummyCreateTableHook, method postAnalyze:

@Override
public void postAnalyze(HiveSemanticAnalyzerHookContext context, List<Task<?>> rootTasks) throws SemanticException {
    CreateTableDesc desc = (CreateTableDesc) ((DDLTask) rootTasks.get(rootTasks.size() - 1)).getWork().getDDLDesc();
    Map<String, String> tblProps = desc.getTblProps();
    if (tblProps == null) {
        tblProps = new HashMap<String, String>();
    }
    tblProps.put("createdBy", DummyCreateTableHook.class.getName());
    tblProps.put("Message", "Open Source rocks!!");
    desc.setTblProps(tblProps);
}
Also used : CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask)
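
For context, a hook like this only runs if it is registered with the compiler. Here is a hedged sketch of the registration step, assuming the standard hive.semantic.analyzer.hook property; the fully qualified class name below is a placeholder for wherever the hook actually lives.

import org.apache.hadoop.hive.conf.HiveConf;

public class HookRegistrationSketch {

    public static HiveConf confWithHook() {
        HiveConf conf = new HiveConf();
        // Comma-separated list of HiveSemanticAnalyzerHook implementations invoked
        // around semantic analysis (preAnalyze/postAnalyze).
        // The class name here is a hypothetical placeholder.
        conf.set("hive.semantic.analyzer.hook", "org.example.hooks.DummyCreateTableHook");
        return conf;
    }
}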

Example 3 with DDLTask

Use of org.apache.hadoop.hive.ql.ddl.DDLTask in project hive by apache.

From the class AcidExportSemanticAnalyzer, method analyzeAcidExport:

/**
 * See {@link #isAcidExport(ASTNode)}
 * 1. create the temp table T
 * 2. compile 'insert into T select * from acidTable'
 * 3. compile 'export acidTable' (acidTable will be replaced with T during execution)
 * 4. create a task to drop T
 *
 * Using a true temp (session level) table means it should not affect replication, and the table
 * is not visible outside the session that created it, which helps security.
 */
private void analyzeAcidExport(ASTNode ast) throws SemanticException {
    assert ast != null && ast.getToken() != null && ast.getToken().getType() == HiveParser.TOK_EXPORT;
    ASTNode tableTree = (ASTNode) ast.getChild(0);
    assert tableTree != null && tableTree.getType() == HiveParser.TOK_TAB;
    ASTNode tokRefOrNameExportTable = (ASTNode) tableTree.getChild(0);
    Table exportTable = getTargetTable(tokRefOrNameExportTable);
    if (exportTable != null && (exportTable.isView() || exportTable.isMaterializedView())) {
        throw new SemanticException("Views and Materialized Views can not be exported.");
    }
    assert AcidUtils.isFullAcidTable(exportTable);
    // need to create the table "manually" rather than creating a task since it has to exist to
    // compile the insert into T...
    // this is db.table
    final String newTableName = getTmptTableNameForExport(exportTable);
    final TableName newTableNameRef = HiveTableName.of(newTableName);
    Map<String, String> tblProps = new HashMap<>();
    tblProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, Boolean.FALSE.toString());
    String location;
    // it has the same life cycle as the tmp table
    try {
        // Generate a unique ID for temp table path.
        // This path will be fixed for the life of the temp table.
        Path path = new Path(SessionState.getTempTableSpace(conf), UUID.randomUUID().toString());
        path = Warehouse.getDnsPath(path, conf);
        location = path.toString();
    } catch (MetaException err) {
        throw new SemanticException("Error while generating temp table path:", err);
    }
    // the 'true' argument after tblProps is important so we get an exception on name collision
    CreateTableLikeDesc ctlt = new CreateTableLikeDesc(newTableName, false, true, null, null, location, null, null,
        tblProps, true, Warehouse.getQualifiedName(exportTable.getTTable()), false);
    Table newTable;
    try {
        ReadEntity dbForTmpTable = new ReadEntity(db.getDatabase(exportTable.getDbName()));
        // so the plan knows we are 'reading' this db - locks, security...
        inputs.add(dbForTmpTable);
        DDLTask createTableTask = (DDLTask) TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), ctlt), conf);
        // above get() doesn't set it
        createTableTask.setConf(conf);
        Context context = new Context(conf);
        createTableTask.initialize(null, null, new TaskQueue(context), context);
        createTableTask.execute();
        newTable = db.getTable(newTableName);
    } catch (HiveException ex) {
        throw new SemanticException(ex);
    }
    // now generate insert statement
    // insert into newTableName select * from ts <where partition spec>
    StringBuilder rewrittenQueryStr = generateExportQuery(newTable.getPartCols(), tokRefOrNameExportTable, tableTree, newTableName);
    ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
    Context rewrittenCtx = rr.rewrittenCtx;
    // it's set in parseRewrittenQuery()
    rewrittenCtx.setIsUpdateDeleteMerge(false);
    ASTNode rewrittenTree = rr.rewrittenTree;
    try {
        useSuper = true;
        // newTable has to exist at this point to compile
        super.analyze(rewrittenTree, rewrittenCtx);
    } finally {
        useSuper = false;
    }
    // now we have the rootTasks set up for Insert ... Select
    removeStatsTasks(rootTasks);
    // now make an ExportTask from temp table
    /* analyzeExport() creates TableSpec, which in turn tries to build
       "public List<Partition> partitions" by looking in the metastore for partitions matching
       the partition spec in the EXPORT command. These of course don't exist yet, since we
       have not run the insert statement yet. */
    Task<ExportWork> exportTask = ExportSemanticAnalyzer.analyzeExport(ast, newTableName, db, conf, inputs, outputs);
    // Add an alter table task to set transactional props
    // do it after populating temp table so that it's written as non-transactional table but
    // update props before export so that export archive metadata has these props.  This way when
    // IMPORT is done for this archive and target table doesn't exist, it will be created as Acid.
    Map<String, String> mapProps = new HashMap<>();
    mapProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, Boolean.TRUE.toString());
    AlterTableSetPropertiesDesc alterTblDesc = new AlterTableSetPropertiesDesc(newTableNameRef, null, null, false, mapProps, false, false, null);
    addExportTask(rootTasks, exportTask, TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc)));
    // Now make a task to drop temp table
    // {@link DropTableAnalyzer#analyzeInternal(ASTNode ast)}
    ReplicationSpec replicationSpec = new ReplicationSpec();
    DropTableDesc dropTblDesc = new DropTableDesc(newTableName, false, true, replicationSpec);
    Task<DDLWork> dropTask = TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), dropTblDesc), conf);
    exportTask.addDependentTask(dropTask);
    markReadEntityForUpdate();
    if (ctx.isExplainPlan()) {
        try {
            // so that "explain" doesn't "leak" tmp tables
            // TODO: catalog
            db.dropTable(newTable.getDbName(), newTable.getTableName(), true, true, true);
        } catch (HiveException ex) {
            LOG.warn("Unable to drop " + newTableName + " due to: " + ex.getMessage(), ex);
        }
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMap(java.util.HashMap) ExportWork(org.apache.hadoop.hive.ql.plan.ExportWork) CreateTableLikeDesc(org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc) TaskQueue(org.apache.hadoop.hive.ql.TaskQueue) AlterTableSetPropertiesDesc(org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableSetPropertiesDesc) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) Context(org.apache.hadoop.hive.ql.Context) Table(org.apache.hadoop.hive.ql.metadata.Table) DropTableDesc(org.apache.hadoop.hive.ql.ddl.table.drop.DropTableDesc) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) TableName(org.apache.hadoop.hive.common.TableName) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask)
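
Step 4 of the javadoc (create a task to drop T) reduces to a small amount of wiring. The sketch below isolates that wiring, reusing only the constructors and calls that appear above (DropTableDesc, DDLWork, TaskFactory.get, addDependentTask); the helper class is hypothetical, and the TaskFactory/ReplicationSpec import paths are best-effort assumptions.

import java.util.HashSet;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ddl.DDLWork;
import org.apache.hadoop.hive.ql.ddl.table.drop.DropTableDesc;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;

public final class TempTableCleanup {

    private TempTableCleanup() {
    }

    /**
     * Chains a DROP TABLE task after the given task so the temp table is
     * removed only once the preceding task has finished.
     */
    public static Task<DDLWork> dropAfter(Task<?> afterTask, String tmpTableName, HiveConf conf) {
        DropTableDesc dropDesc = new DropTableDesc(tmpTableName, false, true, new ReplicationSpec());
        Task<DDLWork> dropTask = TaskFactory.get(new DDLWork(new HashSet<>(), new HashSet<>(), dropDesc), conf);
        afterTask.addDependentTask(dropTask);
        return dropTask;
    }
}

In analyzeAcidExport the equivalent call would hang the drop task off exportTask, so the temp table disappears only after the export archive has been written.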

Example 4 with DDLTask

Use of org.apache.hadoop.hive.ql.ddl.DDLTask in project hive by apache.

From the class TestConditionalResolverCommonJoin, method testResolvingDriverAlias:

@Test
public void testResolvingDriverAlias() throws Exception {
    ConditionalResolverCommonJoin resolver = new ConditionalResolverCommonJoin();
    Map<Path, List<String>> pathToAliases = new HashMap<>();
    pathToAliases.put(new Path("path1"), new ArrayList<String>(Arrays.asList("alias1", "alias2")));
    pathToAliases.put(new Path("path2"), new ArrayList<String>(Arrays.asList("alias3")));
    HashMap<String, Long> aliasToKnownSize = new HashMap<String, Long>();
    aliasToKnownSize.put("alias1", 1024l);
    aliasToKnownSize.put("alias2", 2048l);
    aliasToKnownSize.put("alias3", 4096l);
    DDLTask task1 = new DDLTask();
    task1.setId("alias2");
    DDLTask task2 = new DDLTask();
    task2.setId("alias3");
    // joins alias1, alias2, alias3 (alias1 was not eligible for big pos)
    // Must be deterministic order map for consistent q-test output across Java versions
    HashMap<Task<?>, Set<String>> taskToAliases = new LinkedHashMap<Task<?>, Set<String>>();
    taskToAliases.put(task1, new HashSet<String>(Arrays.asList("alias2")));
    taskToAliases.put(task2, new HashSet<String>(Arrays.asList("alias3")));
    ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx ctx = new ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx();
    ctx.setPathToAliases(pathToAliases);
    ctx.setTaskToAliases(taskToAliases);
    ctx.setAliasToKnownSize(aliasToKnownSize);
    HiveConf conf = new HiveConf();
    conf.setLongVar(HiveConf.ConfVars.HIVESMALLTABLESFILESIZE, 4096);
    // alias3 only can be selected
    Task resolved = resolver.resolveMapJoinTask(ctx, conf);
    Assert.assertEquals("alias3", resolved.getId());
    conf.setLongVar(HiveConf.ConfVars.HIVESMALLTABLESFILESIZE, 65536);
    // alias1, alias2, alias3 can all be selected, but the selection is overridden by the biggest one (alias3)
    resolved = resolver.resolveMapJoinTask(ctx, conf);
    Assert.assertEquals("alias3", resolved.getId());
    conf.setLongVar(HiveConf.ConfVars.HIVESMALLTABLESFILESIZE, 2048);
    // not selected
    resolved = resolver.resolveMapJoinTask(ctx, conf);
    Assert.assertNull(resolved);
}
Also used : Path(org.apache.hadoop.fs.Path) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask) Task(org.apache.hadoop.hive.ql.exec.Task) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) LinkedHashMap(java.util.LinkedHashMap) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask) ArrayList(java.util.ArrayList) List(java.util.List) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Test(org.junit.Test)
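
The test repeats the same three-line pattern (set the small-table threshold, resolve, assert). Here is a hedged refactoring sketch of that triple; the class and method names are hypothetical, and the package of ConditionalResolverCommonJoin is a best-effort guess.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin;
import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx;
import org.junit.Assert;

public final class ResolverAssertions {

    private ResolverAssertions() {
    }

    /** Asserts which task id the resolver picks for a given small-table size threshold. */
    static void assertResolvedId(ConditionalResolverCommonJoin resolver, ConditionalResolverCommonJoinCtx ctx,
            HiveConf conf, long smallTableThreshold, String expectedTaskId) {
        conf.setLongVar(HiveConf.ConfVars.HIVESMALLTABLESFILESIZE, smallTableThreshold);
        Task<?> resolved = resolver.resolveMapJoinTask(ctx, conf);
        if (expectedTaskId == null) {
            Assert.assertNull(resolved);
        } else {
            Assert.assertEquals(expectedTaskId, resolved.getId());
        }
    }
}

With it, the three checks above collapse to assertResolvedId(resolver, ctx, conf, 4096, "alias3"), assertResolvedId(resolver, ctx, conf, 65536, "alias3"), and assertResolvedId(resolver, ctx, conf, 2048, null).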

Example 5 with DDLTask

Use of org.apache.hadoop.hive.ql.ddl.DDLTask in project hive by apache.

From the class TaskCompiler, method patchUpAfterCTASorMaterializedView:

private void patchUpAfterCTASorMaterializedView(List<Task<?>> rootTasks, Set<ReadEntity> inputs, Set<WriteEntity> outputs, Task<?> createTask, boolean createTaskAfterMoveTask) {
    // clear the mapredWork output file from outputs for CTAS
    // DDLWork at the tail of the chain will have the output
    Iterator<WriteEntity> outIter = outputs.iterator();
    while (outIter.hasNext()) {
        switch(outIter.next().getType()) {
            case DFS_DIR:
            case LOCAL_DIR:
                outIter.remove();
                break;
            default:
                break;
        }
    }
    // find all leaf tasks and make the DDLTask as a dependent task on all of them
    Set<Task<?>> leaves = new LinkedHashSet<>();
    getLeafTasks(rootTasks, leaves);
    assert (leaves.size() > 0);
    // Target task is supposed to be the last task
    Task<?> targetTask = createTask;
    for (Task<?> task : leaves) {
        if (task instanceof StatsTask) {
            // StatsTask requires the table to already exist
            for (Task<?> parentOfStatsTask : task.getParentTasks()) {
                if (parentOfStatsTask instanceof MoveTask && !createTaskAfterMoveTask) {
                    // For partitioned CTAS, we need to create the table before the move task
                    // as we need to create the partitions in metastore and for that we should
                    // have already registered the table
                    interleaveTask(parentOfStatsTask, createTask);
                } else {
                    parentOfStatsTask.addDependentTask(createTask);
                }
            }
            for (Task<?> parentOfCrtTblTask : createTask.getParentTasks()) {
                parentOfCrtTblTask.removeDependentTask(task);
            }
            createTask.addDependentTask(task);
            targetTask = task;
        } else if (task instanceof MoveTask && !createTaskAfterMoveTask) {
            // For partitioned CTAS, we need to create the table before the move task
            // as we need to create the partitions in metastore and for that we should
            // have already registered the table
            interleaveTask(task, createTask);
            targetTask = task;
        } else {
            task.addDependentTask(createTask);
        }
    }
    // Add task to insert / delete materialized view from registry if needed
    if (createTask instanceof DDLTask) {
        DDLTask ddlTask = (DDLTask) createTask;
        DDLWork work = ddlTask.getWork();
        DDLDesc desc = work.getDDLDesc();
        if (desc instanceof CreateMaterializedViewDesc) {
            CreateMaterializedViewDesc createViewDesc = (CreateMaterializedViewDesc) desc;
            String tableName = createViewDesc.getViewName();
            boolean retrieveAndInclude = createViewDesc.isRewriteEnabled();
            MaterializedViewUpdateDesc materializedViewUpdateDesc = new MaterializedViewUpdateDesc(tableName, retrieveAndInclude, false, false);
            DDLWork ddlWork = new DDLWork(inputs, outputs, materializedViewUpdateDesc);
            targetTask.addDependentTask(TaskFactory.get(ddlWork, conf));
        } else if (desc instanceof AlterMaterializedViewRewriteDesc) {
            AlterMaterializedViewRewriteDesc alterMVRewriteDesc = (AlterMaterializedViewRewriteDesc) desc;
            String tableName = alterMVRewriteDesc.getMaterializedViewName();
            boolean retrieveAndInclude = alterMVRewriteDesc.isRewriteEnable();
            boolean disableRewrite = !alterMVRewriteDesc.isRewriteEnable();
            MaterializedViewUpdateDesc materializedViewUpdateDesc = new MaterializedViewUpdateDesc(tableName, retrieveAndInclude, disableRewrite, false);
            DDLWork ddlWork = new DDLWork(inputs, outputs, materializedViewUpdateDesc);
            targetTask.addDependentTask(TaskFactory.get(ddlWork, conf));
        }
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask) BasicStatsNoJobTask(org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) Task(org.apache.hadoop.hive.ql.exec.Task) StatsTask(org.apache.hadoop.hive.ql.exec.StatsTask) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) AlterMaterializedViewRewriteDesc(org.apache.hadoop.hive.ql.ddl.view.materialized.alter.rewrite.AlterMaterializedViewRewriteDesc) StatsTask(org.apache.hadoop.hive.ql.exec.StatsTask) CreateMaterializedViewDesc(org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc) MaterializedViewUpdateDesc(org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) DDLDesc(org.apache.hadoop.hive.ql.ddl.DDLDesc)
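
The materialized-view branch at the end is another instance of the DDLTask-to-DDLDesc dispatch from Example 1. The following sketch isolates just that decision, using only the desc accessors shown above; the helper class and method are hypothetical.

import org.apache.hadoop.hive.ql.ddl.DDLDesc;
import org.apache.hadoop.hive.ql.ddl.DDLTask;
import org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc;
import org.apache.hadoop.hive.ql.ddl.view.materialized.alter.rewrite.AlterMaterializedViewRewriteDesc;
import org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc;
import org.apache.hadoop.hive.ql.exec.Task;

public final class MaterializedViewUpdatePlanner {

    private MaterializedViewUpdatePlanner() {
    }

    /**
     * Mirrors the dispatch at the end of patchUpAfterCTASorMaterializedView:
     * returns the registry-update desc implied by the create task, or null when
     * the task is not a materialized-view related DDLTask.
     */
    static MaterializedViewUpdateDesc registryUpdateFor(Task<?> createTask) {
        if (!(createTask instanceof DDLTask)) {
            return null;
        }
        DDLDesc desc = ((DDLTask) createTask).getWork().getDDLDesc();
        if (desc instanceof CreateMaterializedViewDesc) {
            CreateMaterializedViewDesc create = (CreateMaterializedViewDesc) desc;
            // retrieveAndInclude when rewriting is enabled; never disable rewrite on create
            return new MaterializedViewUpdateDesc(create.getViewName(), create.isRewriteEnabled(), false, false);
        }
        if (desc instanceof AlterMaterializedViewRewriteDesc) {
            AlterMaterializedViewRewriteDesc alter = (AlterMaterializedViewRewriteDesc) desc;
            boolean enable = alter.isRewriteEnable();
            return new MaterializedViewUpdateDesc(alter.getMaterializedViewName(), enable, !enable, false);
        }
        return null;
    }
}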

Aggregations

DDLTask (org.apache.hadoop.hive.ql.ddl.DDLTask): 7 usages
CreateTableDesc (org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc): 4
HashMap (java.util.HashMap): 3
Path (org.apache.hadoop.fs.Path): 3
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 3
HashSet (java.util.HashSet): 2
DDLDesc (org.apache.hadoop.hive.ql.ddl.DDLDesc): 2
Task (org.apache.hadoop.hive.ql.exec.Task): 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2
Table (org.apache.hadoop.hive.ql.metadata.Table): 2
IOException (java.io.IOException): 1
ArrayList (java.util.ArrayList): 1
LinkedHashMap (java.util.LinkedHashMap): 1
LinkedHashSet (java.util.LinkedHashSet): 1
List (java.util.List): 1
Map (java.util.Map): 1
Set (java.util.Set): 1
TableName (org.apache.hadoop.hive.common.TableName): 1
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 1
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 1