Example 21 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

From class DDLSemanticAnalyzer, method analyzeCreateDatabase.

private void analyzeCreateDatabase(ASTNode ast) throws SemanticException {
    String dbName = unescapeIdentifier(ast.getChild(0).getText());
    boolean ifNotExists = false;
    String dbComment = null;
    String dbLocation = null;
    Map<String, String> dbProps = null;
    for (int i = 1; i < ast.getChildCount(); i++) {
        ASTNode childNode = (ASTNode) ast.getChild(i);
        switch(childNode.getToken().getType()) {
            case HiveParser.TOK_IFNOTEXISTS:
                ifNotExists = true;
                break;
            case HiveParser.TOK_DATABASECOMMENT:
                dbComment = unescapeSQLString(childNode.getChild(0).getText());
                break;
            case HiveParser.TOK_DATABASEPROPERTIES:
                dbProps = DDLSemanticAnalyzer.getProps((ASTNode) childNode.getChild(0));
                break;
            case HiveParser.TOK_DATABASELOCATION:
                dbLocation = unescapeSQLString(childNode.getChild(0).getText());
                addLocationToOutputs(dbLocation);
                break;
            default:
                throw new SemanticException("Unrecognized token in CREATE DATABASE statement");
        }
    }
    CreateDatabaseDesc createDatabaseDesc = new CreateDatabaseDesc(dbName, dbComment, dbLocation, ifNotExists);
    if (dbProps != null) {
        createDatabaseDesc.setDatabaseProperties(dbProps);
    }
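    // Register the new database itself as an output entity; the outputs set
    // drives authorization checks and is visible to post-execution hooks.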
    Database database = new Database(dbName, dbComment, dbLocation, dbProps);
    outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_NO_LOCK));
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), createDatabaseDesc)));
}
Also used: DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork), CreateDatabaseDesc (org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc), ShowCreateDatabaseDesc (org.apache.hadoop.hive.ql.plan.ShowCreateDatabaseDesc), Database (org.apache.hadoop.hive.metastore.api.Database), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint), NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint), DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint), SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint), SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint), SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)
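
The outputs set populated here is what Hive's authorization and post-execution hooks later see. As a minimal sketch (not part of Hive; the class name is made up, and HookContext.getOutputs() and getName() are assumed from the hook API rather than shown on this page), a post-execute hook could log the database-level DDL write registered above:

import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

public class LogDatabaseDdlHook implements ExecuteWithHookContext {

    @Override
    public void run(HookContext hookContext) throws Exception {
        for (WriteEntity output : hookContext.getOutputs()) {
            // CREATE DATABASE registers the database itself as an output with DDL_NO_LOCK.
            if (output.getType() == Entity.Type.DATABASE) {
                System.out.println("DDL write on database " + output.getName()
                    + " (write type " + output.getWriteType() + ")");
            }
        }
    }
}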

Example 22 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

From class DDLSemanticAnalyzer, method analyzeUnlockDatabase.

private void analyzeUnlockDatabase(ASTNode ast) throws SemanticException {
    String dbName = unescapeIdentifier(ast.getChild(0).getText());
    inputs.add(new ReadEntity(getDatabase(dbName)));
    // The unlock database operation releases the lock explicitly; the
    // operation itself doesn't need to be locked. Set the WriteEntity
    // WriteType to DDL_NO_LOCK here, otherwise it will conflict with
    // Hive's transaction manager.
    outputs.add(new WriteEntity(getDatabase(dbName), WriteType.DDL_NO_LOCK));
    UnlockDatabaseDesc unlockDatabaseDesc = new UnlockDatabaseDesc(dbName);
    DDLWork work = new DDLWork(getInputs(), getOutputs(), unlockDatabaseDesc);
    rootTasks.add(TaskFactory.get(work));
    // Need to initialize the lock manager
    ctx.setNeedLockMgr(true);
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork), UnlockDatabaseDesc (org.apache.hadoop.hive.ql.plan.UnlockDatabaseDesc), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
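
To make the comment concrete, here is a small standalone sketch (hypothetical, not from the Hive code base) that builds the same kind of output entity and prints its write type, reusing only the Database and WriteEntity constructors already shown on this page:

import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

public class UnlockDatabaseWriteTypeSketch {

    public static void main(String[] args) {
        // Same constructor shapes as in the analyzer code above; the values are made up.
        Database db = new Database("demo_db", "demo comment", "/warehouse/demo_db.db", null);
        WriteEntity output = new WriteEntity(db, WriteEntity.WriteType.DDL_NO_LOCK);
        // DDL_NO_LOCK: the lock manager is still initialized (ctx.setNeedLockMgr(true)),
        // but no lock is requested for this entity, so UNLOCK DATABASE cannot collide
        // with locks managed by Hive's transaction manager.
        System.out.println(output.getWriteType());  // prints DDL_NO_LOCK
    }
}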

Example 23 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

From class DDLSemanticAnalyzer, method analyzeDropDatabase.

private void analyzeDropDatabase(ASTNode ast) throws SemanticException {
    String dbName = unescapeIdentifier(ast.getChild(0).getText());
    boolean ifExists = false;
    boolean ifCascade = false;
    if (null != ast.getFirstChildWithType(HiveParser.TOK_IFEXISTS)) {
        ifExists = true;
    }
    if (null != ast.getFirstChildWithType(HiveParser.TOK_CASCADE)) {
        ifCascade = true;
    }
    Database database = getDatabase(dbName, !ifExists);
    if (database == null) {
        return;
    }
    // if cascade=true, then we need to authorize the drop table action as well
    if (ifCascade) {
        // add the tables as well to outputs
        List<String> tableNames;
        // get names of all tables under this dbName
        try {
            tableNames = db.getAllTables(dbName);
        } catch (HiveException e) {
            throw new SemanticException(e);
        }
        // add tables to outputs
        if (tableNames != null) {
            for (String tableName : tableNames) {
                Table table = getTable(dbName, tableName, true);
                // We want no lock here, as the database lock will cover the tables,
                // and putting a lock will actually cause us to deadlock on ourselves.
                outputs.add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK));
            }
        }
    }
    inputs.add(new ReadEntity(database));
    outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_EXCLUSIVE));
    DropDatabaseDesc dropDatabaseDesc = new DropDatabaseDesc(dbName, ifExists, ifCascade, new ReplicationSpec());
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropDatabaseDesc)));
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), DropDatabaseDesc (org.apache.hadoop.hive.ql.plan.DropDatabaseDesc), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), Table (org.apache.hadoop.hive.ql.metadata.Table), DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork), Database (org.apache.hadoop.hive.metastore.api.Database), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
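
The two write types used above encode the locking rule for DROP DATABASE ... CASCADE. A hypothetical helper (not in Hive) that states the rule explicitly:

import org.apache.hadoop.hive.ql.hooks.WriteEntity;

final class DropDatabaseWriteTypes {

    private DropDatabaseWriteTypes() {
    }

    // The database being dropped takes an exclusive DDL lock.
    static WriteEntity.WriteType forDroppedDatabase() {
        return WriteEntity.WriteType.DDL_EXCLUSIVE;
    }

    // Tables under the database are added to outputs only so that a cascading
    // drop is authorized per table; the database lock already covers them, and
    // locking them again would deadlock on ourselves.
    static WriteEntity.WriteType forCascadedTable() {
        return WriteEntity.WriteType.DDL_NO_LOCK;
    }
}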

Example 24 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

From class TaskCompiler, method patchUpAfterCTASorMaterializedView.

private void patchUpAfterCTASorMaterializedView(final List<Task<? extends Serializable>> rootTasks, final HashSet<WriteEntity> outputs, Task<? extends Serializable> createTask) {
    // clear the mapredWork output file from outputs for CTAS
    // DDLWork at the tail of the chain will have the output
    Iterator<WriteEntity> outIter = outputs.iterator();
    while (outIter.hasNext()) {
        switch(outIter.next().getType()) {
            case DFS_DIR:
            case LOCAL_DIR:
                outIter.remove();
                break;
            default:
                break;
        }
    }
    // find all leaf tasks and make the DDLTask a dependent task of all of them
    HashSet<Task<? extends Serializable>> leaves = new LinkedHashSet<>();
    getLeafTasks(rootTasks, leaves);
    assert (leaves.size() > 0);
    Task<? extends Serializable> targetTask = createTask;
    for (Task<? extends Serializable> task : leaves) {
        if (task instanceof StatsTask) {
            // StatsTask requires the table to already exist
            for (Task<? extends Serializable> parentOfStatsTask : task.getParentTasks()) {
                parentOfStatsTask.addDependentTask(createTask);
            }
            for (Task<? extends Serializable> parentOfCrtTblTask : createTask.getParentTasks()) {
                parentOfCrtTblTask.removeDependentTask(task);
            }
            createTask.addDependentTask(task);
            targetTask = task;
        } else {
            task.addDependentTask(createTask);
        }
    }
    // Add task to insert / delete materialized view from registry if needed
    if (createTask instanceof DDLTask) {
        DDLTask ddlTask = (DDLTask) createTask;
        DDLWork work = ddlTask.getWork();
        String tableName = null;
        boolean retrieveAndInclude = false;
        boolean disableRewrite = false;
        if (work.getCreateViewDesc() != null && work.getCreateViewDesc().isMaterialized()) {
            tableName = work.getCreateViewDesc().getViewName();
            retrieveAndInclude = work.getCreateViewDesc().isRewriteEnabled();
        } else if (work.getAlterMaterializedViewDesc() != null) {
            tableName = work.getAlterMaterializedViewDesc().getMaterializedViewName();
            if (work.getAlterMaterializedViewDesc().isRewriteEnable()) {
                retrieveAndInclude = true;
            } else {
                disableRewrite = true;
            }
        } else {
            return;
        }
        targetTask.addDependentTask(TaskFactory.get(new MaterializedViewDesc(tableName, retrieveAndInclude, disableRewrite, false), conf));
    }
}
Also used: LinkedHashSet (java.util.LinkedHashSet), FetchTask (org.apache.hadoop.hive.ql.exec.FetchTask), DDLTask (org.apache.hadoop.hive.ql.exec.DDLTask), Task (org.apache.hadoop.hive.ql.exec.Task), StatsTask (org.apache.hadoop.hive.ql.exec.StatsTask), Serializable (java.io.Serializable), MaterializedViewDesc (org.apache.hadoop.hive.ql.exec.MaterializedViewDesc), DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
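
The iterator loop at the top of the method is just a type filter over the output set. An equivalent standalone sketch (hypothetical class name; it uses java.util.Set.removeIf instead of an explicit iterator):

import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

final class CtasOutputFilter {

    private CtasOutputFilter() {
    }

    // Drop the scratch-directory outputs of the map-reduce work; the DDLWork at
    // the tail of the task chain carries the real CTAS / materialized view output.
    static void dropDirectoryOutputs(Set<WriteEntity> outputs) {
        outputs.removeIf(we -> we.getType() == Entity.Type.DFS_DIR
            || we.getType() == Entity.Type.LOCAL_DIR);
    }
}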

Example 25 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

From class UpdateDeleteSemanticAnalyzer, method updateOutputs.

/**
 * SemanticAnalyzer will generate a WriteEntity for the target table since it doesn't know/check
 * if the read and write are of the same table in "insert ... select ....".  Since DbTxnManager
 * uses Read/WriteEntity objects to decide which locks to acquire, we get more concurrency if we
 * change the table WriteEntity to a set of partition WriteEntity objects based on
 * ReadEntity objects computed for this table.
 */
private void updateOutputs(Table targetTable) {
    markReadEntityForUpdate();
    if (targetTable.isPartitioned()) {
        List<ReadEntity> partitionsRead = getRestrictedPartitionSet(targetTable);
        if (!partitionsRead.isEmpty()) {
            // if there is WriteEntity with WriteType=UPDATE/DELETE for target table, replace it with
            // WriteEntity for each partition
            List<WriteEntity> toRemove = new ArrayList<>();
            for (WriteEntity we : outputs) {
                WriteEntity.WriteType wt = we.getWriteType();
                if (isTargetTable(we, targetTable) && (wt == WriteEntity.WriteType.UPDATE || wt == WriteEntity.WriteType.DELETE)) {
                    /**
                     * The assumption here is that SemanticAnalyzer will generate a ReadEntity for each
                     * partition that exists and is matched by the WHERE clause (which may be all of them).
                     * Since we don't allow updating the value of a partition column, we know that we always
                     * write the same (or fewer) partitions than we read.  Still, the write is a Dynamic
                     * Partition write - see HIVE-15032.
                     */
                    toRemove.add(we);
                }
            }
            outputs.removeAll(toRemove);
            // TODO: why is this like that?
            for (ReadEntity re : partitionsRead) {
                for (WriteEntity original : toRemove) {
                    // since we may have both Update and Delete branches, Auth needs to know
                    WriteEntity we = new WriteEntity(re.getPartition(), original.getWriteType());
                    we.setDynamicPartitionWrite(original.isDynamicPartitionWrite());
                    outputs.add(we);
                }
            }
        }
    }
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), ArrayList (java.util.ArrayList), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
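
Condensed to its core, the replacement performed above looks like the following sketch (hypothetical helper, not in Hive); it preserves the original write type and the dynamic-partition flag so that DbTxnManager can lock at partition granularity:

import java.util.List;
import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

final class PartitionWriteExpansion {

    private PartitionWriteExpansion() {
    }

    // Replace one table-level UPDATE/DELETE WriteEntity with one WriteEntity per
    // partition that was actually read; the write never touches more partitions
    // than the read did, because partition columns cannot be updated.
    static void expand(Set<WriteEntity> outputs, WriteEntity tableWrite,
            List<ReadEntity> partitionsRead) {
        outputs.remove(tableWrite);
        for (ReadEntity re : partitionsRead) {
            WriteEntity we = new WriteEntity(re.getPartition(), tableWrite.getWriteType());
            we.setDynamicPartitionWrite(tableWrite.isDynamicPartitionWrite());
            outputs.add(we);
        }
    }
}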

Aggregations

WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 88 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 39 usages
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 35 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 24 usages
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 24 usages
ArrayList (java.util.ArrayList): 18 usages
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 14 usages
Path (org.apache.hadoop.fs.Path): 13 usages
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 13 usages
Referenceable (org.apache.atlas.typesystem.Referenceable): 11 usages
Database (org.apache.hadoop.hive.metastore.api.Database): 11 usages
Test (org.junit.Test): 11 usages
QueryPlan (org.apache.hadoop.hive.ql.QueryPlan): 10 usages
HashMap (java.util.HashMap): 9 usages
LinkedHashMap (java.util.LinkedHashMap): 9 usages
Test (org.testng.annotations.Test): 9 usages
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 8 usages
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 8 usages
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 8 usages
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 8 usages