
Example 21 with ReadEntity

use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.

the class UpdateDeleteSemanticAnalyzer method updateOutputs.

/**
 * SemanticAnalyzer will generate a WriteEntity for the target table since it doesn't know/check
 * if the read and write are of the same table in "insert ... select ....".  Since DbTxnManager
 * uses Read/WriteEntity objects to decide which locks to acquire, we get more concurrency if we
 * change the table WriteEntity to a set of partition WriteEntity objects based on
 * ReadEntity objects computed for this table.
 */
private void updateOutputs(Table targetTable) {
    markReadEntityForUpdate();
    if (targetTable.isPartitioned()) {
        List<ReadEntity> partitionsRead = getRestrictedPartitionSet(targetTable);
        if (!partitionsRead.isEmpty()) {
            // if there is WriteEntity with WriteType=UPDATE/DELETE for target table, replace it with
            // WriteEntity for each partition
            List<WriteEntity> toRemove = new ArrayList<>();
            for (WriteEntity we : outputs) {
                WriteEntity.WriteType wt = we.getWriteType();
                if (isTargetTable(we, targetTable) && (wt == WriteEntity.WriteType.UPDATE || wt == WriteEntity.WriteType.DELETE)) {
                    /**
                     * The assumption here is that SemanticAnalyzer will generate a ReadEntity for each
                     * partition that exists and is matched by the WHERE clause (which may be all of them).
                     * Since we don't allow updating the value of a partition column, we know that we always
                     * write the same (or fewer) partitions than we read.  Still, the write is a Dynamic
                     * Partition write - see HIVE-15032.
                     */
                    toRemove.add(we);
                }
            }
            outputs.removeAll(toRemove);
            // TODO: why is this like that?
            for (ReadEntity re : partitionsRead) {
                for (WriteEntity original : toRemove) {
                    // since we may have both Update and Delete branches, Auth needs to know
                    WriteEntity we = new WriteEntity(re.getPartition(), original.getWriteType());
                    we.setDynamicPartitionWrite(original.isDynamicPartitionWrite());
                    outputs.add(we);
                }
            }
        }
    }
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) ArrayList(java.util.ArrayList) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
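
The helpers getRestrictedPartitionSet and isTargetTable are referenced but not shown. A minimal sketch of both, assuming the analyzer's inherited inputs set and the standard Entity accessors; the checks are illustrative and the real implementations in UpdateDeleteSemanticAnalyzer may differ.

// Sketch only: collect the partition-level ReadEntity objects that belong to the target table.
private List<ReadEntity> getRestrictedPartitionSet(Table targetTable) {
    List<ReadEntity> partitionsRead = new ArrayList<>();
    for (ReadEntity re : inputs) {
        // Only partition reads of the target table restrict the write set.
        if (re.getType() == Entity.Type.PARTITION && targetTable.equals(re.getTable())) {
            partitionsRead.add(re);
        }
    }
    return partitionsRead;
}

// Sketch only: does this WriteEntity refer to the table being updated or deleted?
private boolean isTargetTable(WriteEntity we, Table targetTable) {
    return we.getType() == Entity.Type.TABLE && targetTable.equals(we.getTable());
}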

Example 22 with ReadEntity

use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.

the class DummyTxnManager method acquireLocks.

@Override
public void acquireLocks(QueryPlan plan, Context ctx, String username, LockedDriverState lDrvState) throws LockException {
    // Make sure we've built the lock manager
    getLockManager();
    // If the lock manager is still null, it means we aren't using a lock manager
    if (lockMgr == null) {
        return;
    }
    List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>();
    // If a lock needs to be acquired on any partition, a read lock needs to be acquired on all its parents also
    for (ReadEntity input : plan.getInputs()) {
        if (!input.needsLock()) {
            continue;
        }
        LOG.debug("Adding " + input.getName() + " to list of lock inputs");
        if (input.getType() == ReadEntity.Type.DATABASE) {
            lockObjects.addAll(getLockObjects(plan, input.getDatabase(), null, null, HiveLockMode.SHARED));
        } else if (input.getType() == ReadEntity.Type.TABLE) {
            lockObjects.addAll(getLockObjects(plan, null, input.getTable(), null, HiveLockMode.SHARED));
        } else {
            lockObjects.addAll(getLockObjects(plan, null, null, input.getPartition(), HiveLockMode.SHARED));
        }
    }
    for (WriteEntity output : plan.getOutputs()) {
        HiveLockMode lockMode = getWriteEntityLockMode(output);
        if (lockMode == null) {
            continue;
        }
        LOG.debug("Adding " + output.getName() + " to list of lock outputs");
        List<HiveLockObj> lockObj = null;
        if (output.getType() == WriteEntity.Type.DATABASE) {
            lockObjects.addAll(getLockObjects(plan, output.getDatabase(), null, null, lockMode));
        } else if (output.getTyp() == WriteEntity.Type.TABLE) {
            lockObj = getLockObjects(plan, null, output.getTable(), null, lockMode);
        } else if (output.getTyp() == WriteEntity.Type.PARTITION) {
            lockObj = getLockObjects(plan, null, null, output.getPartition(), lockMode);
        } else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) {
            // In case of dynamic queries, it is possible to have incomplete dummy partitions
            lockObj = getLockObjects(plan, null, null, output.getPartition(), HiveLockMode.SHARED);
        }
        if (lockObj != null) {
            lockObjects.addAll(lockObj);
            ctx.getOutputLockObjects().put(output, lockObj);
        }
    }
    if (lockObjects.isEmpty() && !ctx.isNeedLockMgr()) {
        return;
    }
    dedupLockObjects(lockObjects);
    List<HiveLock> hiveLocks = lockMgr.lock(lockObjects, false, lDrvState);
    if (hiveLocks == null) {
        throw new LockException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
    } else {
        ctx.setHiveLocks(hiveLocks);
    }
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
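
dedupLockObjects, called just before the lock manager is invoked, must collapse duplicate requests for the same object while keeping the stronger mode. A minimal sketch under that assumption, taking HiveLockObj.getName() and getMode() as given; the real method in DummyTxnManager may differ in detail.

// Sketch: keep one request per lock name, preferring EXCLUSIVE over SHARED.
static void dedupLockObjects(List<HiveLockObj> lockObjects) {
    Map<String, HiveLockObj> lockMap = new HashMap<>();
    for (HiveLockObj lockObj : lockObjects) {
        HiveLockObj existing = lockMap.get(lockObj.getName());
        if (existing == null || lockObj.getMode() == HiveLockMode.EXCLUSIVE) {
            // First request for this object, or an upgrade to an exclusive lock.
            lockMap.put(lockObj.getName(), lockObj);
        }
    }
    lockObjects.clear();
    lockObjects.addAll(lockMap.values());
}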

Example 23 with ReadEntity

use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project atlas by apache.

the class CreateHiveProcess method skipProcess.

private boolean skipProcess() {
    Set<ReadEntity> inputs = getHiveContext().getInputs();
    Set<WriteEntity> outputs = getHiveContext().getOutputs();
    boolean ret = CollectionUtils.isEmpty(inputs) && CollectionUtils.isEmpty(outputs);
    if (!ret) {
        if (getContext().getHiveOperation() == HiveOperation.QUERY) {
            // Select query has only one output
            if (outputs.size() == 1) {
                WriteEntity output = outputs.iterator().next();
                if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR) {
                    if (output.getWriteType() == WriteEntity.WriteType.PATH_WRITE && output.isTempURI()) {
                        ret = true;
                    }
                }
            }
        }
    }
    return ret;
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
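
A hypothetical call site for skipProcess(); the surrounding method name and return type are illustrative, not the exact Atlas hook API, but they show why the early-out matters: a plain SELECT that only spills to a temporary directory produces no lineage worth registering.

// Illustrative early-out; entities are built only when skipProcess() is false.
public AtlasEntity.AtlasEntitiesWithExtInfo getEntities() throws Exception {
    if (skipProcess()) {
        return null; // nothing to register for a temp-dir SELECT
    }
    AtlasEntity.AtlasEntitiesWithExtInfo ret = new AtlasEntity.AtlasEntitiesWithExtInfo();
    // build input, output, and process entities from the hook context here
    return ret;
}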

Example 24 with ReadEntity

use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project atlas by apache.

the class HiveITBase method getProcessQualifiedName.

@VisibleForTesting
protected static String getProcessQualifiedName(HiveMetaStoreBridge dgiBridge, HiveEventContext eventContext, final SortedSet<ReadEntity> sortedHiveInputs, final SortedSet<WriteEntity> sortedHiveOutputs, SortedMap<ReadEntity, AtlasEntity> hiveInputsMap, SortedMap<WriteEntity, AtlasEntity> hiveOutputsMap) throws HiveException {
    HiveOperation op = eventContext.getOperation();
    if (isCreateOp(eventContext)) {
        Entity entity = getEntityByType(sortedHiveOutputs, Entity.Type.TABLE);
        if (entity != null) {
            Table outTable = entity.getTable();
            // refresh table
            outTable = dgiBridge.getHiveClient().getTable(outTable.getDbName(), outTable.getTableName());
            return HiveMetaStoreBridge.getTableProcessQualifiedName(dgiBridge.getClusterName(), outTable);
        }
    }
    StringBuilder buffer = new StringBuilder(op.getOperationName());
    boolean ignoreHDFSPathsinQFName = ignoreHDFSPathsinQFName(op, sortedHiveInputs, sortedHiveOutputs);
    if (ignoreHDFSPathsinQFName && LOG.isDebugEnabled()) {
        LOG.debug("Ignoring HDFS paths in qualifiedName for {} {} ", op, eventContext.getQueryStr());
    }
    addInputs(dgiBridge, op, sortedHiveInputs, buffer, hiveInputsMap, ignoreHDFSPathsinQFName);
    buffer.append(IO_SEP);
    addOutputs(dgiBridge, op, sortedHiveOutputs, buffer, hiveOutputsMap, ignoreHDFSPathsinQFName);
    LOG.info("Setting process qualified name to {}", buffer);
    return buffer.toString();
}
Also used : HiveOperation(org.apache.hadoop.hive.ql.plan.HiveOperation) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) Entity(org.apache.hadoop.hive.ql.hooks.Entity) Table(org.apache.hadoop.hive.ql.metadata.Table) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
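
For intuition, the name built here is the operation name followed by the sorted input qualified names, the IO_SEP separator, and the sorted output qualified names. Assuming an arrow-style IO_SEP and Atlas's table@cluster convention, a CTAS process name would look roughly like the line below; the exact separators and any timestamp component depend on HiveMetaStoreBridge, so treat the shape as illustrative.

QUERY:default.t1@cluster1->default.t2@cluster1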

Example 25 with ReadEntity

use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project atlas by apache.

the class HiveHookIT method testDropAndRecreateCTASOutput.

@Test
public void testDropAndRecreateCTASOutput() throws Exception {
    String tableName = createTable();
    String ctasTableName = "table" + random();
    String query = "create table " + ctasTableName + " as select * from " + tableName;
    runCommand(query);
    assertTableIsRegistered(DEFAULT_DB, ctasTableName);
    Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
    Set<WriteEntity> outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
    HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, inputs, outputs);
    String processId = assertProcessIsRegistered(hiveEventContext);
    String dropQuery = String.format("drop table %s ", ctasTableName);
    runCommandWithDelay(dropQuery, 100);
    assertTableIsNotRegistered(DEFAULT_DB, ctasTableName);
    runCommand(query);
    assertTableIsRegistered(DEFAULT_DB, ctasTableName);
    outputs = getOutputs(ctasTableName, Entity.Type.TABLE);
    String process2Id = assertProcessIsRegistered(hiveEventContext, inputs, outputs);
    assertNotEquals(process2Id, processId);
    AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
    validateOutputTables(processEntity, outputs);
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Test(org.testng.annotations.Test)
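
getInputs and getOutputs are HiveITBase helpers that fabricate entity sets for the assertions above. A minimal sketch of the input side, assuming DEFAULT_DB and the qualified-name shape; the real helper also refreshes the Table object from the metastore client.

// Sketch: wrap one table name in a single-element set of ReadEntity, as the assertions expect.
protected Set<ReadEntity> getInputs(String tableName, Entity.Type entityType) {
    ReadEntity entity = new ReadEntity();
    entity.setName(DEFAULT_DB + "." + tableName); // qualified-name shape assumed
    entity.setTyp(entityType);
    Set<ReadEntity> inputs = new LinkedHashSet<>();
    inputs.add(entity);
    return inputs;
}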

Aggregations

ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 139 uses
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 70 uses
Table (org.apache.hadoop.hive.ql.metadata.Table): 69 uses
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 31 uses
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 29 uses
ArrayList (java.util.ArrayList): 27 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 27 uses
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 24 uses
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 22 uses
HashMap (java.util.HashMap): 16 uses
Test (org.testng.annotations.Test): 16 uses
Map (java.util.Map): 13 uses
LinkedHashMap (java.util.LinkedHashMap): 12 uses
Path (org.apache.hadoop.fs.Path): 12 uses
List (java.util.List): 11 uses
Database (org.apache.hadoop.hive.metastore.api.Database): 11 uses
AtlasEntity (org.apache.atlas.model.instance.AtlasEntity): 10 uses
Referenceable (org.apache.atlas.typesystem.Referenceable): 10 uses
HashSet (java.util.HashSet): 9 uses
FileNotFoundException (java.io.FileNotFoundException): 7 uses