Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.
The class HiveHookIT, method testTruncateTable.
@Test
public void testTruncateTable() throws Exception {
String tableName = createTable(false);
String query = String.format("truncate table %s", tableName);
runCommand(query);
Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
validateProcess(constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs));
//Check lineage
String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
JSONObject response = atlasClient.getInputGraph(datasetName);
JSONObject vertices = response.getJSONObject("values").getJSONObject("vertices");
//Below should be assertTrue - Fix https://issues.apache.org/jira/browse/ATLAS-653
Assert.assertFalse(vertices.has(tableId));
}
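The constructEvent helper called above is not included in this excerpt. A minimal sketch of what it plausibly does, assuming HiveEventContext exposes setQueryStr, setOperation, setInputs, and setOutputs (the setter names are assumptions inferred from how the event is used here):
private HiveHook.HiveEventContext constructEvent(String query, HiveOperation op, Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
    HiveHook.HiveEventContext event = new HiveHook.HiveEventContext();
    event.setQueryStr(query);  // the query string feeds into the process qualified name
    event.setOperation(op);
    event.setInputs(inputs);
    event.setOutputs(outputs);
    return event;
}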
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.
The class HiveHookIT, method getOutputs.
private Set<WriteEntity> getOutputs(String inputName, Entity.Type entityType) throws HiveException {
final WriteEntity entity = new WriteEntity();
if (Entity.Type.DFS_DIR.equals(entityType) || Entity.Type.LOCAL_DIR.equals(entityType)) {
entity.setName(lower(new Path(inputName).toString()));
entity.setTyp(entityType);
} else {
entity.setName(getQualifiedTblName(inputName));
entity.setTyp(entityType);
}
if (entityType == Entity.Type.TABLE) {
entity.setT(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, inputName));
}
return new LinkedHashSet<WriteEntity>() {
{
add(entity);
}
};
}
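The tests above also call a getInputs helper that this excerpt omits; it presumably mirrors getOutputs with ReadEntity. A sketch under that assumption, reusing the same lower and getQualifiedTblName test helpers:
private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) throws HiveException {
    final ReadEntity entity = new ReadEntity();
    if (Entity.Type.DFS_DIR.equals(entityType) || Entity.Type.LOCAL_DIR.equals(entityType)) {
        // Paths are normalized to lower case, matching getOutputs above
        entity.setName(lower(new Path(inputName).toString()));
        entity.setTyp(entityType);
    } else {
        entity.setName(getQualifiedTblName(inputName));
        entity.setTyp(entityType);
    }
    if (entityType == Entity.Type.TABLE) {
        entity.setT(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, inputName));
    }
    return new LinkedHashSet<ReadEntity>() {
        {
            add(entity);
        }
    };
}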
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.
The class HiveHookIT, method testInsertIntoTable.
@Test
public void testInsertIntoTable() throws Exception {
String inputTable1Name = createTable();
String inputTable2Name = createTable();
String insertTableName = createTable();
assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
assertTableIsRegistered(DEFAULT_DB, insertTableName);
String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
runCommand(query);
final Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
(outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
HiveHook.HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {
{
addAll(inputs);
}
};
assertTableIsRegistered(DEFAULT_DB, insertTableName);
Referenceable processRef1 = validateProcess(event, expectedInputs, outputs);
//Test sorting of tbl names
SortedSet<String> sortedTblNames = new TreeSet<>();
sortedTblNames.add(inputTable1Name.toLowerCase());
sortedTblNames.add(inputTable2Name.toLowerCase());
//Verify sorted order of inputs in qualified name
Assert.assertEquals(processRef1.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME),
    Joiner.on(SEP).join("QUERY",
        getQualifiedTblName(sortedTblNames.first()),
        HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.first())).getTime(),
        getQualifiedTblName(sortedTblNames.last()),
        HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.last())).getTime())
    + IO_SEP + SEP
    + Joiner.on(SEP).join(WriteEntity.WriteType.INSERT.name(),
        getQualifiedTblName(insertTableName),
        HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, insertTableName)).getTime()));
//Rerun same query. Should result in same process
runCommandWithDelay(query, 1000);
Referenceable processRef2 = validateProcess(event, expectedInputs, outputs);
Assert.assertEquals(processRef1.getId()._getId(), processRef2.getId()._getId());
}
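The expectedInputs set above is ordered with an entityComparator defined elsewhere in HiveHookIT. A minimal sketch, assuming the comparator simply orders entities case-insensitively by name (the actual ordering criterion is an assumption):
private static final Comparator<Entity> entityComparator = new Comparator<Entity>() {
    @Override
    public int compare(Entity e1, Entity e2) {
        // Null-safe, case-insensitive comparison on the entity name
        String n1 = e1.getName() == null ? "" : e1.getName().toLowerCase();
        String n2 = e2.getName() == null ? "" : e2.getName().toLowerCase();
        return n1.compareTo(n2);
    }
};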
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.
The class HiveHookIT, method testExportImportUnPartitionedTable.
@Test
public void testExportImportUnPartitionedTable() throws Exception {
String tableName = createTable(false);
assertTableIsRegistered(DEFAULT_DB, tableName);
String filename = "pfile://" + mkdir("exportUnPartitioned");
String query = "export table " + tableName + " to \"" + filename + "\"";
runCommand(query);
Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
validateHDFSPaths(processReference, OUTPUTS, filename);
validateInputTables(processReference, inputs);
//Import
String importTableName = createTable(false);
assertTableIsRegistered(DEFAULT_DB, importTableName);
query = "import table " + importTableName + " from '" + filename + "'";
runCommand(query);
outputs = getOutputs(importTableName, Entity.Type.TABLE);
validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
//Should create another process
filename = "pfile://" + mkdir("export2UnPartitioned");
query = "export table " + tableName + " to \"" + filename + "\"";
runCommand(query);
inputs = getInputs(tableName, Entity.Type.TABLE);
outputs = getOutputs(filename, Entity.Type.DFS_DIR);
validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
//Import again should create another process
query = "import table " + importTableName + " from '" + filename + "'";
runCommand(query);
outputs = getOutputs(importTableName, Entity.Type.TABLE);
validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
}
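The mkdir helper that produces the pfile:// export target is likewise defined elsewhere in the test class. A minimal sketch, assuming it creates a unique scratch directory under the build's target folder and returns its absolute path (the naming scheme is an assumption):
private String mkdir(String tag) throws Exception {
    // Unique per-invocation directory so repeated exports don't collide
    File dir = new File("./target/" + tag + "-data-" + System.nanoTime());
    dir.mkdirs();
    return dir.getAbsolutePath();
}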
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
The class DbTxnManager, method acquireLocks.
/**
* Normally the client should call {@link #acquireLocks(org.apache.hadoop.hive.ql.QueryPlan, org.apache.hadoop.hive.ql.Context, String)}.
* @param isBlocking if false, the method will return immediately; thus the locks may be in LockState.WAITING
* @return null if no locks were needed
*/
@VisibleForTesting
LockState acquireLocks(QueryPlan plan, Context ctx, String username, boolean isBlocking) throws LockException {
init();
// Make sure we've built the lock manager
getLockManager();
verifyState(plan);
boolean atLeastOneLock = false;
queryId = plan.getQueryId();
switch(plan.getOperation()) {
case SET_AUTOCOMMIT:
/**
 * This is here for documentation purposes. This TM doesn't support SET_AUTOCOMMIT - it only
 * has one mode of operation, documented at {@link DbTxnManager#isExplicitTransaction}.
 */
return null;
}
LockRequestBuilder rqstBuilder = new LockRequestBuilder(queryId);
// link queryId to txnId
LOG.info("Setting lock request transaction to " + JavaUtils.txnIdToString(txnId) + " for queryId=" + queryId);
rqstBuilder.setTransactionId(txnId).setUser(username);
// For each source to read, get a shared lock
for (ReadEntity input : plan.getInputs()) {
if (!input.needsLock() || input.isUpdateOrDelete() || !needsLock(input)) {
// We don't want to acquire read locks during update or delete as we'll be acquiring write
// locks instead. Also, there's no need to lock temp tables since they're session wide
continue;
}
LockComponentBuilder compBuilder = new LockComponentBuilder();
compBuilder.setShared();
compBuilder.setOperationType(DataOperationType.SELECT);
Table t = null;
switch(input.getType()) {
case DATABASE:
compBuilder.setDbName(input.getDatabase().getName());
break;
case TABLE:
t = input.getTable();
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
case PARTITION:
case DUMMYPARTITION:
compBuilder.setPartitionName(input.getPartition().getName());
t = input.getPartition().getTable();
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
default:
// This is a file or something we don't hold locks for.
continue;
}
if (t != null) {
compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
}
LockComponent comp = compBuilder.build();
LOG.debug("Adding lock component to lock request " + comp.toString());
rqstBuilder.addLockComponent(comp);
atLeastOneLock = true;
}
// For each source to write to, get the appropriate lock type. If it's an OVERWRITE, we need
// an exclusive lock. If it's an insert (no overwrite) then we need a shared lock. If it's an
// update or delete then we need a SEMI-SHARED.
for (WriteEntity output : plan.getOutputs()) {
LOG.debug("output is null " + (output == null));
if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR || !needsLock(output)) {
// We don't lock files or directories. We also skip locking temp tables.
continue;
}
LockComponentBuilder compBuilder = new LockComponentBuilder();
Table t = null;
switch(output.getType()) {
case DATABASE:
compBuilder.setDbName(output.getDatabase().getName());
break;
case TABLE:
case DUMMYPARTITION: // in case of dynamic partitioning lock the table
t = output.getTable();
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
case PARTITION:
compBuilder.setPartitionName(output.getPartition().getName());
t = output.getPartition().getTable();
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
default:
// This is a file or something we don't hold locks for.
continue;
}
switch(output.getWriteType()) {
/* base this on HiveOperation instead? this and DDL_NO_LOCK is peppered all over the code...
Seems much cleaner if each stmt is identified as a particular HiveOperation (which I'd think
makes sense everywhere). This however would be problematic for merge...*/
case DDL_EXCLUSIVE:
compBuilder.setExclusive();
compBuilder.setOperationType(DataOperationType.NO_TXN);
break;
case INSERT_OVERWRITE:
t = getTable(output);
if (AcidUtils.isTransactionalTable(t)) {
compBuilder.setSemiShared();
compBuilder.setOperationType(DataOperationType.UPDATE);
} else {
compBuilder.setExclusive();
compBuilder.setOperationType(DataOperationType.NO_TXN);
}
break;
case INSERT:
assert t != null;
if (AcidUtils.isFullAcidTable(t)) {
compBuilder.setShared();
} else {
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE)) {
compBuilder.setExclusive();
} else {
// this is backward compatible for non-ACID resources, w/o ACID semantics
compBuilder.setShared();
}
}
compBuilder.setOperationType(DataOperationType.INSERT);
break;
case DDL_SHARED:
compBuilder.setShared();
compBuilder.setOperationType(DataOperationType.NO_TXN);
break;
case UPDATE:
compBuilder.setSemiShared();
compBuilder.setOperationType(DataOperationType.UPDATE);
break;
case DELETE:
compBuilder.setSemiShared();
compBuilder.setOperationType(DataOperationType.DELETE);
break;
case DDL_NO_LOCK:
// No lock required here
continue;
default:
throw new RuntimeException("Unknown write type " + output.getWriteType().toString());
}
if (t != null) {
compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
}
compBuilder.setIsDynamicPartitionWrite(output.isDynamicPartitionWrite());
LockComponent comp = compBuilder.build();
LOG.debug("Adding lock component to lock request " + comp.toString());
rqstBuilder.addLockComponent(comp);
atLeastOneLock = true;
}
// Make sure we actually need locks. It's possible there's nothing to lock in this operation.
if (!atLeastOneLock) {
LOG.debug("No locks needed for queryId" + queryId);
return null;
}
List<HiveLock> locks = new ArrayList<HiveLock>(1);
LockState lockState = lockMgr.lock(rqstBuilder.build(), queryId, isBlocking, locks);
ctx.setHiveLocks(locks);
return lockState;
}
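For reference, the blocking acquireLocks(QueryPlan, Context, String) mentioned in the javadoc would then be a thin wrapper over this method; a plausible sketch of that delegation (the production code may differ, e.g. by routing through a heartbeat-aware variant):
@Override
public void acquireLocks(QueryPlan plan, Context ctx, String username) throws LockException {
    // Blocking variant: waits until the request is granted, so callers never
    // observe locks left in LockState.WAITING.
    acquireLocks(plan, ctx, username, true);
}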