Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by apache.
The class HiveHookIT, method testInsertIntoTable.
@Test
public void testInsertIntoTable() throws Exception {
String inputTable1Name = createTable();
String inputTable2Name = createTable();
String insertTableName = createTable();
assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
assertTableIsRegistered(DEFAULT_DB, insertTableName);
String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
runCommand(query);
final Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
(outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
HiveHook.HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {
{
addAll(inputs);
}
};
assertTableIsRegistered(DEFAULT_DB, insertTableName);
Referenceable processRef1 = validateProcess(event, expectedInputs, outputs);
//Test sorting of tbl names
SortedSet<String> sortedTblNames = new TreeSet<>();
sortedTblNames.add(inputTable1Name.toLowerCase());
sortedTblNames.add(inputTable2Name.toLowerCase());
//Verify sorted order of inputs in qualified name
Assert.assertEquals(processRef1.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME),
    Joiner.on(SEP).join("QUERY",
        getQualifiedTblName(sortedTblNames.first()),
        HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.first())).getTime(),
        getQualifiedTblName(sortedTblNames.last()),
        HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.last())).getTime())
    + IO_SEP + SEP
    + Joiner.on(SEP).join(WriteEntity.WriteType.INSERT.name(),
        getQualifiedTblName(insertTableName),
        HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, insertTableName)).getTime()));
//Rerun same query. Should result in same process
runCommandWithDelay(query, 1000);
Referenceable processRef2 = validateProcess(event, expectedInputs, outputs);
Assert.assertEquals(processRef1.getId()._getId(), processRef2.getId()._getId());
}
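To make the expected qualified name in the assertion above easier to read, here is a minimal sketch of how the same string is assembled, using the hook's SEP and IO_SEP separators; the table names and create times below are placeholders, not values produced by the test.
// Sketch only: mirrors the Joiner expression in the assertion above with placeholder values.
String inputsPart = Joiner.on(SEP).join("QUERY",
    "default.tbl_in1@primary", 1497000000000L,
    "default.tbl_in2@primary", 1497000000001L);
String outputsPart = Joiner.on(SEP).join(WriteEntity.WriteType.INSERT.name(),
    "default.tbl_out@primary", 1497000000002L);
String expectedQualifiedName = inputsPart + IO_SEP + SEP + outputsPart;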
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by apache.
The class HiveHookIT, method testExportImportUnPartitionedTable.
@Test
public void testExportImportUnPartitionedTable() throws Exception {
String tableName = createTable(false);
assertTableIsRegistered(DEFAULT_DB, tableName);
String filename = "pfile://" + mkdir("exportUnPartitioned");
String query = "export table " + tableName + " to \"" + filename + "\"";
runCommand(query);
Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
validateHDFSPaths(processReference, OUTPUTS, filename);
validateInputTables(processReference, inputs);
//Import
String importTableName = createTable(false);
assertTableIsRegistered(DEFAULT_DB, importTableName);
query = "import table " + importTableName + " from '" + filename + "'";
runCommand(query);
outputs = getOutputs(importTableName, Entity.Type.TABLE);
validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
//Should create another process
filename = "pfile://" + mkdir("export2UnPartitioned");
query = "export table " + tableName + " to \"" + filename + "\"";
runCommand(query);
inputs = getInputs(tableName, Entity.Type.TABLE);
outputs = getOutputs(filename, Entity.Type.DFS_DIR);
validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
//Import again should create another process
query = "import table " + importTableName + " from '" + filename + "'";
runCommand(query);
outputs = getOutputs(importTableName, Entity.Type.TABLE);
validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
}
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
The class DbTxnManager, method acquireLocks.
/**
* Normally a client should call {@link #acquireLocks(org.apache.hadoop.hive.ql.QueryPlan, org.apache.hadoop.hive.ql.Context, String)}
* @param isBlocking if false, the method will return immediately; thus the locks may be in LockState.WAITING
* @return null if no locks were needed
*/
@VisibleForTesting
LockState acquireLocks(QueryPlan plan, Context ctx, String username, boolean isBlocking) throws LockException {
init();
// Make sure we've built the lock manager
getLockManager();
verifyState(plan);
boolean atLeastOneLock = false;
queryId = plan.getQueryId();
switch(plan.getOperation()) {
case SET_AUTOCOMMIT:
/**
* This is here for documentation purposes. This TM doesn't support this - only has one
* mode of operation documented at {@link DbTxnManager#isExplicitTransaction}
*/
return null;
}
LockRequestBuilder rqstBuilder = new LockRequestBuilder(queryId);
// link queryId to txnId
LOG.info("Setting lock request transaction to " + JavaUtils.txnIdToString(txnId) + " for queryId=" + queryId);
rqstBuilder.setTransactionId(txnId).setUser(username);
// For each source to read, get a shared lock
for (ReadEntity input : plan.getInputs()) {
if (!input.needsLock() || input.isUpdateOrDelete() || !needsLock(input)) {
// We don't want to acquire read locks during update or delete as we'll be acquiring write
// locks instead. Also, there's no need to lock temp tables since they're session wide
continue;
}
LockComponentBuilder compBuilder = new LockComponentBuilder();
compBuilder.setShared();
compBuilder.setOperationType(DataOperationType.SELECT);
Table t = null;
switch(input.getType()) {
case DATABASE:
compBuilder.setDbName(input.getDatabase().getName());
break;
case TABLE:
t = input.getTable();
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
case PARTITION:
case DUMMYPARTITION:
compBuilder.setPartitionName(input.getPartition().getName());
t = input.getPartition().getTable();
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
default:
// This is a file or something we don't hold locks for.
continue;
}
if (t != null) {
compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
}
LockComponent comp = compBuilder.build();
LOG.debug("Adding lock component to lock request " + comp.toString());
rqstBuilder.addLockComponent(comp);
atLeastOneLock = true;
}
// For each output, pick the appropriate lock type: exclusive for DDL_EXCLUSIVE and for
// INSERT_OVERWRITE of non-transactional tables, shared for a plain INSERT, and for
// UPDATE/DELETE we need a SEMI-SHARED.
for (WriteEntity output : plan.getOutputs()) {
LOG.debug("output is null " + (output == null));
if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR || !needsLock(output)) {
// We don't lock files or directories. We also skip locking temp tables.
continue;
}
LockComponentBuilder compBuilder = new LockComponentBuilder();
Table t = null;
switch(output.getType()) {
case DATABASE:
compBuilder.setDbName(output.getDatabase().getName());
break;
case TABLE:
// in case of dynamic partitioning lock the table
case DUMMYPARTITION:
t = output.getTable();
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
case PARTITION:
compBuilder.setPartitionName(output.getPartition().getName());
t = output.getPartition().getTable();
compBuilder.setDbName(t.getDbName());
compBuilder.setTableName(t.getTableName());
break;
default:
// This is a file or something we don't hold locks for.
continue;
}
switch(output.getWriteType()) {
/* base this on HiveOperation instead? this and DDL_NO_LOCK is peppered all over the code...
Seems much cleaner if each stmt is identified as a particular HiveOperation (which I'd think
makes sense everywhere). This however would be problematic for merge...*/
case DDL_EXCLUSIVE:
compBuilder.setExclusive();
compBuilder.setOperationType(DataOperationType.NO_TXN);
break;
case INSERT_OVERWRITE:
t = getTable(output);
if (AcidUtils.isTransactionalTable(t)) {
compBuilder.setSemiShared();
compBuilder.setOperationType(DataOperationType.UPDATE);
} else {
compBuilder.setExclusive();
compBuilder.setOperationType(DataOperationType.NO_TXN);
}
break;
case INSERT:
assert t != null;
if (AcidUtils.isFullAcidTable(t)) {
compBuilder.setShared();
} else {
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE)) {
compBuilder.setExclusive();
} else {
// this is backward compatible for non-ACID resources, w/o ACID semantics
compBuilder.setShared();
}
}
compBuilder.setOperationType(DataOperationType.INSERT);
break;
case DDL_SHARED:
compBuilder.setShared();
compBuilder.setOperationType(DataOperationType.NO_TXN);
break;
case UPDATE:
compBuilder.setSemiShared();
compBuilder.setOperationType(DataOperationType.UPDATE);
break;
case DELETE:
compBuilder.setSemiShared();
compBuilder.setOperationType(DataOperationType.DELETE);
break;
case DDL_NO_LOCK:
// No lock required here
continue;
default:
throw new RuntimeException("Unknown write type " + output.getWriteType().toString());
}
if (t != null) {
compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
}
compBuilder.setIsDynamicPartitionWrite(output.isDynamicPartitionWrite());
LockComponent comp = compBuilder.build();
LOG.debug("Adding lock component to lock request " + comp.toString());
rqstBuilder.addLockComponent(comp);
atLeastOneLock = true;
}
// Make sure we actually need locks; it's possible there's nothing to lock in this operation.
if (!atLeastOneLock) {
LOG.debug("No locks needed for queryId" + queryId);
return null;
}
List<HiveLock> locks = new ArrayList<HiveLock>(1);
LockState lockState = lockMgr.lock(rqstBuilder.build(), queryId, isBlocking, locks);
ctx.setHiveLocks(locks);
return lockState;
}
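A hedged usage sketch of the non-blocking overload above; txnMgr, plan and ctx stand for an already prepared DbTxnManager, QueryPlan and Context, so this is illustrative wiring rather than code from Hive.
// Sketch: with isBlocking=false the returned locks may still be in LockState.WAITING.
LockState state = txnMgr.acquireLocks(plan, ctx, "test_user", /* isBlocking */ false);
if (state == null) {
    // the plan needed no locks at all (e.g. only DDL_NO_LOCK outputs)
} else if (state == LockState.WAITING) {
    // locks are enqueued but not yet granted; a blocking caller would keep waiting here
}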
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
The class SimpleFetchOptimizer, method checkTree.
// All we can handle is LimitOperator, FilterOperator, SelectOperator and a final FS
//
// For non-aggressive mode (minimal):
// 1. sampling is not allowed
// 2. for a partitioned table, all filters should target only partition columns
// 3. SelectOperator should use only simple cast/column access
private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, TableScanOperator ts) throws HiveException {
SplitSample splitSample = pctx.getNameToSplitSample().get(alias);
if (!aggressive && splitSample != null) {
return null;
}
if (!aggressive && ts.getConf().getTableSample() != null) {
return null;
}
Table table = ts.getConf().getTableMetadata();
if (table == null) {
return null;
}
ReadEntity parent = PlanUtils.getParentViewInfo(alias, pctx.getViewAliasToInput());
if (!table.isPartitioned()) {
FetchData fetch = new FetchData(ts, parent, table, splitSample);
return checkOperators(fetch, aggressive, false);
}
boolean bypassFilter = false;
if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) {
ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts);
if (PartitionPruner.onlyContainsPartnCols(table, pruner)) {
bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions();
}
}
if (!aggressive && !bypassFilter) {
return null;
}
PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts);
FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter);
return checkOperators(fetch, aggressive, bypassFilter);
}
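The aggressive flag above is driven by hive.fetch.task.conversion; below is a minimal sketch of the two relevant settings, assuming the usual mapping where "minimal" is the non-aggressive mode and "more" is the aggressive one.
// Sketch: fetch-task conversion modes (the mapping to 'aggressive' is an assumption here).
HiveConf conf = new HiveConf();
// "minimal": only the restricted cases listed above qualify (no sampling, partition-column
// filters only, simple cast/column access in SelectOperator)
conf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "minimal");
// "more": the aggressive mode, which relaxes those restrictions
conf.setVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION, "more");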
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache.
The class DDLTask, method renamePartition.
/**
* Rename a partition in a table
*
* @param db
* Database to rename the partition.
* @param renamePartitionDesc
* rename old Partition to new one.
* @return Returns 0 when execution succeeds and above 0 if it fails.
* @throws HiveException
*/
private int renamePartition(Hive db, RenamePartitionDesc renamePartitionDesc) throws HiveException {
String tableName = renamePartitionDesc.getTableName();
LinkedHashMap<String, String> oldPartSpec = renamePartitionDesc.getOldPartSpec();
if (!allowOperationInReplicationScope(db, tableName, oldPartSpec, renamePartitionDesc.getReplicationSpec())) {
// In replication scope this rename is a no-op: either the table/partition is missing,
// or the existing table is newer than our update.
if (LOG.isDebugEnabled()) {
LOG.debug("DDLTask: Rename Partition is skipped as table {} / partition {} is newer than update", tableName, FileUtils.makePartName(new ArrayList<>(oldPartSpec.keySet()), new ArrayList<>(oldPartSpec.values())));
}
return 0;
}
String[] names = Utilities.getDbTableName(tableName);
if (Utils.isBootstrapDumpInProgress(db, names[0])) {
LOG.error("DDLTask: Rename Partition not allowed as bootstrap dump in progress");
throw new HiveException("Rename Partition: Not allowed as bootstrap dump in progress");
}
Table tbl = db.getTable(tableName);
Partition oldPart = db.getPartition(tbl, oldPartSpec, false);
if (oldPart == null) {
String partName = FileUtils.makePartName(new ArrayList<String>(oldPartSpec.keySet()), new ArrayList<String>(oldPartSpec.values()));
throw new HiveException("Rename partition: source partition [" + partName + "] does not exist.");
}
Partition part = db.getPartition(tbl, oldPartSpec, false);
part.setValues(renamePartitionDesc.getNewPartSpec());
db.renamePartition(tbl, oldPartSpec, part);
Partition newPart = db.getPartition(tbl, renamePartitionDesc.getNewPartSpec(), false);
work.getInputs().add(new ReadEntity(oldPart));
// We've already obtained a lock on the table, don't lock the partition too
addIfAbsentByName(new WriteEntity(newPart, WriteEntity.WriteType.DDL_NO_LOCK));
return 0;
}
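For context, a hypothetical HiveQL statement that exercises this rename path; the table name and partition values are illustrative only.
// Hypothetical DDL reaching DDLTask.renamePartition; names and partition values are illustrative.
String ddl = "ALTER TABLE sales PARTITION (dt='2017-01-01') "
    + "RENAME TO PARTITION (dt='2017-01-02')";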