Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by Apache, in class HiveHookIT, method testInsertIntoDFSDirPartitioned.
@Test
public void testInsertIntoDFSDirPartitioned() throws Exception {
    // Test with a partitioned table
    String tableName = createTable(true);
    String pFile1 = createTestDFSPath("somedfspath1");
    String query = "insert overwrite DIRECTORY '" + pFile1 + "' select id, name from " + tableName + " where dt = '" + PART_FILE + "'";
    runCommand(query);
    Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
    final Set<WriteEntity> outputs = getOutputs(pFile1, Entity.Type.DFS_DIR);
    outputs.iterator().next().setWriteType(WriteEntity.WriteType.PATH_WRITE);
    final Set<ReadEntity> partitionIps = new LinkedHashSet<>(inputs);
    partitionIps.addAll(getInputs(DEFAULT_DB + "@" + tableName + "@dt='" + PART_FILE + "'", Entity.Type.PARTITION));
    Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, outputs), inputs, outputs);
    // Rerun the same query with a different HDFS path. This should not create another process; it should update the existing one.
    final String pFile2 = createTestDFSPath("somedfspath2");
    query = "insert overwrite DIRECTORY '" + pFile2 + "' select id, name from " + tableName + " where dt = '" + PART_FILE + "'";
    runCommand(query);
    final Set<WriteEntity> pFile2Outputs = getOutputs(pFile2, Entity.Type.DFS_DIR);
    pFile2Outputs.iterator().next().setWriteType(WriteEntity.WriteType.PATH_WRITE);
    // The process now has two paths: an older one whose reference to the partition was removed, and one with the latest partition.
    Set<WriteEntity> p2Outputs = new LinkedHashSet<WriteEntity>() {
        {
            addAll(pFile2Outputs);
            addAll(outputs);
        }
    };
    Referenceable process2Reference = validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, pFile2Outputs), inputs, p2Outputs);
    validateHDFSPaths(process2Reference, OUTPUTS, pFile2);
    Assert.assertEquals(process2Reference.getId()._getId(), processReference.getId()._getId());
}
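The helpers getInputs() and getOutputs() used above are not shown. Below is a minimal sketch, an assumption rather than the actual HiveHookIT code, of how such helpers could build the entity sets. It relies on Entity's public bean-style setters (setName, setTyp), the no-arg ReadEntity/WriteEntity constructors, org.apache.hadoop.fs.Path and java.util.LinkedHashSet; the real helpers also qualify bare table names with the default database.

// Sketch only: builds a single-element set of ReadEntity for the given name and type.
private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) {
    ReadEntity entity = new ReadEntity();
    // DFS and local directories are identified by their path; other entities by a (qualified) name.
    if (entityType == Entity.Type.DFS_DIR || entityType == Entity.Type.LOCAL_DIR) {
        entity.setName(new Path(inputName).toString().toLowerCase());
    } else {
        entity.setName(inputName.toLowerCase());
    }
    entity.setTyp(entityType);
    Set<ReadEntity> inputs = new LinkedHashSet<>();
    inputs.add(entity);
    return inputs;
}

// Sketch only: same idea for the write side.
private Set<WriteEntity> getOutputs(String outputName, Entity.Type entityType) {
    WriteEntity entity = new WriteEntity();
    entity.setName(outputName.toLowerCase());
    entity.setTyp(entityType);
    Set<WriteEntity> outputs = new LinkedHashSet<>();
    outputs.add(entity);
    return outputs;
}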
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by Apache, in class HiveHook, method renameTable.
private void renameTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws AtlasHookException {
    try {
        // There is no easy way to obtain the new table name directly, so derive it from the event's inputs and outputs.
        assert event.getInputs() != null && event.getInputs().size() == 1;
        assert event.getOutputs() != null && event.getOutputs().size() > 0;
        // Create the old table entity if it does not exist; otherwise update it.
        ReadEntity oldEntity = event.getInputs().iterator().next();
        Table oldTable = oldEntity.getTable();
        for (WriteEntity writeEntity : event.getOutputs()) {
            if (writeEntity.getType() == Entity.Type.TABLE) {
                Table newTable = writeEntity.getTable();
                // Hive puts both the old and the new table names in the outputs, so skip the output that still carries the old name.
                if (!newTable.getDbName().equals(oldTable.getDbName()) || !newTable.getTableName().equals(oldTable.getTableName())) {
                    final String oldQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(), oldTable);
                    final String newQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(dgiBridge.getClusterName(), newTable);
                    // Create/update the old table entity: create it with the old qualified name and old table name if it does not exist, otherwise update it.
                    // We always build it from the new entity, since some flags and attributes are not set on the input entity,
                    // and Hive.getTable(oldTableName) fails because the table no longer exists in Hive.
                    final LinkedHashMap<Type, Referenceable> tables = createOrUpdateEntities(dgiBridge, event, writeEntity, true);
                    Referenceable tableEntity = tables.get(Type.TABLE);
                    // Reset the regular column qualified names to the old name and send a partial-update notification replacing the
                    // old column qualified name with the new one, so that any existing traits are retained.
                    replaceColumnQFName(event, (List<Referenceable>) tableEntity.get(HiveMetaStoreBridge.COLUMNS), oldQualifiedName, newQualifiedName);
                    // Do the same for the partition key columns.
                    replaceColumnQFName(event, (List<Referenceable>) tableEntity.get(HiveMetaStoreBridge.PART_COLS), oldQualifiedName, newQualifiedName);
                    // Do the same for the storage descriptor.
                    replaceSDQFName(event, tableEntity, oldQualifiedName, newQualifiedName);
                    // Reset the table qualified name to the old name and send a partial-update notification replacing it with the new one.
                    replaceTableQFName(event, oldTable, newTable, tableEntity, oldQualifiedName, newQualifiedName);
                }
            }
        }
    } catch (Exception e) {
        throw new AtlasHookException("HiveHook.renameTable() failed.", e);
    }
}
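The helpers replaceColumnQFName, replaceSDQFName and replaceTableQFName are not shown. The sketch below is a hedged guess at what replaceTableQFName might do with the Atlas Referenceable API: the attribute names "name" and "qualifiedName" and the HiveDataTypes.HIVE_TABLE type come from the Atlas Hive model, but the exact constants, notification class and method signature in HiveHook may differ.

// Sketch only, not the HiveHook implementation.
private Referenceable replaceTableQFName(HiveEventContext event, Table oldTable, Table newTable,
                                         Referenceable tableEntity, String oldTableQFName, String newTableQFName) {
    // Point the existing entity back at the old identity so the create/update above targets the pre-rename table.
    tableEntity.set("name", oldTable.getTableName().toLowerCase());
    tableEntity.set("qualifiedName", oldTableQFName);
    // Build a replacement entity carrying the new identity and send it as a partial update keyed on the old qualified name.
    Referenceable newEntity = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
    newEntity.set("name", newTable.getTableName().toLowerCase());
    newEntity.set("qualifiedName", newTableQFName);
    event.addMessage(new HookNotification.EntityPartialUpdateRequest(event.getUser(),
            HiveDataTypes.HIVE_TABLE.getName(), "qualifiedName", oldTableQFName, newEntity));
    return newEntity;
}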
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by Apache, in class DescDatabaseAnalyzer, method analyzeInternal.
@Override
public void analyzeInternal(ASTNode root) throws SemanticException {
    if (root.getChildCount() == 0 || root.getChildCount() > 2) {
        throw new SemanticException("Unexpected Tokens at DESCRIBE DATABASE");
    }
    ctx.setResFile(ctx.getLocalTmpPath());
    String databaseName = stripQuotes(root.getChild(0).getText());
    boolean isExtended = root.getChildCount() == 2;
    inputs.add(new ReadEntity(getDatabase(databaseName)));
    DescDatabaseDesc desc = new DescDatabaseDesc(ctx.getResFile(), databaseName, isExtended);
    Task<DDLWork> task = TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc));
    rootTasks.add(task);
    task.setFetchSource(true);
    setFetchTask(createFetchTask(desc.getSchema()));
}
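getDatabase(databaseName) is inherited from BaseSemanticAnalyzer and is what resolves the name into the Database object wrapped by the ReadEntity. A rough sketch of that lookup, assuming the analyzer's Hive client field db and ErrorMsg.DATABASE_NOT_EXISTS as in Hive's BaseSemanticAnalyzer (details vary between Hive versions):

// Sketch: resolve a database name through the metastore client and fail the compile if it is missing.
protected Database getDatabase(String dbName) throws SemanticException {
    Database database;
    try {
        // db is the analyzer's Hive metastore client wrapper
        database = db.getDatabase(dbName);
    } catch (Exception e) {
        throw new SemanticException(e.getMessage(), e);
    }
    if (database == null) {
        throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(dbName));
    }
    return database;
}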
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by Apache, in class LockDatabaseAnalyzer, method analyzeInternal.
@Override
public void analyzeInternal(ASTNode root) throws SemanticException {
    String databaseName = unescapeIdentifier(root.getChild(0).getText());
    String mode = unescapeIdentifier(root.getChild(1).getText().toUpperCase());
    inputs.add(new ReadEntity(getDatabase(databaseName)));
    // LOCK DATABASE acquires the lock explicitly; the operation itself does not need to be locked.
    // Use WriteType.DDL_NO_LOCK for the WriteEntity, otherwise it would conflict with Hive's transaction handling.
    outputs.add(new WriteEntity(getDatabase(databaseName), WriteType.DDL_NO_LOCK));
    LockDatabaseDesc desc = new LockDatabaseDesc(databaseName, mode, HiveConf.getVar(conf, ConfVars.HIVEQUERYID), ctx.getCmd());
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
    ctx.setNeedLockMgr(true);
}
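The UNLOCK DATABASE analyzer presumably mirrors this pattern. A sketch of how its analyzeInternal could look, assuming an UnlockDatabaseDesc that takes only the database name (the real class may differ):

@Override
public void analyzeInternal(ASTNode root) throws SemanticException {
    String databaseName = unescapeIdentifier(root.getChild(0).getText());
    // Reading the database is enough for authorization; unlocking itself must not take another lock.
    inputs.add(new ReadEntity(getDatabase(databaseName)));
    outputs.add(new WriteEntity(getDatabase(databaseName), WriteType.DDL_NO_LOCK));
    UnlockDatabaseDesc desc = new UnlockDatabaseDesc(databaseName);
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
    ctx.setNeedLockMgr(true);
}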
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by Apache, in class ShowCreateDatabaseAnalyzer, method analyzeInternal.
@Override
public void analyzeInternal(ASTNode root) throws SemanticException {
    ctx.setResFile(ctx.getLocalTmpPath());
    String databaseName = getUnescapedName((ASTNode) root.getChild(0));
    Database database = getDatabase(databaseName);
    inputs.add(new ReadEntity(database));
    ShowCreateDatabaseDesc desc = new ShowCreateDatabaseDesc(databaseName, ctx.getResFile());
    Task<DDLWork> task = TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc));
    rootTasks.add(task);
    task.setFetchSource(true);
    setFetchTask(createFetchTask(ShowCreateDatabaseDesc.SCHEMA));
}
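The last four statements of this analyzer and of DescDatabaseAnalyzer repeat the same fetch-task wiring. Below is a hypothetical helper, not part of Hive, that captures the pattern; it assumes access to the analyzer's rootTasks, getInputs()/getOutputs(), setFetchTask() and createFetchTask():

// Hypothetical helper (illustration only): wires up a DDL task whose result file is
// read back to the client through a fetch task with the given result schema.
private void addFetchingDDLTask(DDLDesc desc, String schema) {
    Task<DDLWork> task = TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc));
    rootTasks.add(task);
    // Mark the DDL task as the source of the fetch so its output is returned to the client.
    task.setFetchSource(true);
    setFetchTask(createFetchTask(schema));
}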