Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by Apache.
The class HiveHookIT, method testExportImportPartitionedTable.
@Test
public void testExportImportPartitionedTable() throws Exception {
    boolean isPartitionedTable = true;
    final String tableName = createTable(isPartitionedTable);
    assertTableIsRegistered(DEFAULT_DB, tableName);

    // Add a partition
    String partFile = "pfile://" + mkdir("partition");
    String query = "alter table " + tableName + " add partition (dt='" + PART_FILE + "') location '" + partFile + "'";
    runCommand(query);

    String filename = "pfile://" + mkdir("export");
    query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);

    final Set<ReadEntity> expectedExportInputs = getInputs(tableName, Entity.Type.TABLE);
    final Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    // Note that export has only partition as input in this case
    final Set<ReadEntity> partitionIps = getInputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION);
    partitionIps.addAll(expectedExportInputs);

    Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs), expectedExportInputs, outputs);
    validateHDFSPaths(processReference, OUTPUTS, filename);

    // Import
    String importTableName = createTable(true);
    assertTableIsRegistered(DEFAULT_DB, importTableName);
query = "import table " + importTableName + " from '" + filename + "'";
runCommand(query);
final Set<ReadEntity> expectedImportInputs = getInputs(filename, Entity.Type.DFS_DIR);
final Set<WriteEntity> importOutputs = getOutputs(importTableName, Entity.Type.TABLE);
final Set<WriteEntity> partitionOps = getOutputs(DEFAULT_DB + "@" + importTableName + "@dt=" + PART_FILE, Entity.Type.PARTITION);
partitionOps.addAll(importOutputs);
validateProcess(constructEvent(query, HiveOperation.IMPORT, expectedImportInputs, partitionOps), expectedImportInputs, importOutputs);
//Export should update same process
filename = "pfile://" + mkdir("export2");
query = "export table " + tableName + " to \"" + filename + "\"";
runCommand(query);
final Set<WriteEntity> outputs2 = getOutputs(filename, Entity.Type.DFS_DIR);
Set<WriteEntity> p3Outputs = new LinkedHashSet<WriteEntity>() {
{
addAll(outputs2);
addAll(outputs);
}
};
validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2), expectedExportInputs, p3Outputs);
query = "alter table " + importTableName + " drop partition (dt='" + PART_FILE + "')";
runCommand(query);
//Import should update same process
query = "import table " + importTableName + " from '" + filename + "'";
runCommandWithDelay(query, 1000);
final Set<ReadEntity> importInputs = getInputs(filename, Entity.Type.DFS_DIR);
final Set<ReadEntity> expectedImport2Inputs = new LinkedHashSet<ReadEntity>() {
{
addAll(importInputs);
addAll(expectedImportInputs);
}
};
validateProcess(constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps), expectedImport2Inputs, importOutputs);
}
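The getInputs and getOutputs helpers used throughout these tests are defined elsewhere in HiveHookIT and are not shown on this page. Below is a minimal sketch of what an inputs helper might look like, assuming the bean-style setters on org.apache.hadoop.hive.ql.hooks.Entity (setName, setTyp) and a <db>@<table> qualified-name convention; the method name buildInputs, the "default" database literal, and the lower-casing of DFS paths are assumptions, not the project's actual code.

// imports assumed at the top of the test file
import java.util.LinkedHashSet;
import java.util.Set;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;

// Hypothetical helper, sketched as it might appear inside the test class.
static Set<ReadEntity> buildInputs(String name, Entity.Type type) {
    ReadEntity entity = new ReadEntity();                          // no-arg constructor; fields filled via setters
    if (type == Entity.Type.DFS_DIR) {
        entity.setName(new Path(name).toString().toLowerCase());  // DFS paths compared lower-cased (assumption)
    } else {
        entity.setName("default@" + name);                        // <db>@<table> qualified name (assumption)
    }
    entity.setTyp(type);
    Set<ReadEntity> inputs = new LinkedHashSet<>();
    inputs.add(entity);
    return inputs;
}

An outputs helper would presumably mirror this shape with WriteEntity in place of ReadEntity.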
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by Apache.
The class HiveHookIT, method testLoadDFSPathPartitioned.
@Test
public void testLoadDFSPathPartitioned() throws Exception {
    String tableName = createTable(true, true, false);
    assertTableIsRegistered(DEFAULT_DB, tableName);

    final String loadFile = createTestDFSFile("loadDFSFile");
    String query = "load data inpath '" + loadFile + "' into table " + tableName + " partition(dt = '" + PART_FILE + "')";
    runCommand(query);

    final Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
    final Set<ReadEntity> inputs = getInputs(loadFile, Entity.Type.DFS_DIR);
    final Set<WriteEntity> partitionOps = new LinkedHashSet<>(outputs);
    partitionOps.addAll(getOutputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION));

    Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.LOAD, inputs, partitionOps), inputs, outputs);
    validateHDFSPaths(processReference, INPUTS, loadFile);
    validateOutputTables(processReference, outputs);

    final String loadFile2 = createTestDFSFile("loadDFSFile1");
    query = "load data inpath '" + loadFile2 + "' into table " + tableName + " partition(dt = '" + PART_FILE + "')";
    runCommand(query);

    Set<ReadEntity> process2Inputs = getInputs(loadFile2, Entity.Type.DFS_DIR);
    Set<ReadEntity> expectedInputs = new LinkedHashSet<>();
    expectedInputs.addAll(process2Inputs);
    expectedInputs.addAll(inputs);
    validateProcess(constructEvent(query, HiveOperation.LOAD, expectedInputs, partitionOps), expectedInputs, outputs);
}
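Given real metastore objects, Hive's own entity constructors can produce partition-level inputs and outputs like the ones asserted above. The sketch below is illustrative only: it assumes an initialized Hive client (Hive.get() requires a valid HiveConf), the dt partition spec used in the LOAD statement, and a "default" database; the method name is hypothetical, and the test's own helpers may build entities differently.

// imports assumed at the top of the test file
import java.util.Collections;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

// Hypothetical helper: builds the partition-level entities a LOAD into "dt=<partValue>" would report.
static void buildPartitionEntities(String tableName, String partValue) throws HiveException {
    Hive hiveClient = Hive.get();                                  // assumes a valid HiveConf is available
    Table table = hiveClient.getTable("default", tableName);
    Partition part = hiveClient.getPartition(table, Collections.singletonMap("dt", partValue), false);

    ReadEntity partitionInput = new ReadEntity(part);              // partition-level lineage input
    WriteEntity partitionOutput = new WriteEntity(part, WriteEntity.WriteType.INSERT); // partition-level output (WriteType chosen for illustration)
    // In the test, the equivalents of these entities end up in the inputs/partitionOps sets passed to constructEvent.
}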
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by Apache.
The class HiveHookIT, method testInsertIntoTable.
@Test
public void testInsertIntoTable() throws Exception {
    String inputTable1Name = createTable();
    String inputTable2Name = createTable();
    String insertTableName = createTable();
    assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
    assertTableIsRegistered(DEFAULT_DB, insertTableName);

    String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
    runCommand(query);

    final Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
    inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
    Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
    outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT);

    HiveHook.HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
    Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {
        {
            addAll(inputs);
        }
    };
    assertTableIsRegistered(DEFAULT_DB, insertTableName);
    Referenceable processRef1 = validateProcess(event, expectedInputs, outputs);

    // Test sorting of tbl names
    SortedSet<String> sortedTblNames = new TreeSet<>();
    sortedTblNames.add(inputTable1Name.toLowerCase());
    sortedTblNames.add(inputTable2Name.toLowerCase());

    // Verify sorted order of inputs in qualified name
    Assert.assertEquals(processRef1.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME),
        Joiner.on(SEP).join("QUERY",
            getQualifiedTblName(sortedTblNames.first()),
            HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.first())).getTime(),
            getQualifiedTblName(sortedTblNames.last()),
            HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.last())).getTime())
        + IO_SEP + SEP
        + Joiner.on(SEP).join(WriteEntity.WriteType.INSERT.name(),
            getQualifiedTblName(insertTableName),
            HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, insertTableName)).getTime()));

    // Rerun same query. Should result in same process
    runCommandWithDelay(query, 1000);
    Referenceable processRef2 = validateProcess(event, expectedInputs, outputs);
    Assert.assertEquals(processRef1.getId()._getId(), processRef2.getId()._getId());
}
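The entityComparator passed to the TreeSet above is another HiveHookIT member not shown on this page. Here is a minimal sketch of such a comparator, assuming entities are simply ordered by name, which is what the sorted-qualified-name assertion relies on; the field name matches the usage above, but the actual implementation may differ.

// imports assumed at the top of the test file
import java.util.Comparator;
import org.apache.hadoop.hive.ql.hooks.Entity;

// Hypothetical comparator: orders entities by (lower-cased) name so process qualified names are deterministic.
private static final Comparator<Entity> entityComparator = new Comparator<Entity>() {
    @Override
    public int compare(Entity o1, Entity o2) {
        String n1 = o1.getName() == null ? "" : o1.getName().toLowerCase();
        String n2 = o2.getName() == null ? "" : o2.getName().toLowerCase();
        return n1.compareTo(n2);
    }
};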
Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by Apache.
The class HiveHookIT, method testExportImportUnPartitionedTable.
@Test
public void testExportImportUnPartitionedTable() throws Exception {
    String tableName = createTable(false);
    assertTableIsRegistered(DEFAULT_DB, tableName);

    String filename = "pfile://" + mkdir("exportUnPartitioned");
    String query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);

    Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
    Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
    validateHDFSPaths(processReference, OUTPUTS, filename);
    validateInputTables(processReference, inputs);

    // Import
    String importTableName = createTable(false);
    assertTableIsRegistered(DEFAULT_DB, importTableName);
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);

    outputs = getOutputs(importTableName, Entity.Type.TABLE);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));

    // Should create another process
    filename = "pfile://" + mkdir("export2UnPartitioned");
    query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);

    inputs = getInputs(tableName, Entity.Type.TABLE);
    outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
    // Import again should create another process
query = "import table " + importTableName + " from '" + filename + "'";
runCommand(query);
outputs = getOutputs(importTableName, Entity.Type.TABLE);
validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
}
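constructEvent, used by every test above, packages the query text, operation, and entity sets into the HiveHook.HiveEventContext that the hook would normally build from Hive's post-execution callback. The sketch below is a hypothetical reconstruction: it assumes HiveEventContext exposes bean-style setters (setQueryStr, setOperation, setInputs, setOutputs), names inferred from how the event is consumed in these tests rather than confirmed from the Atlas sources.

// imports assumed at the top of the test file
import java.util.Set;
import org.apache.atlas.hive.hook.HiveHook;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.plan.HiveOperation;

// Hypothetical reconstruction: setter names are assumptions (see note above).
static HiveHook.HiveEventContext constructEvent(String query, HiveOperation op,
                                                Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
    HiveHook.HiveEventContext event = new HiveHook.HiveEventContext();
    event.setQueryStr(query);  // raw HiveQL text; feeds the process name/description
    event.setOperation(op);    // EXPORT, IMPORT, LOAD, QUERY, ...
    event.setInputs(inputs);   // ReadEntity set: tables, partitions, DFS dirs read
    event.setOutputs(outputs); // WriteEntity set: tables, partitions, DFS dirs written
    return event;
}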