Example 66 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by apache.

From the class HiveHookIT, method testExportImportPartitionedTable.

@Test
public void testExportImportPartitionedTable() throws Exception {
    boolean isPartitionedTable = true;
    final String tableName = createTable(isPartitionedTable);
    assertTableIsRegistered(DEFAULT_DB, tableName);
    //Add a partition
    String partFile = "pfile://" + mkdir("partition");
    String query = "alter table " + tableName + " add partition (dt='" + PART_FILE + "') location '" + partFile + "'";
    runCommand(query);
    String filename = "pfile://" + mkdir("export");
    query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    final Set<ReadEntity> expectedExportInputs = getInputs(tableName, Entity.Type.TABLE);
    final Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    //Note that export has only partition as input in this case
    final Set<ReadEntity> partitionIps = getInputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION);
    partitionIps.addAll(expectedExportInputs);
    Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs), expectedExportInputs, outputs);
    validateHDFSPaths(processReference, OUTPUTS, filename);
    //Import
    String importTableName = createTable(true);
    assertTableIsRegistered(DEFAULT_DB, importTableName);
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);
    final Set<ReadEntity> expectedImportInputs = getInputs(filename, Entity.Type.DFS_DIR);
    final Set<WriteEntity> importOutputs = getOutputs(importTableName, Entity.Type.TABLE);
    final Set<WriteEntity> partitionOps = getOutputs(DEFAULT_DB + "@" + importTableName + "@dt=" + PART_FILE, Entity.Type.PARTITION);
    partitionOps.addAll(importOutputs);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, expectedImportInputs, partitionOps), expectedImportInputs, importOutputs);
    //Export should update same process
    filename = "pfile://" + mkdir("export2");
    query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    final Set<WriteEntity> outputs2 = getOutputs(filename, Entity.Type.DFS_DIR);
    Set<WriteEntity> p3Outputs = new LinkedHashSet<WriteEntity>() {{
        // cumulative outputs: the second export dir plus the first
        addAll(outputs2);
        addAll(outputs);
    }};
    validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2), expectedExportInputs, p3Outputs);
    query = "alter table " + importTableName + " drop partition (dt='" + PART_FILE + "')";
    runCommand(query);
    //Import should update same process
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommandWithDelay(query, 1000);
    final Set<ReadEntity> importInputs = getInputs(filename, Entity.Type.DFS_DIR);
    final Set<ReadEntity> expectedImport2Inputs = new LinkedHashSet<ReadEntity>() {{
        // inputs accumulate across both imports into the same process
        addAll(importInputs);
        addAll(expectedImportInputs);
    }};
    validateProcess(constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps), expectedImport2Inputs, importOutputs);
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), Referenceable (org.apache.atlas.typesystem.Referenceable), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), Test (org.testng.annotations.Test)
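
The helpers getInputs and getOutputs called throughout these examples are not shown on this page. Below is a minimal sketch of what getInputs might look like, assuming org.apache.hadoop.hive.ql.hooks.Entity exposes setName and setTyp (as it does in the Hive versions Atlas builds against) and that getQualifiedTblName is the test's own name-normalizing helper; treat it as an illustration, not the actual HiveHookIT source.

private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) {
    final ReadEntity entity = new ReadEntity();
    if (Entity.Type.DFS_DIR.equals(entityType)) {
        // DFS paths are tracked by their lower-cased path string
        entity.setName(inputName.toLowerCase());
    } else {
        // tables and partitions are tracked by their qualified name
        entity.setName(getQualifiedTblName(inputName));
    }
    entity.setTyp(entityType);
    return new LinkedHashSet<>(Collections.singletonList(entity));
}

getOutputs would mirror this shape with WriteEntity in place of ReadEntity.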

Example 67 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by apache.

From the class HiveHookIT, method testLoadDFSPathPartitioned.

@Test
public void testLoadDFSPathPartitioned() throws Exception {
    String tableName = createTable(true, true, false);
    assertTableIsRegistered(DEFAULT_DB, tableName);
    final String loadFile = createTestDFSFile("loadDFSFile");
    String query = "load data inpath '" + loadFile + "' into table " + tableName + " partition(dt = '" + PART_FILE + "')";
    runCommand(query);
    final Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
    final Set<ReadEntity> inputs = getInputs(loadFile, Entity.Type.DFS_DIR);
    final Set<WriteEntity> partitionOps = new LinkedHashSet<>(outputs);
    partitionOps.addAll(getOutputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION));
    Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.LOAD, inputs, partitionOps), inputs, outputs);
    validateHDFSPaths(processReference, INPUTS, loadFile);
    validateOutputTables(processReference, outputs);
    final String loadFile2 = createTestDFSFile("loadDFSFile1");
    query = "load data inpath '" + loadFile2 + "' into table " + tableName + " partition(dt = '" + PART_FILE + "')";
    runCommand(query);
    Set<ReadEntity> process2Inputs = getInputs(loadFile2, Entity.Type.DFS_DIR);
    Set<ReadEntity> expectedInputs = new LinkedHashSet<>();
    expectedInputs.addAll(process2Inputs);
    expectedInputs.addAll(inputs);
    validateProcess(constructEvent(query, HiveOperation.LOAD, expectedInputs, partitionOps), expectedInputs, outputs);
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), Referenceable (org.apache.atlas.typesystem.Referenceable), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), Test (org.testng.annotations.Test)
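
Every example funnels its query, operation, and entity sets through constructEvent. A plausible sketch of that helper, assuming HiveHook.HiveEventContext exposes plain setters for the query string, operation, inputs, and outputs (the setter names here are assumptions if your Atlas version differs):

private HiveHook.HiveEventContext constructEvent(String query, HiveOperation op,
        Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
    HiveHook.HiveEventContext event = new HiveHook.HiveEventContext();
    event.setQueryStr(query);   // the raw HiveQL that was run
    event.setOperation(op);     // e.g. HiveOperation.LOAD
    event.setInputs(inputs);
    event.setOutputs(outputs);
    return event;
}

validateProcess then compares the process entity the hook registered in Atlas against this expected event.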

Example 68 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by apache.

From the class HiveHookIT, method testInsertIntoTable.

@Test
public void testInsertIntoTable() throws Exception {
    String inputTable1Name = createTable();
    String inputTable2Name = createTable();
    String insertTableName = createTable();
    assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
    assertTableIsRegistered(DEFAULT_DB, insertTableName);
    String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
    runCommand(query);
    final Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
    inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
    Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
    outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT);
    HiveHook.HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
    Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {{
        // sorted copy of the inputs so the ordering assertion below is deterministic
        addAll(inputs);
    }};
    assertTableIsRegistered(DEFAULT_DB, insertTableName);
    Referenceable processRef1 = validateProcess(event, expectedInputs, outputs);
    //Test sorting of tbl names
    SortedSet<String> sortedTblNames = new TreeSet<>();
    sortedTblNames.add(inputTable1Name.toLowerCase());
    sortedTblNames.add(inputTable2Name.toLowerCase());
    //Verify sorted order of inputs in qualified name
    Assert.assertEquals(processRef1.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME),
            Joiner.on(SEP).join("QUERY",
                    getQualifiedTblName(sortedTblNames.first()),
                    HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.first())).getTime(),
                    getQualifiedTblName(sortedTblNames.last()),
                    HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.last())).getTime())
            + IO_SEP + SEP
            + Joiner.on(SEP).join(WriteEntity.WriteType.INSERT.name(),
                    getQualifiedTblName(insertTableName),
                    HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, insertTableName)).getTime()));
    //Rerun same query. Should result in same process
    runCommandWithDelay(query, 1000);
    Referenceable processRef2 = validateProcess(event, expectedInputs, outputs);
    Assert.assertEquals(processRef1.getId()._getId(), processRef2.getId()._getId());
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), Referenceable (org.apache.atlas.typesystem.Referenceable), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), Test (org.testng.annotations.Test)
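
Example 68 sorts its inputs with entityComparator so that the qualified process name built from them is deterministic. The comparator itself is not shown; a minimal sketch, assuming Entity.getName() returns the registered name (the null handling is illustrative):

private static final Comparator<Entity> entityComparator = new Comparator<Entity>() {
    @Override
    public int compare(Entity o1, Entity o2) {
        if (o1.getName() == null || o2.getName() == null) {
            // place unnamed entities first; registered entities always have names
            return -1;
        }
        return o1.getName().toLowerCase().compareTo(o2.getName().toLowerCase());
    }
};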

Example 69 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by apache.

From the class HiveHookIT, method testExportImportUnPartitionedTable.

@Test
public void testExportImportUnPartitionedTable() throws Exception {
    String tableName = createTable(false);
    assertTableIsRegistered(DEFAULT_DB, tableName);
    String filename = "pfile://" + mkdir("exportUnPartitioned");
    String query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
    Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
    validateHDFSPaths(processReference, OUTPUTS, filename);
    validateInputTables(processReference, inputs);
    //Import
    String importTableName = createTable(false);
    assertTableIsRegistered(DEFAULT_DB, importTableName);
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);
    outputs = getOutputs(importTableName, Entity.Type.TABLE);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
    //Should create another process
    filename = "pfile://" + mkdir("export2UnPartitioned");
    query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    inputs = getInputs(tableName, Entity.Type.TABLE);
    outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
    //Import again should create another process
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);
    outputs = getOutputs(importTableName, Entity.Type.TABLE);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), Referenceable (org.apache.atlas.typesystem.Referenceable), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), Test (org.testng.annotations.Test)
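
runCommandWithDelay, used in examples 66 and 68 when a query is re-run against an already-registered process, most plausibly just executes the statement and then sleeps so the asynchronous Atlas hook can deliver its notification before the next assertion. A sketch under that assumption:

private void runCommandWithDelay(String cmd, long sleepMs) throws Exception {
    runCommand(cmd);         // same helper the other examples use
    Thread.sleep(sleepMs);   // let the async hook post to Atlas first
}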

Aggregations

ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 69 usages
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 32 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 31 usages
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 23 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 16 usages
ArrayList (java.util.ArrayList): 15 usages
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 15 usages
LinkedHashMap (java.util.LinkedHashMap): 10 usages
Referenceable (org.apache.atlas.typesystem.Referenceable): 10 usages
HashMap (java.util.HashMap): 9 usages
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 8 usages
Test (org.testng.annotations.Test): 8 usages
Path (org.apache.hadoop.fs.Path): 7 usages
Map (java.util.Map): 6 usages
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 5 usages
FileNotFoundException (java.io.FileNotFoundException): 4 usages
HashSet (java.util.HashSet): 4 usages
List (java.util.List): 4 usages
Database (org.apache.hadoop.hive.metastore.api.Database): 4 usages
IOException (java.io.IOException): 3 usages