Example 26 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in the atlas project by apache.

From the class HiveHookIT, method testInsertIntoDFSDirPartitioned.

@Test
public void testInsertIntoDFSDirPartitioned() throws Exception {
    // Test with partitioned table
    String tableName = createTable(true);
    String pFile1 = createTestDFSPath("somedfspath1");
    String query = "insert overwrite DIRECTORY '" + pFile1 + "' select id, name from " + tableName + " where dt = '" + PART_FILE + "'";
    runCommand(query);
    Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
    Set<WriteEntity> outputs = getOutputs(pFile1, Entity.Type.DFS_DIR);
    outputs.iterator().next().setWriteType(WriteEntity.WriteType.PATH_WRITE);
    Set<ReadEntity> partitionIps = new LinkedHashSet<>(inputs);
    partitionIps.addAll(getInputs(DEFAULT_DB + "@" + tableName + "@dt='" + PART_FILE + "'", Entity.Type.PARTITION));
    AtlasEntity processEntity = validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, outputs), inputs, outputs);
    // Re-run the same query with a different HDFS path. This should not create another process; it should update the existing one.
    String pFile2 = createTestDFSPath("somedfspath2");
    query = "insert overwrite DIRECTORY '" + pFile2 + "' select id, name from " + tableName + " where dt = '" + PART_FILE + "'";
    runCommand(query);
    Set<WriteEntity> pFile2Outputs = getOutputs(pFile2, Entity.Type.DFS_DIR);
    pFile2Outputs.iterator().next().setWriteType(WriteEntity.WriteType.PATH_WRITE);
    // The process now has two paths: an older one whose partition reference was deleted, and another with the latest partition.
    Set<WriteEntity> p2Outputs = new LinkedHashSet<WriteEntity>() {

        {
            addAll(pFile2Outputs);
            addAll(outputs);
        }
    };
    AtlasEntity process2Entity = validateProcess(constructEvent(query, HiveOperation.QUERY, partitionIps, pFile2Outputs), inputs, p2Outputs);
    validateHDFSPaths(process2Entity, OUTPUTS, pFile2);
    Assert.assertEquals(process2Entity.getGuid(), processEntity.getGuid());
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), AtlasEntity (org.apache.atlas.model.instance.AtlasEntity), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), Test (org.testng.annotations.Test)
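
These tests rely on helpers such as getInputs and getOutputs to synthesize the entity sets a Hive hook would normally receive. Their real implementations are not shown on this page; a minimal sketch of what such helpers might look like, built only on the public setName/setTyp/setWriteType setters that appear in the examples here (the class name and the INSERT default are assumptions for illustration):

import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

public class HookEntityHelpers {

    // Hypothetical stand-in for the test's getInputs(): wraps a name and type
    // into a single-element ReadEntity set.
    public static Set<ReadEntity> getInputs(String name, Entity.Type type) {
        ReadEntity entity = new ReadEntity();
        entity.setName(name);
        entity.setTyp(type);

        Set<ReadEntity> inputs = new LinkedHashSet<>();
        inputs.add(entity);
        return inputs;
    }

    // Hypothetical stand-in for getOutputs(): same idea for WriteEntity. The
    // write type defaults to INSERT here; the tests above override it with
    // PATH_WRITE for DFS directory outputs.
    public static Set<WriteEntity> getOutputs(String name, Entity.Type type) {
        WriteEntity entity = new WriteEntity();
        entity.setName(name);
        entity.setTyp(type);
        entity.setWriteType(WriteEntity.WriteType.INSERT);

        Set<WriteEntity> outputs = new LinkedHashSet<>();
        outputs.add(entity);
        return outputs;
    }
}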

Example 27 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in the atlas project by apache.

From the class HiveHookIT, method testLoadDFSPathPartitioned.

@Test
public void testLoadDFSPathPartitioned() throws Exception {
    String tableName = createTable(true, true, false);
    assertTableIsRegistered(DEFAULT_DB, tableName);
    String loadFile = createTestDFSFile("loadDFSFile");
    String query = "load data inpath '" + loadFile + "' into table " + tableName + " partition(dt = '" + PART_FILE + "')";
    runCommand(query);
    Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
    Set<ReadEntity> inputs = getInputs(loadFile, Entity.Type.DFS_DIR);
    Set<WriteEntity> partitionOps = new LinkedHashSet<>(outputs);
    partitionOps.addAll(getOutputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION));
    AtlasEntity processReference = validateProcess(constructEvent(query, HiveOperation.LOAD, inputs, partitionOps), inputs, outputs);
    validateHDFSPaths(processReference, INPUTS, loadFile);
    validateOutputTables(processReference, outputs);
    String loadFile2 = createTestDFSFile("loadDFSFile1");
    query = "load data inpath '" + loadFile2 + "' into table " + tableName + " partition(dt = '" + PART_FILE + "')";
    runCommand(query);
    Set<ReadEntity> process2Inputs = getInputs(loadFile2, Entity.Type.DFS_DIR);
    Set<ReadEntity> expectedInputs = new LinkedHashSet<>();
    expectedInputs.addAll(process2Inputs);
    expectedInputs.addAll(inputs);
    validateProcess(constructEvent(query, HiveOperation.LOAD, expectedInputs, partitionOps), expectedInputs, outputs);
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), AtlasEntity (org.apache.atlas.model.instance.AtlasEntity), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), Test (org.testng.annotations.Test)
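
The partition-level entities in this test are looked up by a qualified name of the form db@table@partSpec, built inline by string concatenation. A hypothetical helper that centralizes that formatting (the name and signature are illustrative, not part of the test class) could be:

// Hypothetical helper: builds the "db@table@col=value" name used when fetching
// partition-level entity sets in these tests; mirrors the inline concatenation above.
private static String partitionQualifiedName(String db, String table, String partColumn, String partValue) {
    return db + "@" + table + "@" + partColumn + "=" + partValue;
}

With it, the partition outputs above would read getOutputs(partitionQualifiedName(DEFAULT_DB, tableName, "dt", PART_FILE), Entity.Type.PARTITION).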

Example 28 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in the atlas project by apache.

From the class HiveHookIT, method testExportImportPartitionedTable.

@Test
public void testExportImportPartitionedTable() throws Exception {
    boolean isPartitionedTable = true;
    String tableName = createTable(isPartitionedTable);
    assertTableIsRegistered(DEFAULT_DB, tableName);
    // Add a partition
    String partFile = "pfile://" + mkdir("partition");
    String query = "alter table " + tableName + " add partition (dt='" + PART_FILE + "') location '" + partFile + "'";
    runCommand(query);
    String filename = "pfile://" + mkdir("export");
    query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    Set<ReadEntity> expectedExportInputs = getInputs(tableName, Entity.Type.TABLE);
    Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    // Note that the export has only the partition as input in this case
    Set<ReadEntity> partitionIps = getInputs(DEFAULT_DB + "@" + tableName + "@dt=" + PART_FILE, Entity.Type.PARTITION);
    partitionIps.addAll(expectedExportInputs);
    AtlasEntity processEntity = validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs), expectedExportInputs, outputs);
    validateHDFSPaths(processEntity, OUTPUTS, filename);
    // Import
    String importTableName = createTable(true);
    assertTableIsRegistered(DEFAULT_DB, tableName);
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);
    Set<ReadEntity> expectedImportInputs = getInputs(filename, Entity.Type.DFS_DIR);
    Set<WriteEntity> importOutputs = getOutputs(importTableName, Entity.Type.TABLE);
    Set<WriteEntity> partitionOps = getOutputs(DEFAULT_DB + "@" + importTableName + "@dt=" + PART_FILE, Entity.Type.PARTITION);
    partitionOps.addAll(importOutputs);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, expectedImportInputs, partitionOps), expectedImportInputs, importOutputs);
    // Export should update same process
    filename = "pfile://" + mkdir("export2");
    query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    Set<WriteEntity> outputs2 = getOutputs(filename, Entity.Type.DFS_DIR);
    Set<WriteEntity> p3Outputs = new LinkedHashSet<WriteEntity>() {

        {
            addAll(outputs2);
            addAll(outputs);
        }
    };
    validateProcess(constructEvent(query, HiveOperation.EXPORT, partitionIps, outputs2), expectedExportInputs, p3Outputs);
    query = "alter table " + importTableName + " drop partition (dt='" + PART_FILE + "')";
    runCommand(query);
    // Import should update same process
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommandWithDelay(query, 1000);
    Set<ReadEntity> importInputs = getInputs(filename, Entity.Type.DFS_DIR);
    Set<ReadEntity> expectedImport2Inputs = new LinkedHashSet<ReadEntity>() {

        {
            addAll(importInputs);
            addAll(expectedImportInputs);
        }
    };
    validateProcess(constructEvent(query, HiveOperation.IMPORT, importInputs, partitionOps), expectedImport2Inputs, importOutputs);
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), AtlasEntity (org.apache.atlas.model.instance.AtlasEntity), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), Test (org.testng.annotations.Test)
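
The anonymous LinkedHashSet subclasses with double-brace initialization (p2Outputs in Example 26, p3Outputs and expectedImport2Inputs here) each create an extra anonymous class and capture the enclosing test instance. A plain set with addAll builds the same contents; a sketch of the equivalent for p3Outputs:

// Equivalent to p3Outputs without the double-brace idiom: same insertion order
// (outputs2 first, then the earlier outputs), no anonymous inner class.
Set<WriteEntity> mergedOutputs = new LinkedHashSet<>(outputs2);
mergedOutputs.addAll(outputs);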

Example 29 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in the atlas project by apache.

From the class HiveHookIT, method getPartitionInput.

private ReadEntity getPartitionInput() {
    ReadEntity partEntity = new ReadEntity();
    partEntity.setName(PART_FILE);
    partEntity.setTyp(Entity.Type.PARTITION);
    return partEntity;
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity)
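
A hedged usage sketch for this helper: seed the input set of a synthesized hook event with the partition entity and validate it through the same constructEvent/validateProcess helpers used in the examples above (query, tableName and the rest of the test setup are assumed to exist in the enclosing test method):

// Sketch only: getPartitionInput() supplies the synthetic partition ReadEntity;
// constructEvent, getOutputs and validateProcess are the test class's own helpers.
Set<ReadEntity> inputs = new LinkedHashSet<>();
inputs.add(getPartitionInput());

Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, outputs), inputs, outputs);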

Example 30 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in the flink project by apache.

From the class HiveParserUtils, method addInput.

public static ReadEntity addInput(Set<ReadEntity> inputs, ReadEntity newInput, boolean mergeIsDirectFlag) {
    // If the input is already present, make sure the new parent is added to the input.
    if (inputs.contains(newInput)) {
        for (ReadEntity input : inputs) {
            if (input.equals(newInput)) {
                if ((newInput.getParents() != null) && (!newInput.getParents().isEmpty())) {
                    input.getParents().addAll(newInput.getParents());
                    input.setDirect(input.isDirect() || newInput.isDirect());
                } else if (mergeIsDirectFlag) {
                    input.setDirect(input.isDirect() || newInput.isDirect());
                }
                return input;
            }
        }
        assert false;
    } else {
        inputs.add(newInput);
        return newInput;
    }
    // unreachable, but required to satisfy the compiler
    return null;
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity)
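
A hedged usage sketch for addInput: two ReadEntity instances that compare equal (Entity equality in Hive is based on the entity name), merged so that the direct-read flag is kept. The name "default@src" and the setName/setTyp/setDirect calls are illustrative, reusing only setters already seen in the examples above:

// An indirect input already collected for the query.
ReadEntity existing = new ReadEntity();
existing.setName("default@src");
existing.setTyp(Entity.Type.TABLE);
existing.setDirect(false);

Set<ReadEntity> inputs = new LinkedHashSet<>();
inputs.add(existing);

// The same entity encountered again, this time as a direct read.
ReadEntity again = new ReadEntity();
again.setName("default@src");
again.setTyp(Entity.Type.TABLE);
again.setDirect(true);

// With mergeIsDirectFlag=true the already-present entity is returned and its
// isDirect flag is upgraded to true; the set still contains a single element.
ReadEntity merged = HiveParserUtils.addInput(inputs, again, true);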

Aggregations

ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 139 usages
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 70 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 69 usages
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 31 usages
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 29 usages
ArrayList (java.util.ArrayList): 27 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 27 usages
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 24 usages
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 22 usages
HashMap (java.util.HashMap): 16 usages
Test (org.testng.annotations.Test): 16 usages
Map (java.util.Map): 13 usages
LinkedHashMap (java.util.LinkedHashMap): 12 usages
Path (org.apache.hadoop.fs.Path): 12 usages
List (java.util.List): 11 usages
Database (org.apache.hadoop.hive.metastore.api.Database): 11 usages
AtlasEntity (org.apache.atlas.model.instance.AtlasEntity): 10 usages
Referenceable (org.apache.atlas.typesystem.Referenceable): 10 usages
HashSet (java.util.HashSet): 9 usages
FileNotFoundException (java.io.FileNotFoundException): 7 usages