Example 56 with WriteEntity

use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.

the class HiveHookIT method testTruncateTable.

@Test
public void testTruncateTable() throws Exception {
    String tableName = createTable(false);
    String query = String.format("truncate table %s", tableName);
    runCommand(query);
    Set<WriteEntity> outputs = getOutputs(tableName, Entity.Type.TABLE);
    String tableId = assertTableIsRegistered(DEFAULT_DB, tableName);
    validateProcess(constructEvent(query, HiveOperation.TRUNCATETABLE, null, outputs));
    //Check lineage
    String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
    JSONObject response = atlasClient.getInputGraph(datasetName);
    JSONObject vertices = response.getJSONObject("values").getJSONObject("vertices");
    //Below should be assertTrue - Fix https://issues.apache.org/jira/browse/ATLAS-653
    Assert.assertFalse(vertices.has(tableId));
}
Also used : JSONObject(org.codehaus.jettison.json.JSONObject) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Test(org.testng.annotations.Test)
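
The test drives Hive through a runCommand helper that is not shown in this excerpt. A minimal sketch of what such a helper could look like, assuming a standard org.apache.hadoop.hive.ql.Driver backed by the test's HiveConf (the class name and fields below are illustrative only):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.testng.Assert;

public class HiveCommandRunner {

    private final Driver driver;

    public HiveCommandRunner(HiveConf conf) {
        // a Hive session must be started before a Driver can compile and run statements
        SessionState.start(conf);
        this.driver = new Driver(conf);
    }

    // Run a HiveQL statement and fail the test on a non-zero response code.
    public void runCommand(String cmd) throws Exception {
        CommandProcessorResponse response = driver.run(cmd);
        Assert.assertEquals(response.getResponseCode(), 0, "Command failed: " + cmd);
    }
}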

Example 57 with WriteEntity

use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.

the class HiveHookIT method getOutputs.

private Set<WriteEntity> getOutputs(String inputName, Entity.Type entityType) throws HiveException {
    final WriteEntity entity = new WriteEntity();
    if (Entity.Type.DFS_DIR.equals(entityType) || Entity.Type.LOCAL_DIR.equals(entityType)) {
        entity.setName(lower(new Path(inputName).toString()));
        entity.setTyp(entityType);
    } else {
        entity.setName(getQualifiedTblName(inputName));
        entity.setTyp(entityType);
    }
    if (entityType == Entity.Type.TABLE) {
        entity.setT(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, inputName));
    }
    return new LinkedHashSet<WriteEntity>() {
        {
            add(entity);
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
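
The tests in this collection also rely on a getInputs counterpart for read-side lineage (see testInsertIntoTable and testExportImportUnPartitionedTable below). That helper is not part of this excerpt; a minimal sketch, assuming it mirrors getOutputs one-for-one and lives in the same test class with the same helpers available:

private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) throws HiveException {
    // Hypothetical mirror of getOutputs above, building a ReadEntity instead of a WriteEntity.
    final ReadEntity entity = new ReadEntity();
    if (Entity.Type.DFS_DIR.equals(entityType) || Entity.Type.LOCAL_DIR.equals(entityType)) {
        entity.setName(lower(new Path(inputName).toString()));
        entity.setTyp(entityType);
    } else {
        entity.setName(getQualifiedTblName(inputName));
        entity.setTyp(entityType);
    }
    if (entityType == Entity.Type.TABLE) {
        entity.setT(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, inputName));
    }
    return new LinkedHashSet<ReadEntity>() {
        {
            add(entity);
        }
    };
}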

Example 58 with WriteEntity

use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.

the class HiveHookIT method testInsertIntoTable.

@Test
public void testInsertIntoTable() throws Exception {
    String inputTable1Name = createTable();
    String inputTable2Name = createTable();
    String insertTableName = createTable();
    assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
    assertTableIsRegistered(DEFAULT_DB, insertTableName);
    String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
    runCommand(query);
    final Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
    inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
    Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
    (outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
    HiveHook.HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
    Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {
        {
            addAll(inputs);
        }
    };
    assertTableIsRegistered(DEFAULT_DB, insertTableName);
    Referenceable processRef1 = validateProcess(event, expectedInputs, outputs);
    //Test sorting of tbl names
    SortedSet<String> sortedTblNames = new TreeSet<>();
    sortedTblNames.add(inputTable1Name.toLowerCase());
    sortedTblNames.add(inputTable2Name.toLowerCase());
    //Verify sorted order of inputs in qualified name
    Assert.assertEquals(processRef1.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME),
        Joiner.on(SEP).join("QUERY",
            getQualifiedTblName(sortedTblNames.first()),
            HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.first())).getTime(),
            getQualifiedTblName(sortedTblNames.last()),
            HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.last())).getTime())
        + IO_SEP + SEP
        + Joiner.on(SEP).join(WriteEntity.WriteType.INSERT.name(),
            getQualifiedTblName(insertTableName),
            HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, insertTableName)).getTime()));
    //Rerun same query. Should result in same process
    runCommandWithDelay(query, 1000);
    Referenceable processRef2 = validateProcess(event, expectedInputs, outputs);
    Assert.assertEquals(processRef1.getId()._getId(), processRef2.getId()._getId());
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Referenceable(org.apache.atlas.typesystem.Referenceable) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Test(org.testng.annotations.Test)
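
The expectedInputs TreeSet above is ordered by an entityComparator field that is not shown in this excerpt. A plausible sketch, assuming entities are simply ordered case-insensitively by name (the real comparator may differ):

import java.util.Comparator;

// Hypothetical comparator for the sorted-input check: orders entities by name,
// falling back to toString() when a name is missing.
private static final Comparator<Entity> entityComparator = new Comparator<Entity>() {
    @Override
    public int compare(Entity o1, Entity o2) {
        String name1 = o1.getName() != null ? o1.getName() : o1.toString();
        String name2 = o2.getName() != null ? o2.getName() : o2.toString();
        return name1.toLowerCase().compareTo(name2.toLowerCase());
    }
};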

Example 59 with WriteEntity

use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.

the class HiveHookIT method testExportImportUnPartitionedTable.

@Test
public void testExportImportUnPartitionedTable() throws Exception {
    String tableName = createTable(false);
    assertTableIsRegistered(DEFAULT_DB, tableName);
    String filename = "pfile://" + mkdir("exportUnPartitioned");
    String query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
    Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
    validateHDFSPaths(processReference, OUTPUTS, filename);
    validateInputTables(processReference, inputs);
    //Import
    String importTableName = createTable(false);
    assertTableIsRegistered(DEFAULT_DB, importTableName);
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);
    outputs = getOutputs(importTableName, Entity.Type.TABLE);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
    //Should create another process
    filename = "pfile://" + mkdir("export2UnPartitioned");
    query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    inputs = getInputs(tableName, Entity.Type.TABLE);
    outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
    // Importing again should create another process
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);
    outputs = getOutputs(importTableName, Entity.Type.TABLE);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Referenceable(org.apache.atlas.typesystem.Referenceable) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Test(org.testng.annotations.Test)
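
The export target is produced by a mkdir helper outside this excerpt; the test then prefixes it with the pfile:// scheme. A minimal sketch, assuming the helper only needs to create a scratch directory and return its absolute path (name and layout are illustrative):

import java.io.File;

// Hypothetical helper: create a unique scratch directory for export/import targets.
private String mkdir(String tag) {
    File dir = new File(System.getProperty("java.io.tmpdir"), tag + "-" + System.nanoTime());
    Assert.assertTrue(dir.mkdirs(), "could not create directory " + dir);
    return dir.getAbsolutePath();
}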

Example 60 with WriteEntity

use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

the class DbTxnManager method acquireLocks.

/**
 * Normally a client should call {@link #acquireLocks(org.apache.hadoop.hive.ql.QueryPlan, org.apache.hadoop.hive.ql.Context, String)}.
 * @param isBlocking if false, the method will return immediately; thus the locks may be in LockState.WAITING
 * @return null if no locks were needed
 */
@VisibleForTesting
LockState acquireLocks(QueryPlan plan, Context ctx, String username, boolean isBlocking) throws LockException {
    init();
    // Make sure we've built the lock manager
    getLockManager();
    verifyState(plan);
    boolean atLeastOneLock = false;
    queryId = plan.getQueryId();
    switch(plan.getOperation()) {
        case SET_AUTOCOMMIT:
            /**
             * This is here for documentation purposes. This TM doesn't support SET_AUTOCOMMIT - it only
             * has one mode of operation, documented at {@link DbTxnManager#isExplicitTransaction}.
             */
            return null;
    }
    LockRequestBuilder rqstBuilder = new LockRequestBuilder(queryId);
    // link queryId to txnId
    LOG.info("Setting lock request transaction to " + JavaUtils.txnIdToString(txnId) + " for queryId=" + queryId);
    rqstBuilder.setTransactionId(txnId).setUser(username);
    // For each source to read, get a shared lock
    for (ReadEntity input : plan.getInputs()) {
        if (!input.needsLock() || input.isUpdateOrDelete() || !needsLock(input)) {
            // We don't want to acquire read locks during update or delete as we'll be acquiring write
            // locks instead. Also, there's no need to lock temp tables since they're session wide.
            continue;
        }
        LockComponentBuilder compBuilder = new LockComponentBuilder();
        compBuilder.setShared();
        compBuilder.setOperationType(DataOperationType.SELECT);
        Table t = null;
        switch(input.getType()) {
            case DATABASE:
                compBuilder.setDbName(input.getDatabase().getName());
                break;
            case TABLE:
                t = input.getTable();
                compBuilder.setDbName(t.getDbName());
                compBuilder.setTableName(t.getTableName());
                break;
            case PARTITION:
            case DUMMYPARTITION:
                compBuilder.setPartitionName(input.getPartition().getName());
                t = input.getPartition().getTable();
                compBuilder.setDbName(t.getDbName());
                compBuilder.setTableName(t.getTableName());
                break;
            default:
                // This is a file or something we don't hold locks for.
                continue;
        }
        if (t != null) {
            compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
        }
        LockComponent comp = compBuilder.build();
        LOG.debug("Adding lock component to lock request " + comp.toString());
        rqstBuilder.addLockComponent(comp);
        atLeastOneLock = true;
    }
    // For each output to write to, get the appropriate lock type: an overwrite or exclusive DDL
    // needs an EXCLUSIVE lock, while an update, delete, or overwrite of a transactional table
    // needs a SEMI-SHARED.
    for (WriteEntity output : plan.getOutputs()) {
        LOG.debug("output is null " + (output == null));
        if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR || !needsLock(output)) {
            // We don't lock files or directories. We also skip locking temp tables.
            continue;
        }
        LockComponentBuilder compBuilder = new LockComponentBuilder();
        Table t = null;
        switch(output.getType()) {
            case DATABASE:
                compBuilder.setDbName(output.getDatabase().getName());
                break;
            case TABLE:
            // in case of dynamic partitioning lock the table
            case DUMMYPARTITION:
                t = output.getTable();
                compBuilder.setDbName(t.getDbName());
                compBuilder.setTableName(t.getTableName());
                break;
            case PARTITION:
                compBuilder.setPartitionName(output.getPartition().getName());
                t = output.getPartition().getTable();
                compBuilder.setDbName(t.getDbName());
                compBuilder.setTableName(t.getTableName());
                break;
            default:
                // This is a file or something we don't hold locks for.
                continue;
        }
        switch(output.getWriteType()) {
            /* base this on HiveOperation instead?  this and DDL_NO_LOCK is peppered all over the code...
               Seems much cleaner if each stmt is identified as a particular HiveOperation (which I'd think
               makes sense everywhere).  This however would be problematic for merge... */
            case DDL_EXCLUSIVE:
                compBuilder.setExclusive();
                compBuilder.setOperationType(DataOperationType.NO_TXN);
                break;
            case INSERT_OVERWRITE:
                t = getTable(output);
                if (AcidUtils.isTransactionalTable(t)) {
                    compBuilder.setSemiShared();
                    compBuilder.setOperationType(DataOperationType.UPDATE);
                } else {
                    compBuilder.setExclusive();
                    compBuilder.setOperationType(DataOperationType.NO_TXN);
                }
                break;
            case INSERT:
                assert t != null;
                if (AcidUtils.isFullAcidTable(t)) {
                    compBuilder.setShared();
                } else {
                    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE)) {
                        compBuilder.setExclusive();
                    } else {
                        // this is backward compatible for non-ACID resources, w/o ACID semantics
                        compBuilder.setShared();
                    }
                }
                compBuilder.setOperationType(DataOperationType.INSERT);
                break;
            case DDL_SHARED:
                compBuilder.setShared();
                compBuilder.setOperationType(DataOperationType.NO_TXN);
                break;
            case UPDATE:
                compBuilder.setSemiShared();
                compBuilder.setOperationType(DataOperationType.UPDATE);
                break;
            case DELETE:
                compBuilder.setSemiShared();
                compBuilder.setOperationType(DataOperationType.DELETE);
                break;
            case DDL_NO_LOCK:
                // No lock required here
                continue;
            default:
                throw new RuntimeException("Unknown write type " + output.getWriteType().toString());
        }
        if (t != null) {
            compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
        }
        compBuilder.setIsDynamicPartitionWrite(output.isDynamicPartitionWrite());
        LockComponent comp = compBuilder.build();
        LOG.debug("Adding lock component to lock request " + comp.toString());
        rqstBuilder.addLockComponent(comp);
        atLeastOneLock = true;
    }
    // Make sure we actually need locks; it's possible there's nothing to lock in this operation.
    if (!atLeastOneLock) {
        LOG.debug("No locks needed for queryId" + queryId);
        return null;
    }
    List<HiveLock> locks = new ArrayList<HiveLock>(1);
    LockState lockState = lockMgr.lock(rqstBuilder.build(), queryId, isBlocking, locks);
    ctx.setHiveLocks(locks);
    return lockState;
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) LockComponentBuilder(org.apache.hadoop.hive.metastore.LockComponentBuilder) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) LockRequestBuilder(org.apache.hadoop.hive.metastore.LockRequestBuilder) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
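
The package-private overload above exists for tests (note @VisibleForTesting); production callers go through the public HiveTxnManager API, which opens a transaction and then invokes the blocking acquireLocks variant. A sketch of that call sequence, assuming plan, ctx, conf and userName come from the surrounding query compilation:

// Sketch of the normal (blocking) call path through the public HiveTxnManager API.
HiveTxnManager txnMgr = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
try {
    txnMgr.openTxn(ctx, userName);            // associate a transaction id with this query
    txnMgr.acquireLocks(plan, ctx, userName);  // blocking variant of the method shown above
    // ... execute the query ...
    txnMgr.commitTxn();
} catch (LockException e) {
    txnMgr.rollbackTxn();
    throw e;
}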

Aggregations

WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 88
Table (org.apache.hadoop.hive.ql.metadata.Table): 39
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 35
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 24
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 24
ArrayList (java.util.ArrayList): 18
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 14
Path (org.apache.hadoop.fs.Path): 13
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 13
Referenceable (org.apache.atlas.typesystem.Referenceable): 11
Database (org.apache.hadoop.hive.metastore.api.Database): 11
Test (org.junit.Test): 11
QueryPlan (org.apache.hadoop.hive.ql.QueryPlan): 10
HashMap (java.util.HashMap): 9
LinkedHashMap (java.util.LinkedHashMap): 9
Test (org.testng.annotations.Test): 9
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 8
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 8
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 8
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 8