Example 51 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by apache, from class HiveHookIT, method testInsertIntoTable.

@Test
public void testInsertIntoTable() throws Exception {
    String inputTable1Name = createTable();
    String inputTable2Name = createTable();
    String insertTableName = createTable();
    assertTableIsRegistered(DEFAULT_DB, inputTable1Name);
    assertTableIsRegistered(DEFAULT_DB, insertTableName);
    String query = "insert into " + insertTableName + " select t1.id, t1.name from " + inputTable2Name + " as t2, " + inputTable1Name + " as t1 where t1.id=t2.id";
    runCommand(query);
    final Set<ReadEntity> inputs = getInputs(inputTable1Name, Entity.Type.TABLE);
    inputs.addAll(getInputs(inputTable2Name, Entity.Type.TABLE));
    Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
    (outputs.iterator().next()).setWriteType(WriteEntity.WriteType.INSERT);
    HiveHook.HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
    Set<ReadEntity> expectedInputs = new TreeSet<ReadEntity>(entityComparator) {

        {
            addAll(inputs);
        }
    };
    assertTableIsRegistered(DEFAULT_DB, insertTableName);
    Referenceable processRef1 = validateProcess(event, expectedInputs, outputs);
    //Test sorting of tbl names
    SortedSet<String> sortedTblNames = new TreeSet<>();
    sortedTblNames.add(inputTable1Name.toLowerCase());
    sortedTblNames.add(inputTable2Name.toLowerCase());
    //Verify sorted order of inputs in qualified name
    //Verify sorted order of inputs in qualified name
    Assert.assertEquals(processRef1.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME),
        Joiner.on(SEP).join("QUERY",
            getQualifiedTblName(sortedTblNames.first()),
            HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.first())).getTime(),
            getQualifiedTblName(sortedTblNames.last()),
            HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, sortedTblNames.last())).getTime())
        + IO_SEP + SEP
        + Joiner.on(SEP).join(WriteEntity.WriteType.INSERT.name(),
            getQualifiedTblName(insertTableName),
            HiveMetaStoreBridge.getTableCreatedTime(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, insertTableName)).getTime()));
    //Rerun same query. Should result in same process
    runCommandWithDelay(query, 1000);
    Referenceable processRef2 = validateProcess(event, expectedInputs, outputs);
    Assert.assertEquals(processRef1.getId()._getId(), processRef2.getId()._getId());
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Referenceable(org.apache.atlas.typesystem.Referenceable) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Test(org.testng.annotations.Test)
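The pattern worth noting in this test is how the hook's input and output sets are assembled before validateProcess runs: the ReadEntity sets of both source tables are merged, and the lone output is tagged with WriteType.INSERT so the generated process qualified name carries the operation. The sketch below factors that step out into a hypothetical helper; getInputs, getOutputs and constructEvent are HiveHookIT test helpers and are not reproduced, and only setWriteType and the java.util.Set operations come from the snippet above.

import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

public final class EntitySetUtil {

    private EntitySetUtil() {
    }

    // Merge the read entities of a second source table into the first set, as the test
    // does with getInputs(inputTable1Name, ...) and getInputs(inputTable2Name, ...).
    public static Set<ReadEntity> mergeReads(Set<ReadEntity> first, Set<ReadEntity> second) {
        first.addAll(second);
        return first;
    }

    // Tag every write entity with the given write type, mirroring the
    // setWriteType(WriteEntity.WriteType.INSERT) call on the single output above.
    public static Set<WriteEntity> tagWrites(Set<WriteEntity> outputs, WriteEntity.WriteType type) {
        for (WriteEntity output : outputs) {
            output.setWriteType(type);
        }
        return outputs;
    }
}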

Example 52 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project incubator-atlas by apache, from class HiveHookIT, method testExportImportUnPartitionedTable.

@Test
public void testExportImportUnPartitionedTable() throws Exception {
    String tableName = createTable(false);
    assertTableIsRegistered(DEFAULT_DB, tableName);
    String filename = "pfile://" + mkdir("exportUnPartitioned");
    String query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
    Set<WriteEntity> outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    Referenceable processReference = validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
    validateHDFSPaths(processReference, OUTPUTS, filename);
    validateInputTables(processReference, inputs);
    //Import
    String importTableName = createTable(false);
    assertTableIsRegistered(DEFAULT_DB, importTableName);
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);
    outputs = getOutputs(importTableName, Entity.Type.TABLE);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
    //Should create another process
    filename = "pfile://" + mkdir("export2UnPartitioned");
    query = "export table " + tableName + " to \"" + filename + "\"";
    runCommand(query);
    inputs = getInputs(tableName, Entity.Type.TABLE);
    outputs = getOutputs(filename, Entity.Type.DFS_DIR);
    validateProcess(constructEvent(query, HiveOperation.EXPORT, inputs, outputs));
    //Import again should create another process
    query = "import table " + importTableName + " from '" + filename + "'";
    runCommand(query);
    outputs = getOutputs(importTableName, Entity.Type.TABLE);
    validateProcess(constructEvent(query, HiveOperation.IMPORT, getInputs(filename, Entity.Type.DFS_DIR), outputs));
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Referenceable(org.apache.atlas.typesystem.Referenceable) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Test(org.testng.annotations.Test)
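Each export/import pair registers its own process entity because the lineage endpoints differ by entity type and location: an EXPORT reads a TABLE and writes a DFS_DIR, and the target path is part of the process qualified name, so a second export to a new directory (or the reverse IMPORT direction) cannot collapse into the first process. The sketch below is illustrative only; it assumes the two-argument org.apache.hadoop.hive.ql.metadata.Table(db, table) convenience constructor, and the table and directory names are placeholders.

import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.metadata.Table;

public class ExportLineageSketch {

    public static void main(String[] args) {
        // The export's input side: a ReadEntity wrapping the Hive table being exported.
        Table source = new Table("default", "export_source");
        ReadEntity tableInput = new ReadEntity(source);
        System.out.println("export input type:  " + tableInput.getType()); // TABLE

        // The export's output side is the target directory; the test models it as an
        // entity of type DFS_DIR built from the "pfile://..." path, which is why a
        // different export directory yields a different process.
        System.out.println("export output type: " + Entity.Type.DFS_DIR);
    }
}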

Example 53 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache, from class DbTxnManager, method acquireLocks.

/**
 * Normally a client should call {@link #acquireLocks(org.apache.hadoop.hive.ql.QueryPlan, org.apache.hadoop.hive.ql.Context, String)}
 * @param isBlocking if false, the method will return immediately; thus the locks may be in LockState.WAITING
 * @return null if no locks were needed
 */
@VisibleForTesting
LockState acquireLocks(QueryPlan plan, Context ctx, String username, boolean isBlocking) throws LockException {
    init();
    // Make sure we've built the lock manager
    getLockManager();
    verifyState(plan);
    boolean atLeastOneLock = false;
    queryId = plan.getQueryId();
    switch(plan.getOperation()) {
        case SET_AUTOCOMMIT:
            /**
             * This is here for documentation purposes. This TM doesn't support this - it only has one
             * mode of operation, documented at {@link DbTxnManager#isExplicitTransaction}.
             */
            return null;
    }
    LockRequestBuilder rqstBuilder = new LockRequestBuilder(queryId);
    // link queryId to txnId
    LOG.info("Setting lock request transaction to " + JavaUtils.txnIdToString(txnId) + " for queryId=" + queryId);
    rqstBuilder.setTransactionId(txnId).setUser(username);
    // For each source to read, get a shared lock
    for (ReadEntity input : plan.getInputs()) {
        if (!input.needsLock() || input.isUpdateOrDelete() || !needsLock(input)) {
            // Update and delete targets take write locks instead. Also, there's no need to lock temp tables since they're session wide
            continue;
        }
        LockComponentBuilder compBuilder = new LockComponentBuilder();
        compBuilder.setShared();
        compBuilder.setOperationType(DataOperationType.SELECT);
        Table t = null;
        switch(input.getType()) {
            case DATABASE:
                compBuilder.setDbName(input.getDatabase().getName());
                break;
            case TABLE:
                t = input.getTable();
                compBuilder.setDbName(t.getDbName());
                compBuilder.setTableName(t.getTableName());
                break;
            case PARTITION:
            case DUMMYPARTITION:
                compBuilder.setPartitionName(input.getPartition().getName());
                t = input.getPartition().getTable();
                compBuilder.setDbName(t.getDbName());
                compBuilder.setTableName(t.getTableName());
                break;
            default:
                // This is a file or something we don't hold locks for.
                continue;
        }
        if (t != null) {
            compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
        }
        LockComponent comp = compBuilder.build();
        LOG.debug("Adding lock component to lock request " + comp.toString());
        rqstBuilder.addLockComponent(comp);
        atLeastOneLock = true;
    }
    // For each output, pick the appropriate lock type; updates and deletes need a SEMI-SHARED.
    for (WriteEntity output : plan.getOutputs()) {
        LOG.debug("output is null " + (output == null));
        if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR || !needsLock(output)) {
            // We don't lock files or directories. We also skip locking temp tables.
            continue;
        }
        LockComponentBuilder compBuilder = new LockComponentBuilder();
        Table t = null;
        switch(output.getType()) {
            case DATABASE:
                compBuilder.setDbName(output.getDatabase().getName());
                break;
            case TABLE:
            case DUMMYPARTITION:
                // in case of dynamic partitioning, lock the table
                t = output.getTable();
                compBuilder.setDbName(t.getDbName());
                compBuilder.setTableName(t.getTableName());
                break;
            case PARTITION:
                compBuilder.setPartitionName(output.getPartition().getName());
                t = output.getPartition().getTable();
                compBuilder.setDbName(t.getDbName());
                compBuilder.setTableName(t.getTableName());
                break;
            default:
                // This is a file or something we don't hold locks for.
                continue;
        }
        switch(output.getWriteType()) {
            /* base this on HiveOperation instead?  this and DDL_NO_LOCK is peppered all over the code...
         Seems much cleaner if each stmt is identified as a particular HiveOperation (which I'd think
         makes sense everywhere).  This however would be problematic for merge...*/
            case DDL_EXCLUSIVE:
                compBuilder.setExclusive();
                compBuilder.setOperationType(DataOperationType.NO_TXN);
                break;
            case INSERT_OVERWRITE:
                t = getTable(output);
                if (AcidUtils.isTransactionalTable(t)) {
                    compBuilder.setSemiShared();
                    compBuilder.setOperationType(DataOperationType.UPDATE);
                } else {
                    compBuilder.setExclusive();
                    compBuilder.setOperationType(DataOperationType.NO_TXN);
                }
                break;
            case INSERT:
                assert t != null;
                if (AcidUtils.isFullAcidTable(t)) {
                    compBuilder.setShared();
                } else {
                    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE)) {
                        compBuilder.setExclusive();
                    } else {
                        // this is backward compatible for non-ACID resources, w/o ACID semantics
                        compBuilder.setShared();
                    }
                }
                compBuilder.setOperationType(DataOperationType.INSERT);
                break;
            case DDL_SHARED:
                compBuilder.setShared();
                compBuilder.setOperationType(DataOperationType.NO_TXN);
                break;
            case UPDATE:
                compBuilder.setSemiShared();
                compBuilder.setOperationType(DataOperationType.UPDATE);
                break;
            case DELETE:
                compBuilder.setSemiShared();
                compBuilder.setOperationType(DataOperationType.DELETE);
                break;
            case DDL_NO_LOCK:
                // No lock required here
                continue;
            default:
                throw new RuntimeException("Unknown write type " + output.getWriteType().toString());
        }
        if (t != null) {
            compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
        }
        compBuilder.setIsDynamicPartitionWrite(output.isDynamicPartitionWrite());
        LockComponent comp = compBuilder.build();
        LOG.debug("Adding lock component to lock request " + comp.toString());
        rqstBuilder.addLockComponent(comp);
        atLeastOneLock = true;
    }
    // It's possible there's nothing to lock in this operation.
    if (!atLeastOneLock) {
        LOG.debug("No locks needed for queryId" + queryId);
        return null;
    }
    List<HiveLock> locks = new ArrayList<HiveLock>(1);
    LockState lockState = lockMgr.lock(rqstBuilder.build(), queryId, isBlocking, locks);
    ctx.setHiveLocks(locks);
    return lockState;
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) LockComponentBuilder(org.apache.hadoop.hive.metastore.LockComponentBuilder) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) LockRequestBuilder(org.apache.hadoop.hive.metastore.LockRequestBuilder) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
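To make the read-lock half of acquireLocks() concrete, here is a minimal standalone sketch that builds one shared SELECT lock component and attaches it to a lock request, using only the builder calls visible in the method above. The query id, database, table and user are placeholders, and the api-package locations for LockComponent, LockRequest and DataOperationType are the standard metastore ones assumed here.

import org.apache.hadoop.hive.metastore.LockComponentBuilder;
import org.apache.hadoop.hive.metastore.LockRequestBuilder;
import org.apache.hadoop.hive.metastore.api.DataOperationType;
import org.apache.hadoop.hive.metastore.api.LockComponent;
import org.apache.hadoop.hive.metastore.api.LockRequest;

public class SharedReadLockSketch {

    public static void main(String[] args) {
        // Inputs that merely read data get a shared lock with a SELECT operation type.
        LockComponentBuilder compBuilder = new LockComponentBuilder();
        compBuilder.setShared();
        compBuilder.setOperationType(DataOperationType.SELECT);
        compBuilder.setDbName("default");
        compBuilder.setTableName("web_logs");
        LockComponent comp = compBuilder.build();

        // The request ties all components to one query and, if open, a transaction.
        LockRequestBuilder rqstBuilder = new LockRequestBuilder("query-id-placeholder");
        rqstBuilder.setTransactionId(0L).setUser("hive");
        rqstBuilder.addLockComponent(comp);
        LockRequest request = rqstBuilder.build();

        // In DbTxnManager this request is handed to lockMgr.lock(...) rather than printed.
        System.out.println(request);
    }
}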

Example 54 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache, from class SimpleFetchOptimizer, method checkTree.

// All we can handle is LimitOperator, FilterOperator, SelectOperator and the final FileSinkOperator (FS).
//
// for non-aggressive mode (minimal)
// 1. sampling is not allowed
// 2. for partitioned table, all filters should be targeted to partition column
// 3. SelectOperator should use only simple cast/column access
private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, TableScanOperator ts) throws HiveException {
    SplitSample splitSample = pctx.getNameToSplitSample().get(alias);
    if (!aggressive && splitSample != null) {
        return null;
    }
    if (!aggressive && ts.getConf().getTableSample() != null) {
        return null;
    }
    Table table = ts.getConf().getTableMetadata();
    if (table == null) {
        return null;
    }
    ReadEntity parent = PlanUtils.getParentViewInfo(alias, pctx.getViewAliasToInput());
    if (!table.isPartitioned()) {
        FetchData fetch = new FetchData(ts, parent, table, splitSample);
        return checkOperators(fetch, aggressive, false);
    }
    boolean bypassFilter = false;
    if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) {
        ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts);
        if (PartitionPruner.onlyContainsPartnCols(table, pruner)) {
            bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions();
        }
    }
    if (!aggressive && !bypassFilter) {
        return null;
    }
    PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts);
    FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter);
    return checkOperators(fetch, aggressive, bypassFilter);
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Table(org.apache.hadoop.hive.ql.metadata.Table) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) SplitSample(org.apache.hadoop.hive.ql.parse.SplitSample) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
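The decision order in checkTree() is easy to lose in the plan-level plumbing, so the sketch below restates it as a pure function: minimal (non-aggressive) mode rejects any sampling, unpartitioned tables go straight to the operator checks, and partitioned tables qualify only when partition pruning fully resolves the filters. The booleans stand in for the SplitSample, TableSample and pruning checks on the real plan; this is not the optimizer's API.

public class FetchEligibilitySketch {

    // Mirrors the gating logic of SimpleFetchOptimizer.checkTree() before checkOperators() runs.
    static boolean eligibleForFetch(boolean aggressive, boolean hasSplitSample, boolean hasTableSample,
                                    boolean partitioned, boolean filtersResolvedByPruning) {
        if (!aggressive && (hasSplitSample || hasTableSample)) {
            return false;                                 // rule 1: no sampling in minimal mode
        }
        if (!partitioned) {
            return true;                                  // unpartitioned: go straight to operator checks
        }
        return aggressive || filtersResolvedByPruning;    // rule 2: filters must hit only partition columns
    }

    public static void main(String[] args) {
        System.out.println(eligibleForFetch(false, false, false, true, true));   // true: pruning covers the filter
        System.out.println(eligibleForFetch(false, true, false, false, false));  // false: sampled in minimal mode
    }
}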

Example 55 with ReadEntity

Use of org.apache.hadoop.hive.ql.hooks.ReadEntity in project hive by apache, from class DDLTask, method renamePartition.

/**
 * Rename a partition in a table
 *
 * @param db
 *          database in which to rename the partition
 * @param renamePartitionDesc
 *          descriptor holding the old and new partition specs
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 */
private int renamePartition(Hive db, RenamePartitionDesc renamePartitionDesc) throws HiveException {
    String tableName = renamePartitionDesc.getTableName();
    LinkedHashMap<String, String> oldPartSpec = renamePartitionDesc.getOldPartSpec();
    if (!allowOperationInReplicationScope(db, tableName, oldPartSpec, renamePartitionDesc.getReplicationSpec())) {
        // Skip the rename: the table/partition is missing (dropped or renamed by a later event), or the existing table is newer than our update.
        if (LOG.isDebugEnabled()) {
            LOG.debug("DDLTask: Rename Partition is skipped as table {} / partition {} is newer than update", tableName, FileUtils.makePartName(new ArrayList<>(oldPartSpec.keySet()), new ArrayList<>(oldPartSpec.values())));
        }
        return 0;
    }
    String[] names = Utilities.getDbTableName(tableName);
    if (Utils.isBootstrapDumpInProgress(db, names[0])) {
        LOG.error("DDLTask: Rename Partition not allowed as bootstrap dump in progress");
        throw new HiveException("Rename Partition: Not allowed as bootstrap dump in progress");
    }
    Table tbl = db.getTable(tableName);
    Partition oldPart = db.getPartition(tbl, oldPartSpec, false);
    if (oldPart == null) {
        String partName = FileUtils.makePartName(new ArrayList<String>(oldPartSpec.keySet()), new ArrayList<String>(oldPartSpec.values()));
        throw new HiveException("Rename partition: source partition [" + partName + "] does not exist.");
    }
    Partition part = db.getPartition(tbl, oldPartSpec, false);
    part.setValues(renamePartitionDesc.getNewPartSpec());
    db.renamePartition(tbl, oldPartSpec, part);
    Partition newPart = db.getPartition(tbl, renamePartitionDesc.getNewPartSpec(), false);
    work.getInputs().add(new ReadEntity(oldPart));
    // We've already obtained a lock on the table, don't lock the partition too
    addIfAbsentByName(new WriteEntity(newPart, WriteEntity.WriteType.DDL_NO_LOCK));
    return 0;
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
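The last two lines of renamePartition() are what make the rename visible to lineage and locking: the old partition goes into the inputs as a ReadEntity, and the new partition is added to the outputs with DDL_NO_LOCK because the table lock already covers it. addIfAbsentByName is a DDLTask helper not shown here; the sketch below is a hypothetical stand-in for the de-duplication it is used for, relying only on Entity.getName().

import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.WriteEntity;

public final class OutputRegistry {

    private OutputRegistry() {
    }

    // Hypothetical stand-in for the addIfAbsentByName(...) call above: register the
    // new-partition WriteEntity only if no output with the same entity name is present,
    // so an existing entry (and its write type) is left untouched.
    public static void addIfAbsentByName(WriteEntity entity, Set<WriteEntity> outputs) {
        for (WriteEntity existing : outputs) {
            if (existing.getName().equalsIgnoreCase(entity.getName())) {
                return;
            }
        }
        outputs.add(entity);
    }
}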

Aggregations

ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 75
Table (org.apache.hadoop.hive.ql.metadata.Table): 35
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 34
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 24
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 18
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 18
ArrayList (java.util.ArrayList): 15
Referenceable (org.apache.atlas.typesystem.Referenceable): 10
LinkedHashMap (java.util.LinkedHashMap): 9
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 9
HashMap (java.util.HashMap): 8
Test (org.testng.annotations.Test): 8
Path (org.apache.hadoop.fs.Path): 7
FileNotFoundException (java.io.FileNotFoundException): 6
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 5
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 5
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 5
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 5
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 5
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 5