
Example 46 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

The class ImportSemanticAnalyzer, method createReplImportTasks:

/**
   * Create tasks for repl import
   */
private static void createReplImportTasks(ImportTableDesc tblDesc, List<AddPartitionDesc> partitionDescs, boolean isPartSpecSet, ReplicationSpec replicationSpec, boolean waitOnPrecursor, Table table, URI fromURI, FileSystem fs, Warehouse wh, EximUtil.SemanticAnalyzerWrapperContext x) throws HiveException, URISyntaxException, IOException, MetaException {
    Task dr = null;
    WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK;
    if ((table != null) && (isPartitioned(tblDesc) != table.isPartitioned())) {
        // drop and re-create.
        if (replicationSpec.allowReplacementInto(table)) {
            dr = dropTableTask(table, x);
            lockType = WriteEntity.WriteType.DDL_EXCLUSIVE;
            // null it out so we go into the table re-create flow.
            table = null;
        } else {
            // noop out of here.
            return;
        }
    }
    // Normally, on import, trying to create a table or a partition in a db that does not yet exist
    // is an error condition. However, in the case of a REPL LOAD, it is possible that we are trying
    // to create tasks to create a table inside a db that as-of-now does not exist, but there is
    // a precursor Task waiting that will create it before this is encountered. Thus, we instantiate
    // defaults and do not error out in that case.
    Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
    if (parentDb == null) {
        if (!waitOnPrecursor) {
            throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tblDesc.getDatabaseName()));
        }
    }
    if (tblDesc.getLocation() == null) {
        if (!waitOnPrecursor) {
            tblDesc.setLocation(wh.getTablePath(parentDb, tblDesc.getTableName()).toString());
        } else {
            tblDesc.setLocation(wh.getDnsPath(new Path(wh.getDefaultDatabasePath(tblDesc.getDatabaseName()), MetaStoreUtils.encodeTableName(tblDesc.getTableName().toLowerCase()))).toString());
        }
    }
    if (table == null) {
        if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
            lockType = WriteEntity.WriteType.DDL_SHARED;
        }
        Task t = createTableTask(tblDesc, x);
        table = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());
        if (!replicationSpec.isMetadataOnly()) {
            if (isPartitioned(tblDesc)) {
                for (AddPartitionDesc addPartitionDesc : partitionDescs) {
                    addPartitionDesc.setReplicationSpec(replicationSpec);
                    t.addDependentTask(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
                }
            } else {
                x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
                t.addDependentTask(loadTable(fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x));
            }
        }
        if (dr == null) {
            // Simply create
            x.getTasks().add(t);
        } else {
            // Drop and recreate
            dr.addDependentTask(t);
            x.getTasks().add(dr);
        }
    } else {
        // Table existed, and is okay to replicate into, not dropping and re-creating.
        if (table.isPartitioned()) {
            x.getLOG().debug("table partitioned");
            for (AddPartitionDesc addPartitionDesc : partitionDescs) {
                addPartitionDesc.setReplicationSpec(replicationSpec);
                Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
                org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
                if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
                    if (!replicationSpec.isMetadataOnly()) {
                        x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
                    }
                } else {
                    // Partition already exists; replace it only if the destination ptn's repl.last.id is older than the replacement's.
                    if (replicationSpec.allowReplacementInto(ptn)) {
                        if (!replicationSpec.isMetadataOnly()) {
                            x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
                        } else {
                            x.getTasks().add(alterSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, ptn, x));
                        }
                        if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
                            lockType = WriteEntity.WriteType.DDL_SHARED;
                        }
                    } else {
                    // ignore this ptn, do nothing, not an error.
                    }
                }
            }
            if (replicationSpec.isMetadataOnly() && partitionDescs.isEmpty()) {
                // MD-ONLY table alter
                x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
                if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
                    lockType = WriteEntity.WriteType.DDL_SHARED;
                }
            }
        } else {
            x.getLOG().debug("table non-partitioned");
            if (!replicationSpec.allowReplacementInto(table)) {
                // silently return, table is newer than our replacement.
                return;
            }
            if (!replicationSpec.isMetadataOnly()) {
                // repl-imports are replace-into unless the event is insert-into
                loadTable(fromURI, table, !replicationSpec.isInsert(), new Path(fromURI), replicationSpec, x);
            } else {
                x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
            }
            if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
                lockType = WriteEntity.WriteType.DDL_SHARED;
            }
        }
    }
    x.getOutputs().add(new WriteEntity(table, lockType));
}
Also used: Path(org.apache.hadoop.fs.Path) ReplCopyTask(org.apache.hadoop.hive.ql.exec.ReplCopyTask) Task(org.apache.hadoop.hive.ql.exec.Task) Table(org.apache.hadoop.hive.ql.metadata.Table) Database(org.apache.hadoop.hive.metastore.api.Database) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
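
The method above escalates the lock type as it goes: it starts with DDL_NO_LOCK, widens to DDL_SHARED when the table is created or altered, widens further to DDL_EXCLUSIVE when the table has to be dropped and recreated, and finally registers a single WriteEntity for the table. The helper below is a minimal sketch of that pattern in isolation; the class and method names are hypothetical, not Hive code.

import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Table;

// Hypothetical helper, not part of Hive: shows the lock-escalation pattern
// used by createReplImportTasks in a standalone form.
public final class ReplImportLockSketch {

    private ReplImportLockSketch() {
    }

    public static void registerOutput(Set<WriteEntity> outputs, Table table,
            boolean dropAndRecreate, boolean createOrAlter) {
        WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK;
        if (dropAndRecreate) {
            // Dropping and recreating the table needs an exclusive DDL lock.
            lockType = WriteEntity.WriteType.DDL_EXCLUSIVE;
        } else if (createOrAlter) {
            // Creating or altering the table only needs a shared DDL lock.
            lockType = WriteEntity.WriteType.DDL_SHARED;
        }
        outputs.add(new WriteEntity(table, lockType));
    }
}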

Example 47 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

The class ProcessAnalyzeTable, method genTableStats:

private Task<?> genTableStats(GenTezProcContext context, TableScanOperator tableScan) throws HiveException {
    Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata().getInputFormatClass();
    ParseContext parseContext = context.parseContext;
    Table table = tableScan.getConf().getTableMetadata();
    List<Partition> partitions = new ArrayList<>();
    if (table.isPartitioned()) {
        partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions());
        for (Partition partn : partitions) {
            LOG.debug("XXX: adding part: " + partn);
            context.outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
        }
    }
    TableSpec tableSpec = new TableSpec(table, partitions);
    tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);
    if (inputFormat.equals(OrcInputFormat.class)) {
        // For ORC, there is no Tez Job for table stats.
        StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec());
        snjWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        // If partition is specified, get pruned partition list
        if (partitions.size() > 0) {
            snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan));
        }
        return TaskFactory.get(snjWork, parseContext.getConf());
    } else {
        StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
        statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
        statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
        statsWork.setSourceTask(context.currentTask);
        statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
        return TaskFactory.get(statsWork, parseContext.getConf());
    }
}
Also used: Partition(org.apache.hadoop.hive.ql.metadata.Partition) TableSpec(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec) Table(org.apache.hadoop.hive.ql.metadata.Table) StatsWork(org.apache.hadoop.hive.ql.plan.StatsWork) ArrayList(java.util.ArrayList) StatsNoJobWork(org.apache.hadoop.hive.ql.plan.StatsNoJobWork) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
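
The per-partition output registration in genTableStats can be reduced to a small standalone form. The sketch below assumes only a list of pruned partitions is at hand; the class and method names are made up for illustration.

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Partition;

// Hypothetical standalone version of the loop in genTableStats: every pruned
// partition of the analyzed table is declared as a DDL_NO_LOCK output, so
// hooks see the partitions without forcing any lock for the stats job.
public final class AnalyzeOutputsSketch {

    private AnalyzeOutputsSketch() {
    }

    public static Set<WriteEntity> partitionOutputs(List<Partition> prunedPartitions) {
        Set<WriteEntity> outputs = new HashSet<>();
        for (Partition partn : prunedPartitions) {
            outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
        }
        return outputs;
    }
}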

Example 48 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

The class TestDbTxnManager, method addPartitionOutput:

private WriteEntity addPartitionOutput(Table t, WriteEntity.WriteType writeType) throws Exception {
    Map<String, String> partSpec = new HashMap<String, String>();
    partSpec.put("version", Integer.toString(nextInput++));
    Partition p = new Partition(t, partSpec, new Path("/dev/null"));
    WriteEntity we = new WriteEntity(p, writeType);
    writeEntities.add(we);
    return we;
}
Also used: Path(org.apache.hadoop.fs.Path) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) Partition(org.apache.hadoop.hive.ql.metadata.Partition) HashMap(java.util.HashMap) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)

Example 49 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

The class TestDbTxnManager, method addDynamicPartitionedOutput:

private WriteEntity addDynamicPartitionedOutput(Table t, WriteEntity.WriteType writeType) throws Exception {
    DummyPartition dp = new DummyPartition(t, "no clue what I should call this");
    WriteEntity we = new WriteEntity(dp, writeType, false);
    writeEntities.add(we);
    return we;
}
Also used: DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
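
The two test helpers above differ in how concrete the partition is at compile time. The sketch below puts both cases side by side. It is not taken from TestDbTxnManager: the table, partition column, and dummy-partition name are invented, and the assumption is that the boolean passed to the three-argument WriteEntity constructor marks the entity as complete or not (it is false for the dynamic-partition case in the test above).

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

// Minimal sketch: builds one static and one dynamic partition write entity
// for a made-up partitioned table.
public class PartitionWriteEntitySketch {

    public static void main(String[] args) throws Exception {
        Set<WriteEntity> outputs = new HashSet<>();

        // A throwaway table with a single partition column "version".
        Table t = new Table("default", "events");
        FieldSchema versionCol = new FieldSchema("version", "string", null);
        t.setPartCols(Collections.singletonList(versionCol));

        // Static partition write: the spec is fully known, so a concrete
        // Partition is wrapped in a WriteEntity.
        Map<String, String> partSpec = new HashMap<>();
        partSpec.put("version", "1");
        Partition p = new Partition(t, partSpec, new Path("/dev/null"));
        outputs.add(new WriteEntity(p, WriteEntity.WriteType.INSERT));

        // Dynamic partition write: the concrete partitions are only known at
        // run time, so a DummyPartition stands in and the WriteEntity is
        // created with the third argument false (an incomplete entity).
        DummyPartition dp = new DummyPartition(t, "default@events@version=dynamic");
        outputs.add(new WriteEntity(dp, WriteEntity.WriteType.INSERT, false));
    }
}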

Example 50 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.

The class HiveHook, method addOutputs:

private static void addOutputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<WriteEntity> sortedOutputs, StringBuilder buffer, final Map<WriteEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
    if (refs != null) {
        Set<String> dataSetsProcessed = new LinkedHashSet<>();
        if (sortedOutputs != null) {
            for (WriteEntity output : sortedOutputs) {
                final Entity entity = output;
                if (!dataSetsProcessed.contains(output.getName().toLowerCase())) {
                    //HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
                    if (addQueryType(op, (WriteEntity) entity)) {
                        buffer.append(SEP);
                        buffer.append(((WriteEntity) entity).getWriteType().name());
                    }
                    if (ignoreHDFSPathsInQFName && (Type.DFS_DIR.equals(output.getType()) || Type.LOCAL_DIR.equals(output.getType()))) {
                        LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName());
                    } else if (refs.containsKey(output)) {
                        if (output.getType() == Type.PARTITION || output.getType() == Type.TABLE) {
                            final Date createTime = HiveMetaStoreBridge.getTableCreatedTime(hiveBridge.hiveClient.getTable(output.getTable().getDbName(), output.getTable().getTableName()));
                            addDataset(buffer, refs.get(output), createTime.getTime());
                        } else {
                            addDataset(buffer, refs.get(output));
                        }
                    }
                    dataSetsProcessed.add(output.getName().toLowerCase());
                }
            }
        }
    }
}
Also used: LinkedHashSet(java.util.LinkedHashSet) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Entity(org.apache.hadoop.hive.ql.hooks.Entity) Date(java.util.Date)
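
Seen from the other side, every example above populates the write-entity set that post-execution hooks such as the Atlas HiveHook receive. A minimal hook that walks those outputs might look like the sketch below; the class name and the printed output are made up, only the Hive hook interfaces are real, and such a hook would typically be registered through hive.exec.post.hooks.

import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

// Hypothetical post-execution hook: iterates the WriteEntity outputs of the
// finished query, much like HiveHook.addOutputs above, and prints each
// table/partition write together with its WriteType.
public class LogWriteEntitiesHook implements ExecuteWithHookContext {

    @Override
    public void run(HookContext hookContext) throws Exception {
        for (WriteEntity output : hookContext.getOutputs()) {
            // Skip raw directory sinks; the interesting entries are tables
            // and partitions, which carry a WriteType such as INSERT,
            // INSERT_OVERWRITE, UPDATE or DELETE.
            if (output.getType() == Entity.Type.DFS_DIR
                    || output.getType() == Entity.Type.LOCAL_DIR) {
                continue;
            }
            System.out.println(output.getName() + " -> " + output.getWriteType().name());
        }
    }
}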

Aggregations

WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 88 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 39 usages
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 35 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 24 usages
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 24 usages
ArrayList (java.util.ArrayList): 18 usages
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 14 usages
Path (org.apache.hadoop.fs.Path): 13 usages
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 13 usages
Referenceable (org.apache.atlas.typesystem.Referenceable): 11 usages
Database (org.apache.hadoop.hive.metastore.api.Database): 11 usages
Test (org.junit.Test): 11 usages
QueryPlan (org.apache.hadoop.hive.ql.QueryPlan): 10 usages
HashMap (java.util.HashMap): 9 usages
LinkedHashMap (java.util.LinkedHashMap): 9 usages
Test (org.testng.annotations.Test): 9 usages
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 8 usages
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 8 usages
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 8 usages
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 8 usages