
Example 16 with AddPartitionDesc

Use of org.apache.hadoop.hive.ql.plan.AddPartitionDesc in project hive by apache.

From the class ImportSemanticAnalyzer, method createReplImportTasks.

/**
 * Create tasks for repl import
 */
private static void createReplImportTasks(
        ImportTableDesc tblDesc, List<AddPartitionDesc> partitionDescs,
        ReplicationSpec replicationSpec, boolean waitOnPrecursor,
        Table table, URI fromURI, FileSystem fs, Warehouse wh,
        EximUtil.SemanticAnalyzerWrapperContext x, Long writeId, int stmtId,
        boolean isSourceMm, UpdatedMetaDataTracker updatedMetadata)
        throws HiveException, URISyntaxException, IOException, MetaException {
    Task<?> dr = null;
    WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK;
    // Normally, on import, trying to create a table or a partition in a db that does not yet exist
    // is an error condition. However, in the case of a REPL LOAD, it is possible that we are trying
    // to create tasks to create a table inside a db that as-of-now does not exist, but there is
    // a precursor Task waiting that will create it before this is encountered. Thus, we instantiate
    // defaults and do not error out in that case.
    Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
    if (parentDb == null) {
        if (!waitOnPrecursor) {
            throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tblDesc.getDatabaseName()));
        }
    }
    if (table != null) {
        if (!replicationSpec.allowReplacementInto(table.getParameters())) {
            // If the target table exists and is newer than or the same as the current update (based on repl.last.id), then just noop it.
            x.getLOG().info("Table {}.{} is not replaced as it is newer than the update", tblDesc.getDatabaseName(), tblDesc.getTableName());
            return;
        }
    } else {
        // If table doesn't exist, allow creating a new one only if the database state is older than the update.
        if ((parentDb != null) && (!replicationSpec.allowReplacementInto(parentDb.getParameters()))) {
            // If the target database is newer than or the same as the current update (based on repl.last.id), then just noop it.
            x.getLOG().info("Table {}.{} is not created as the database is newer than the update", tblDesc.getDatabaseName(), tblDesc.getTableName());
            return;
        }
    }
    if (updatedMetadata != null) {
        updatedMetadata.set(replicationSpec.getReplicationState(), tblDesc.getDatabaseName(), tblDesc.getTableName(), null);
    }
    if (tblDesc.getLocation() == null) {
        if (!waitOnPrecursor) {
            tblDesc.setLocation(wh.getDefaultTablePath(parentDb, tblDesc.getTableName()).toString());
        } else {
            tblDesc.setLocation(wh.getDnsPath(new Path(wh.getDefaultDatabasePath(tblDesc.getDatabaseName()), org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(tblDesc.getTableName().toLowerCase()))).toString());
        }
    }
    /* Note: In the following section, Metadata-only import handling logic is
       interleaved with regular repl-import logic. The rule of thumb being
       followed here is that MD-only imports are essentially ALTERs. They do
       not load data, and should not be "creating" any metadata - they should
       be replacing instead. The only place it makes sense for a MD-only import
       to create is in the case of a table that's been dropped and recreated,
       or in the case of an unpartitioned table. In all other cases, it should
       behave like a noop or a pure MD alter.
    */
    if (table == null) {
        if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
            lockType = WriteEntity.WriteType.DDL_SHARED;
        }
        Task<?> t = createTableTask(tblDesc, x);
        table = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());
        if (!replicationSpec.isMetadataOnly()) {
            if (isPartitioned(tblDesc)) {
                Task<?> ict = createImportCommitTask(tblDesc.getDatabaseName(), tblDesc.getTableName(), writeId, stmtId, AcidUtils.isInsertOnlyTable(tblDesc.getTblProps()));
                for (AddPartitionDesc addPartitionDesc : partitionDescs) {
                    addPartitionDesc.setReplicationSpec(replicationSpec);
                    t.addDependentTask(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, isSourceMm, ict));
                    if (updatedMetadata != null) {
                        updatedMetadata.addPartition(addPartitionDesc.getPartition(0).getPartSpec());
                    }
                }
            } else {
                x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
                t.addDependentTask(loadTable(fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x, writeId, stmtId, isSourceMm));
            }
        }
        // Simply create
        x.getTasks().add(t);
    } else {
        // Table existed, and is okay to replicate into, not dropping and re-creating.
        if (table.isPartitioned()) {
            x.getLOG().debug("table partitioned");
            for (AddPartitionDesc addPartitionDesc : partitionDescs) {
                addPartitionDesc.setReplicationSpec(replicationSpec);
                Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
                org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
                Task<?> ict = replicationSpec.isMetadataOnly() ? null : createImportCommitTask(tblDesc.getDatabaseName(), tblDesc.getTableName(), writeId, stmtId, AcidUtils.isInsertOnlyTable(tblDesc.getTblProps()));
                if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
                    if (!replicationSpec.isMetadataOnly()) {
                        x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, isSourceMm, ict));
                        if (updatedMetadata != null) {
                            updatedMetadata.addPartition(addPartitionDesc.getPartition(0).getPartSpec());
                        }
                    } else {
                        x.getTasks().add(alterSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, null, x));
                        if (updatedMetadata != null) {
                            updatedMetadata.addPartition(addPartitionDesc.getPartition(0).getPartSpec());
                        }
                    }
                } else {
                    // the destination ptn's repl.last.id is older than the replacement's.
                    if (replicationSpec.allowReplacementInto(ptn.getParameters())) {
                        if (!replicationSpec.isMetadataOnly()) {
                            x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, isSourceMm, ict));
                        } else {
                            x.getTasks().add(alterSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, ptn, x));
                        }
                        if (updatedMetadata != null) {
                            updatedMetadata.addPartition(addPartitionDesc.getPartition(0).getPartSpec());
                        }
                        if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
                            lockType = WriteEntity.WriteType.DDL_SHARED;
                        }
                    }
                }
            }
            if (replicationSpec.isMetadataOnly() && partitionDescs.isEmpty()) {
                // MD-ONLY table alter
                x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
                if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
                    lockType = WriteEntity.WriteType.DDL_SHARED;
                }
            }
        } else {
            x.getLOG().debug("table non-partitioned");
            if (!replicationSpec.isMetadataOnly()) {
                // repl-imports are replace-into unless the event is insert-into
                loadTable(fromURI, table, replicationSpec.isReplace(), new Path(fromURI), replicationSpec, x, writeId, stmtId, isSourceMm);
            } else {
                x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
            }
            if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
                lockType = WriteEntity.WriteType.DDL_SHARED;
            }
        }
    }
    x.getOutputs().add(new WriteEntity(table, lockType));
}
Also used : Path(org.apache.hadoop.fs.Path) ReplCopyTask(org.apache.hadoop.hive.ql.exec.ReplCopyTask) Task(org.apache.hadoop.hive.ql.exec.Task) Table(org.apache.hadoop.hive.ql.metadata.Table) Database(org.apache.hadoop.hive.metastore.api.Database) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
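
To isolate the per-partition pattern used in the partitioned branch above, here is a minimal sketch; it is not code from ImportSemanticAnalyzer, and the helper class and method name are hypothetical. It relies only on the AddPartitionDesc calls already visible in the example: setReplicationSpec and getPartition(0).getPartSpec().

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;

// Hypothetical helper class, not part of Hive.
final class ReplPartitionDescTagger {

    // Mirrors the loop in createReplImportTasks: stamp every descriptor with the
    // current replication spec and collect the partition specs that would be
    // reported to the UpdatedMetaDataTracker.
    static List<Map<String, String>> tagForReplication(List<AddPartitionDesc> partitionDescs,
                                                       ReplicationSpec replicationSpec) {
        List<Map<String, String>> touchedPartSpecs = new ArrayList<>();
        for (AddPartitionDesc addPartitionDesc : partitionDescs) {
            // Carry the replication state (e.g. repl.last.id) onto each descriptor,
            // just as the code above does before building add-partition tasks.
            addPartitionDesc.setReplicationSpec(replicationSpec);
            touchedPartSpecs.add(addPartitionDesc.getPartition(0).getPartSpec());
        }
        return touchedPartSpecs;
    }
}

In the real method, each collected partition spec is passed to updatedMetadata.addPartition and every descriptor is handed to addSinglePartition to build the actual copy/DDL tasks.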

Example 17 with AddPartitionDesc

Use of org.apache.hadoop.hive.ql.plan.AddPartitionDesc in project hive by apache.

From the class ImportSemanticAnalyzer, method createRegularImportTasks.

/**
 * Create tasks for a regular import, with none of the repl complexity.
 * @param tblDesc descriptor of the table being imported
 * @param partitionDescs add-partition descriptors for the partitions present in the export
 * @param isPartSpecSet whether an explicit partition spec was given on the IMPORT statement
 * @param replicationSpec replication scope/state associated with this import
 * @param table the existing target table, or null if it does not exist yet
 * @param fromURI location of the exported data being imported
 * @param fs filesystem of the export location
 * @param wh warehouse, used to resolve default table paths
 */
private static void createRegularImportTasks(
        ImportTableDesc tblDesc, List<AddPartitionDesc> partitionDescs, boolean isPartSpecSet,
        ReplicationSpec replicationSpec, Table table, URI fromURI, FileSystem fs, Warehouse wh,
        EximUtil.SemanticAnalyzerWrapperContext x, Long writeId, int stmtId, boolean isSourceMm)
        throws HiveException, URISyntaxException, IOException, MetaException {
    if (table != null) {
        if (table.isPartitioned()) {
            x.getLOG().debug("table partitioned");
            Task<?> ict = createImportCommitTask(table.getDbName(), table.getTableName(), writeId, stmtId, AcidUtils.isInsertOnlyTable(table.getParameters()));
            for (AddPartitionDesc addPartitionDesc : partitionDescs) {
                Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
                org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
                if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
                    x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, isSourceMm, ict));
                } else {
                    throw new SemanticException(ErrorMsg.PARTITION_EXISTS.getMsg(partSpecToString(partSpec)));
                }
            }
        } else {
            x.getLOG().debug("table non-partitioned");
            // ensure the destination is empty; this check applies only to a regular (non-repl) import
            Path tgtPath = new Path(table.getDataLocation().toString());
            FileSystem tgtFs = FileSystem.get(tgtPath.toUri(), x.getConf());
            checkTargetLocationEmpty(tgtFs, tgtPath, replicationSpec, x.getLOG());
            loadTable(fromURI, table, false, tgtPath, replicationSpec, x, writeId, stmtId, isSourceMm);
        }
        // Register this with DDL_NO_LOCK (effectively a read) because we can't overwrite any existing partitions
        x.getOutputs().add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK));
    } else {
        x.getLOG().debug("table " + tblDesc.getTableName() + " does not exist");
        Task<?> t = createTableTask(tblDesc, x);
        table = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());
        Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
        // Since we are going to be creating a new table in a db, we should mark that db as a write entity
        // so that the auth framework can go to work there.
        x.getOutputs().add(new WriteEntity(parentDb, WriteEntity.WriteType.DDL_SHARED));
        if (isPartitioned(tblDesc)) {
            Task<?> ict = createImportCommitTask(tblDesc.getDatabaseName(), tblDesc.getTableName(), writeId, stmtId, AcidUtils.isInsertOnlyTable(tblDesc.getTblProps()));
            for (AddPartitionDesc addPartitionDesc : partitionDescs) {
                t.addDependentTask(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, isSourceMm, ict));
            }
        } else {
            x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
            if (tblDesc.isExternal() && (tblDesc.getLocation() == null)) {
                x.getLOG().debug("Importing in place, no emptiness check, no copying/loading");
                Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME);
                tblDesc.setLocation(dataPath.toString());
            } else {
                Path tablePath = null;
                if (tblDesc.getLocation() != null) {
                    tablePath = new Path(tblDesc.getLocation());
                } else {
                    tablePath = wh.getDefaultTablePath(parentDb, tblDesc.getTableName());
                }
                FileSystem tgtFs = FileSystem.get(tablePath.toUri(), x.getConf());
                checkTargetLocationEmpty(tgtFs, tablePath, replicationSpec, x.getLOG());
                if (isSourceMm) {
                    // since the target table doesn't exist, it should inherit the source table's properties
                    Map<String, String> tblproperties = table.getParameters();
                    tblproperties.put("transactional", "true");
                    tblproperties.put("transactional_properties", "insert_only");
                    table.setParameters(tblproperties);
                }
                t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x, writeId, stmtId, isSourceMm));
            }
        }
        x.getTasks().add(t);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Table(org.apache.hadoop.hive.ql.metadata.Table) FileSystem(org.apache.hadoop.fs.FileSystem) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc) Database(org.apache.hadoop.hive.metastore.api.Database) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
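
Both examples iterate over AddPartitionDesc instances that were assembled earlier while parsing the export metadata. As a rough illustration of how such a descriptor can be built, the following is a minimal sketch assuming the three-argument constructor and the addPartition(Map, String) overload of the pre-Hive-4 org.apache.hadoop.hive.ql.plan.AddPartitionDesc; the helper class, partition column, and values are invented, so verify the exact signatures against your Hive version.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;

// Hypothetical example class, not part of Hive.
final class AddPartitionDescExample {

    // Build a descriptor for dbName.tableName with one partition (ds=2018-01-01)
    // at an explicit location; 'true' requests IF NOT EXISTS semantics.
    static AddPartitionDesc singlePartitionDesc(String dbName, String tableName, String location) {
        AddPartitionDesc desc = new AddPartitionDesc(dbName, tableName, true);
        Map<String, String> partSpec = new LinkedHashMap<>();
        // The partition column name and value are illustrative only.
        partSpec.put("ds", "2018-01-01");
        desc.addPartition(partSpec, location);
        return desc;
    }
}

A descriptor produced this way could then be added to the partitionDescs list that createRegularImportTasks or createReplImportTasks walks over.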

Aggregations

AddPartitionDesc (org.apache.hadoop.hive.ql.plan.AddPartitionDesc): 17
Path (org.apache.hadoop.fs.Path): 11
Table (org.apache.hadoop.hive.ql.metadata.Table): 7
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 5
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 4
Database (org.apache.hadoop.hive.metastore.api.Database): 4
IOException (java.io.IOException): 3
URISyntaxException (java.net.URISyntaxException): 3
ArrayList (java.util.ArrayList): 3
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 3
URI (java.net.URI): 2
HashSet (java.util.HashSet): 2
Warehouse (org.apache.hadoop.hive.metastore.Warehouse): 2
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 2
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 2
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 2
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 2