Search in sources :

Example 6 with CopyWork

use of org.apache.hadoop.hive.ql.plan.CopyWork in project hive by apache.

the class ReplCopyTask method getLoadCopyTask.

/**
 * Builds the task that copies {@code srcPath} to {@code dstPath} during a load.
 *
 * <p>When {@code replicationSpec} is non-null and in replication scope, the task is backed by a
 * {@link ReplCopyWork}; otherwise it is backed by a plain {@link CopyWork}.
 *
 * @param replicationSpec replication settings for the current operation; may be {@code null}
 * @param srcPath         location to copy from
 * @param dstPath         location to copy to
 * @param conf            configuration handed to {@code TaskFactory}; also read for the distcp
 *                        do-as user on the lazy path
 * @return the task produced by {@code TaskFactory} for the chosen work object
 */
public static Task<?> getLoadCopyTask(ReplicationSpec replicationSpec, Path srcPath, Path dstPath, HiveConf conf) {
    LOG.debug("ReplCopyTask:getLoadCopyTask: {}=>{}", srcPath, dstPath);

    final boolean inReplicationScope = (replicationSpec != null) && replicationSpec.isInReplicationScope();
    if (!inReplicationScope) {
        // No replication involved: a plain CopyWork is enough.
        LOG.debug("ReplCopyTask:\tcwork");
        return TaskFactory.get(new CopyWork(srcPath, dstPath, false), conf);
    }

    ReplCopyWork replWork = new ReplCopyWork(srcPath, dstPath, false);
    LOG.debug("ReplCopyTask:\trcwork");
    if (replicationSpec.isLazy()) {
        LOG.debug("ReplCopyTask:\tlazy");
        replWork.setReadSrcAsFilesList(true);
        // The isLazy flag is assumed to be set only for the REPL LOAD flow.
        // IMPORT always does a deep copy, so distCpDoAsUser stays null by default in ReplCopyWork.
        replWork.setDistCpDoAsUser(conf.getVar(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER));
    }
    return TaskFactory.get(replWork, conf);
}
Also used : ReplCopyWork(org.apache.hadoop.hive.ql.plan.ReplCopyWork) ReplCopyWork(org.apache.hadoop.hive.ql.plan.ReplCopyWork) CopyWork(org.apache.hadoop.hive.ql.plan.CopyWork)

Example 7 with CopyWork

use of org.apache.hadoop.hive.ql.plan.CopyWork in project hive by apache.

the class ImportSemanticAnalyzer method addSinglePartition.

/**
 * Creates the tasks needed to import the first partition described by {@code addPartitionDesc}.
 *
 * <p>For an external table with no location set, only an add-partition DDL task is created and
 * returned. Otherwise a copy task, an add-partition task and a move (load) task are created: the
 * load task is made dependent on both the copy task and the add-partition task, the copy task is
 * appended to {@code x.getTasks()}, and {@code commitTask} (when non-null) is made dependent on
 * the load task.
 *
 * @return the add-partition task
 * @throws RuntimeException if the import is in replication scope and either {@code isSourceMm}
 *                          is set or {@code isAcid(writeId)} holds
 */
private static Task<?> addSinglePartition(URI fromURI, FileSystem fs, ImportTableDesc tblDesc, Table table, Warehouse wh, AddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x, Long writeId, int stmtId, boolean isSourceMm, Task<?> commitTask) throws MetaException, IOException, HiveException {
    AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0);

    if (tblDesc.isExternal() && tblDesc.getLocation() == null) {
        x.getLOG().debug("Importing in-place: adding AddPart for partition " + partSpecToString(partSpec.getPartSpec()));
        // addPartitionDesc already has the right partition location
        Task<?> inPlaceAddPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc));
        return inPlaceAddPartTask;
    }

    // Remember the source location before the partition spec's location is rewritten below.
    String srcLocation = partSpec.getLocation();
    fixLocationInPartSpec(fs, tblDesc, table, wh, replicationSpec, partSpec, x);
    x.getLOG().debug("adding dependent CopyWork/AddPart/MoveWork for partition " + partSpecToString(partSpec.getPartSpec()) + " with source location: " + srcLocation);

    Path tgtLocation = new Path(partSpec.getLocation());
    final boolean insertOnly = AcidUtils.isInsertOnlyTable(table.getParameters());
    final Path destPath;
    final Path moveTaskSrc;
    if (insertOnly) {
        // Insert-only tables: copy into a delta subdirectory of the target; the move task starts from the target.
        destPath = new Path(tgtLocation, AcidUtils.deltaSubdir(writeId, writeId, stmtId));
        moveTaskSrc = tgtLocation;
    } else {
        // Otherwise: copy into an external temp path and let the move task start from there.
        destPath = x.getCtx().getExternalTmpPath(tgtLocation);
        moveTaskSrc = destPath;
    }

    if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
        Utilities.FILE_OP_LOGGER.trace("adding import work for partition with source location: " + srcLocation + "; target: " + tgtLocation + "; copy dest " + destPath + "; mm " + writeId + " (src " + isSourceMm + ") for " + partSpecToString(partSpec.getPartSpec()));
    }

    final Task<?> copyTask;
    if (!replicationSpec.isInReplicationScope()) {
        CopyWork plainCopy = new CopyWork(new Path(srcLocation), destPath, false);
        plainCopy.setSkipSourceMmDirs(isSourceMm);
        copyTask = TaskFactory.get(plainCopy);
    } else {
        if (isSourceMm || isAcid(writeId)) {
            // Note: this is replication gap, not MM gap... Repl V2 is not ready yet.
            throw new RuntimeException("Replicating MM and ACID tables is not supported");
        }
        copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, new Path(srcLocation), destPath, x.getConf());
    }

    Task<?> addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc));

    // Note: this sets LoadFileType incorrectly for ACID; is that relevant for import?
    // See setLoadFileType and setIsAcidIow calls elsewhere for an example.
    LoadFileType loadFileType = replicationSpec.isReplace() ? LoadFileType.REPLACE_ALL : LoadFileType.OVERWRITE_EXISTING;
    LoadTableDesc loadTableWork = new LoadTableDesc(moveTaskSrc, Utilities.getTableDesc(table), partSpec.getPartSpec(), loadFileType, writeId);
    loadTableWork.setStmtId(stmtId);
    loadTableWork.setInheritTableSpecs(false);
    Task<?> loadPartTask = TaskFactory.get(new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false));

    // The load runs only after both the copy and the add-partition have been scheduled ahead of it.
    copyTask.addDependentTask(loadPartTask);
    addPartTask.addDependentTask(loadPartTask);
    x.getTasks().add(copyTask);
    if (commitTask != null) {
        loadPartTask.addDependentTask(commitTask);
    }
    return addPartTask;
}
Also used : Path(org.apache.hadoop.fs.Path) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) CopyWork(org.apache.hadoop.hive.ql.plan.CopyWork) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc)

Aggregations

CopyWork (org.apache.hadoop.hive.ql.plan.CopyWork): 7; Path (org.apache.hadoop.fs.Path): 4; LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 4; MoveWork (org.apache.hadoop.hive.ql.plan.MoveWork): 4; LoadFileType (org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType): 3; ReplCopyWork (org.apache.hadoop.hive.ql.plan.ReplCopyWork): 3; Database (org.apache.hadoop.hive.metastore.api.Database): 2; LoadMultiFilesDesc (org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc): 2; DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 1; AlterTableAddPartitionDesc (org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc): 1; AddPartitionDesc (org.apache.hadoop.hive.ql.plan.AddPartitionDesc): 1; DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 1