Search in sources :

Example 1 with ReplCopyWork

Use of org.apache.hadoop.hive.ql.plan.ReplCopyWork in the Apache Hive project.

Class ReplCopyTask, method execute.

/**
 * Copies the single source path of this task's work to its single destination path.
 *
 * <p>Flows visible in this method:
 * <ul>
 *   <li>If the source path is a change-manager (CM) URI, the referenced file is resolved
 *       and copied directly to the destination.</li>
 *   <li>If the work is flagged to read the source as a files list, the source is expanded
 *       through {@code filesInFileListing}.</li>
 *   <li>Otherwise the source is matched with {@code LoadSemanticAnalyzer.matchFilesOrDir}.</li>
 * </ul>
 *
 * @param driverContext not read by this method
 * @return {@code 0} on success, or when the source is empty and that is not configured as
 *         an error; {@code 1} when the CM-URI copy fails or any exception is raised;
 *         {@code 2} when the target directory cannot be created; {@code 3} when no file
 *         matches the source path; {@code 5} when the files listing is empty
 */
@Override
protected int execute(DriverContext driverContext) {
    LOG.debug("ReplCopyTask.execute()");
    try {
        // Not clear if ReplCopyWork should inherit from CopyWork.
        // Exactly one source and one destination are supported. Checking for "!= 1" (rather
        // than "> 1") also rejects empty arrays, which would otherwise fail below with an
        // unrelated ArrayIndexOutOfBoundsException.
        Path[] fromPaths = work.getFromPaths();
        Path[] toPaths = work.getToPaths();
        if (fromPaths.length != 1 || toPaths.length != 1) {
            throw new RuntimeException("Invalid ReplCopyWork: " + java.util.Arrays.toString(fromPaths)
                + ", " + java.util.Arrays.toString(toPaths));
        }
        Path fromPath = fromPaths[0];
        Path toPath = toPaths[0];
        // NOTE(review): the destination is passed as a separate second argument rather than
        // concatenated into the message; confirm the two-argument printInfo form is intended.
        console.printInfo("Copying data from " + fromPath.toString(), " to " + toPath.toString());
        ReplCopyWork rwork = ((ReplCopyWork) work);
        FileSystem srcFs = fromPath.getFileSystem(conf);
        FileSystem dstFs = toPath.getFileSystem(conf);
        // The source path is a change-manager (CM) URI: split it into file and checksum,
        // resolve the file to copy from, and copy that single file straight to the destination.
        if (ReplChangeManager.isCMFileUri(fromPath, srcFs)) {
            String[] result = ReplChangeManager.getFileWithChksumFromURI(fromPath.toString());
            ReplChangeManager.FileInfo sourceInfo = ReplChangeManager.getFileInfo(new Path(result[0]), result[1], conf);
            if (FileUtils.copy(sourceInfo.getSrcFs(), sourceInfo.getSourcePath(), dstFs, toPath, false, false, conf)) {
                return 0;
            } else {
                console.printError("Failed to copy: '" + fromPath.toString() + "' to: '" + toPath.toString() + "'");
                return 1;
            }
        }
        List<ReplChangeManager.FileInfo> srcFiles = new ArrayList<>();
        if (rwork.readSrcAsFilesList()) {
            // This flow is usually taken for REPL LOAD
            // Our input is the result of a _files listing, we should expand out _files.
            srcFiles = filesInFileListing(srcFs, fromPath);
            if (LOG.isDebugEnabled()) {
                LOG.debug("ReplCopyTask _files contains: {}", (srcFiles == null ? "null" : srcFiles.size()));
            }
            if ((srcFiles == null) || (srcFiles.isEmpty())) {
                if (work.isErrorOnSrcEmpty()) {
                    console.printError("No _files entry found on source: " + fromPath.toString());
                    return 5;
                } else {
                    return 0;
                }
            }
        } else {
            // This flow is usually taken for IMPORT command
            FileStatus[] srcs = LoadSemanticAnalyzer.matchFilesOrDir(srcFs, fromPath);
            if (LOG.isDebugEnabled()) {
                LOG.debug("ReplCopyTasks srcs= {}", (srcs == null ? "null" : srcs.length));
            }
            if (srcs == null || srcs.length == 0) {
                if (work.isErrorOnSrcEmpty()) {
                    console.printError("No files matching path: " + fromPath.toString());
                    return 3;
                } else {
                    return 0;
                }
            }
            for (FileStatus oneSrc : srcs) {
                console.printInfo("Copying file: " + oneSrc.getPath().toString());
                LOG.debug("ReplCopyTask :cp:{}=>{}", oneSrc.getPath(), toPath);
                srcFiles.add(new ReplChangeManager.FileInfo(oneSrc.getPath().getFileSystem(conf), oneSrc.getPath()));
            }
        }
        LOG.debug("ReplCopyTask numFiles: {}", srcFiles.size());
        if (!FileUtils.mkdir(dstFs, toPath, conf)) {
            console.printError("Cannot make target directory: " + toPath.toString());
            return 2;
        }
        // Copy the files from different source file systems to one destination directory
        new CopyUtils(rwork.distCpDoAsUser(), conf).copyAndVerify(dstFs, toPath, srcFiles);
        // Entries not flagged as "use source path" are looked up in the destination under
        // their CM file name and, when present, renamed to the original source file name.
        // (Presumably this keeps the target free of duplicates when the same file is seen
        // once via its source path and once via its CM path - confirm against upstream.)
        for (ReplChangeManager.FileInfo srcFile : srcFiles) {
            if (srcFile.isUseSourcePath()) {
                continue;
            }
            String destFileName = srcFile.getCmPath().getName();
            Path destFile = new Path(toPath, destFileName);
            if (dstFs.exists(destFile)) {
                String destFileWithSourceName = srcFile.getSourcePath().getName();
                Path newDestFile = new Path(toPath, destFileWithSourceName);
                boolean result = dstFs.rename(destFile, newDestFile);
                if (!result) {
                    throw new IllegalStateException("could not rename " + destFile.getName() + " to " + newDestFile.getName());
                }
            }
        }
        return 0;
    } catch (Exception e) {
        // Any failure (including the validation and rename errors thrown above) is reported
        // on the console and mapped to exit code 1.
        console.printError("Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
        return (1);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ReplCopyWork(org.apache.hadoop.hive.ql.plan.ReplCopyWork) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) ReplChangeManager(org.apache.hadoop.hive.metastore.ReplChangeManager) CopyUtils(org.apache.hadoop.hive.ql.parse.repl.CopyUtils)

Example 2 with ReplCopyWork

Use of org.apache.hadoop.hive.ql.plan.ReplCopyWork in the Apache Hive project.

Class ReplCopyTask, method getDumpCopyTask.

/**
 * Builds the task that copies {@code srcPath} to {@code dstPath} for the dump flow.
 *
 * <p>When {@code replicationSpec} is non-null and in replication scope, the task wraps a
 * {@link ReplCopyWork}; if the spec is also lazy, the work is switched to its
 * "list files on output" behaviour. Otherwise a plain {@link CopyWork} is used.
 *
 * @param replicationSpec replication settings; {@code null} is treated as "not in
 *                        replication scope", matching {@code getLoadCopyTask}
 * @param srcPath         path to copy from
 * @param dstPath         path to copy to
 * @param conf            configuration handed to {@code TaskFactory.get}
 * @return the task created by {@code TaskFactory.get} for the chosen work
 */
public static Task<?> getDumpCopyTask(ReplicationSpec replicationSpec, Path srcPath, Path dstPath, HiveConf conf) {
    // Parameterized logging avoids building the message when debug logging is disabled.
    LOG.debug("ReplCopyTask:getDumpCopyTask: {}=>{}", srcPath, dstPath);
    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
        ReplCopyWork rcwork = new ReplCopyWork(srcPath, dstPath, false);
        LOG.debug("ReplCopyTask:\trcwork");
        if (replicationSpec.isLazy()) {
            LOG.debug("ReplCopyTask:\tlazy");
            rcwork.setListFilesOnOutputBehaviour(true);
        }
        return TaskFactory.get(rcwork, conf);
    }
    LOG.debug("ReplCopyTask:\tcwork");
    return TaskFactory.get(new CopyWork(srcPath, dstPath, false), conf);
}
Also used : ReplCopyWork(org.apache.hadoop.hive.ql.plan.ReplCopyWork) ReplCopyWork(org.apache.hadoop.hive.ql.plan.ReplCopyWork) CopyWork(org.apache.hadoop.hive.ql.plan.CopyWork)

Example 3 with ReplCopyWork

Use of org.apache.hadoop.hive.ql.plan.ReplCopyWork in the Apache Hive project.

Class ReplCopyTask, method getLoadCopyTask.

/**
 * Builds the task that copies {@code srcPath} to {@code dstPath} for the load flow.
 *
 * <p>A plain {@link CopyWork} is used when {@code replicationSpec} is {@code null} or not
 * in replication scope. Otherwise a {@link ReplCopyWork} is used; for a lazy spec it is
 * additionally told to read its source as a files list and given the distcp do-as user
 * from the configuration.
 *
 * @param replicationSpec replication settings, may be {@code null}
 * @param srcPath         path to copy from
 * @param dstPath         path to copy to
 * @param conf            configuration read for the distcp do-as user and handed to
 *                        {@code TaskFactory.get}
 * @return the task created by {@code TaskFactory.get} for the chosen work
 */
public static Task<?> getLoadCopyTask(ReplicationSpec replicationSpec, Path srcPath, Path dstPath, HiveConf conf) {
    LOG.debug("ReplCopyTask:getLoadCopyTask: {}=>{}", srcPath, dstPath);

    // Guard clause: outside replication scope a regular copy is sufficient.
    if ((replicationSpec == null) || !replicationSpec.isInReplicationScope()) {
        LOG.debug("ReplCopyTask:\tcwork");
        CopyWork plainWork = new CopyWork(srcPath, dstPath, false);
        return TaskFactory.get(plainWork, conf);
    }

    ReplCopyWork replWork = new ReplCopyWork(srcPath, dstPath, false);
    LOG.debug("ReplCopyTask:\trcwork");
    if (replicationSpec.isLazy()) {
        LOG.debug("ReplCopyTask:\tlazy");
        replWork.setReadSrcAsFilesList(true);
        // The lazy flag is assumed to be set only for the REPL LOAD flow. IMPORT always
        // does a deep copy, so for it distCpDoAsUser keeps ReplCopyWork's default (null).
        replWork.setDistCpDoAsUser(conf.getVar(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER));
    }
    return TaskFactory.get(replWork, conf);
}
Also used : ReplCopyWork(org.apache.hadoop.hive.ql.plan.ReplCopyWork) ReplCopyWork(org.apache.hadoop.hive.ql.plan.ReplCopyWork) CopyWork(org.apache.hadoop.hive.ql.plan.CopyWork)

Aggregations

ReplCopyWork (org.apache.hadoop.hive.ql.plan.ReplCopyWork)3 CopyWork (org.apache.hadoop.hive.ql.plan.CopyWork)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 ReplChangeManager (org.apache.hadoop.hive.metastore.ReplChangeManager)1 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)1 CopyUtils (org.apache.hadoop.hive.ql.parse.repl.CopyUtils)1