Search in sources :

Example 1 with CopyUtils

use of org.apache.hadoop.hive.ql.parse.repl.CopyUtils in project hive by apache.

The method execute of the class ReplCopyTask.

/**
 * Copies the data described by this task's work from its single source path to its single
 * destination path.
 *
 * <p>Three flows are handled:
 * <ul>
 *   <li>the source path is a change-manager (CM) URI: the one file it resolves to is copied
 *       directly to the destination path;</li>
 *   <li>{@code readSrcAsFilesList()} is set on the work: the source files come from
 *       {@code filesInFileListing};</li>
 *   <li>otherwise: the source files are whatever {@code LoadSemanticAnalyzer.matchFilesOrDir}
 *       returns for the source path.</li>
 * </ul>
 * In the last two flows the destination directory is created, the files are copied with
 * {@code CopyUtils.copyAndVerify}, and files that were not copied from their source path are
 * renamed from the CM file name to the source file name.
 *
 * @param driverContext not read by this method
 * @return {@code 0} on success, or when no source files were found and
 *         {@code work.isErrorOnSrcEmpty()} is false; {@code 1} when the direct CM copy fails or
 *         any exception is raised; {@code 2} when the target directory cannot be created;
 *         {@code 3} when no files match the source path; {@code 5} when the file listing is
 *         empty
 */
@Override
protected int execute(DriverContext driverContext) {
    LOG.debug("ReplCopyTask.execute()");
    FileSystem dstFs = null;
    Path toPath = null;
    try {
        // Not clear of ReplCopyWork should inherit from CopyWork.
        // Exactly one source and one destination are supported. An empty array is rejected here
        // as well, so the failure is reported as invalid work rather than as an index error.
        if (work.getFromPaths().length != 1 || work.getToPaths().length != 1) {
            // Arrays.toString renders the paths themselves; concatenating the arrays directly
            // would only print their identity strings.
            throw new RuntimeException("Invalid ReplCopyWork: "
                + java.util.Arrays.toString(work.getFromPaths()) + ", "
                + java.util.Arrays.toString(work.getToPaths()));
        }
        Path fromPath = work.getFromPaths()[0];
        toPath = work.getToPaths()[0];
        console.printInfo("Copying data from " + fromPath.toString(), " to " + toPath.toString());
        ReplCopyWork rwork = ((ReplCopyWork) work);
        FileSystem srcFs = fromPath.getFileSystem(conf);
        dstFs = toPath.getFileSystem(conf);
        // Special case: the source path itself is a CM URI. Resolve it to the concrete file
        // and copy that one file straight to the destination path.
        if (ReplChangeManager.isCMFileUri(fromPath, srcFs)) {
            // NOTE(review): result[0] / result[1] are presumably the file path and its checksum,
            // judging by the helper's name - confirm against ReplChangeManager.
            String[] result = ReplChangeManager.getFileWithChksumFromURI(fromPath.toString());
            ReplChangeManager.FileInfo sourceInfo = ReplChangeManager.getFileInfo(new Path(result[0]), result[1], conf);
            if (FileUtils.copy(sourceInfo.getSrcFs(), sourceInfo.getSourcePath(), dstFs, toPath, false, false, conf)) {
                return 0;
            } else {
                console.printError("Failed to copy: '" + fromPath.toString() + "' to: '" + toPath.toString() + "'");
                return 1;
            }
        }
        List<ReplChangeManager.FileInfo> srcFiles = new ArrayList<>();
        if (rwork.readSrcAsFilesList()) {
            // This flow is usually taken for REPL LOAD
            // Our input is the result of a _files listing, we should expand out _files.
            srcFiles = filesInFileListing(srcFs, fromPath);
            if (LOG.isDebugEnabled()) {
                LOG.debug("ReplCopyTask _files contains: {}", (srcFiles == null ? "null" : srcFiles.size()));
            }
            if ((srcFiles == null) || (srcFiles.isEmpty())) {
                if (work.isErrorOnSrcEmpty()) {
                    console.printError("No _files entry found on source: " + fromPath.toString());
                    return 5;
                } else {
                    return 0;
                }
            }
        } else {
            // This flow is usually taken for IMPORT command
            FileStatus[] srcs = LoadSemanticAnalyzer.matchFilesOrDir(srcFs, fromPath);
            if (LOG.isDebugEnabled()) {
                LOG.debug("ReplCopyTasks srcs= {}", (srcs == null ? "null" : srcs.length));
            }
            if (srcs == null || srcs.length == 0) {
                if (work.isErrorOnSrcEmpty()) {
                    console.printError("No files matching path: " + fromPath.toString());
                    return 3;
                } else {
                    return 0;
                }
            }
            for (FileStatus oneSrc : srcs) {
                console.printInfo("Copying file: " + oneSrc.getPath().toString());
                LOG.debug("ReplCopyTask :cp:{}=>{}", oneSrc.getPath(), toPath);
                srcFiles.add(new ReplChangeManager.FileInfo(oneSrc.getPath().getFileSystem(conf), oneSrc.getPath()));
            }
        }
        LOG.debug("ReplCopyTask numFiles: {}", srcFiles.size());
        if (!FileUtils.mkdir(dstFs, toPath, conf)) {
            console.printError("Cannot make target directory: " + toPath.toString());
            return 2;
        }
        // Copy the files from different source file systems to one destination directory
        new CopyUtils(rwork.distCpDoAsUser(), conf).copyAndVerify(dstFs, toPath, srcFiles);
        // A file that was not copied from its source path sits in the destination under its CM
        // file name; rename it to the source file name. Entries flagged as using the source
        // path are skipped.
        for (ReplChangeManager.FileInfo srcFile : srcFiles) {
            if (srcFile.isUseSourcePath()) {
                continue;
            }
            String destFileName = srcFile.getCmPath().getName();
            Path destFile = new Path(toPath, destFileName);
            if (dstFs.exists(destFile)) {
                String destFileWithSourceName = srcFile.getSourcePath().getName();
                Path newDestFile = new Path(toPath, destFileWithSourceName);
                boolean result = dstFs.rename(destFile, newDestFile);
                if (!result) {
                    throw new IllegalStateException("could not rename " + destFile.getName() + " to " + newDestFile.getName());
                }
            }
        }
        return 0;
    } catch (Exception e) {
        // Every failure, including the validation and rename errors thrown above, is reported
        // on the console and mapped to return code 1.
        console.printError("Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
        return (1);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ReplCopyWork(org.apache.hadoop.hive.ql.plan.ReplCopyWork) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) ReplChangeManager(org.apache.hadoop.hive.metastore.ReplChangeManager) CopyUtils(org.apache.hadoop.hive.ql.parse.repl.CopyUtils)

Example 2 with CopyUtils

use of org.apache.hadoop.hive.ql.parse.repl.CopyUtils in project hive by apache.

The method copyFiles of the class FileOperations.

/**
 * This writes the actual data in the exportRootDataDir from the source.
 *
 * <p>The entries matched for {@code dataFileListPath} on {@code dataFileSystem} are collected
 * as paths and handed to {@code CopyUtils.doCopy} with {@code exportRootDataDir} as the target.
 *
 * @throws IOException propagated from the file matching or the copy step
 * @throws LoginException propagated from the file matching or the copy step
 */
private void copyFiles() throws IOException, LoginException {
    FileStatus[] fileStatuses = LoadSemanticAnalyzer.matchFilesOrDir(dataFileSystem, dataFileListPath);
    List<Path> srcPaths = new ArrayList<>();
    // NOTE(review): matchFilesOrDir may hand back null (presumably when nothing matches the
    // path - confirm). Treat that like an empty match instead of failing with a
    // NullPointerException; the copy step then receives an empty list, exactly as it already
    // does for an empty array.
    if (fileStatuses != null) {
        for (FileStatus fileStatus : fileStatuses) {
            srcPaths.add(fileStatus.getPath());
        }
    }
    new CopyUtils(distCpDoAsUser, hiveConf).doCopy(exportRootDataDir, srcPaths);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) CopyUtils(org.apache.hadoop.hive.ql.parse.repl.CopyUtils)

Aggregations

ArrayList (java.util.ArrayList)2 FileStatus (org.apache.hadoop.fs.FileStatus)2 Path (org.apache.hadoop.fs.Path)2 CopyUtils (org.apache.hadoop.hive.ql.parse.repl.CopyUtils)2 IOException (java.io.IOException)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 ReplChangeManager (org.apache.hadoop.hive.metastore.ReplChangeManager)1 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)1 ReplCopyWork (org.apache.hadoop.hive.ql.plan.ReplCopyWork)1