Usage of org.apache.hadoop.hive.ql.plan.ReplCopyWork in the Apache Hive project: class ReplCopyTask, method execute.
@Override
protected int execute(DriverContext driverContext) {
  // Copies data files for replication (REPL LOAD) or IMPORT.
  // Return codes: 0 = success, 1 = copy failure / unexpected exception,
  // 2 = cannot create target dir, 3 = no matching source files (IMPORT),
  // 5 = empty _files listing (REPL LOAD) when isErrorOnSrcEmpty is set.
  LOG.debug("ReplCopyTask.execute()");
  FileSystem dstFs = null;
  Path toPath = null;
  try {
    // Not clear of ReplCopyWork should inherit from CopyWork.
    // ReplCopyWork is expected to carry exactly one source and one target path.
    if (work.getFromPaths().length > 1 || work.getToPaths().length > 1) {
      throw new RuntimeException("Invalid ReplCopyWork: " + work.getFromPaths() + ", " + work.getToPaths());
    }
    Path fromPath = work.getFromPaths()[0];
    toPath = work.getToPaths()[0];
    console.printInfo("Copying data from " + fromPath.toString(), " to " + toPath.toString());
    ReplCopyWork rwork = ((ReplCopyWork) work);
    FileSystem srcFs = fromPath.getFileSystem(conf);
    dstFs = toPath.getFileSystem(conf);
    // The source may be a change-management (CM) URI; if so, resolve it via
    // its embedded checksum and copy the single resolved file directly.
    if (ReplChangeManager.isCMFileUri(fromPath, srcFs)) {
      String[] result = ReplChangeManager.getFileWithChksumFromURI(fromPath.toString());
      ReplChangeManager.FileInfo sourceInfo = ReplChangeManager.getFileInfo(new Path(result[0]), result[1], conf);
      if (FileUtils.copy(sourceInfo.getSrcFs(), sourceInfo.getSourcePath(), dstFs, toPath, false, false, conf)) {
        return 0;
      } else {
        // FIX: original message was missing the closing quote and separator,
        // rendering as "...<from>to: '<to>'".
        console.printError("Failed to copy: '" + fromPath.toString() + "' to: '" + toPath.toString() + "'");
        return 1;
      }
    }
    List<ReplChangeManager.FileInfo> srcFiles = new ArrayList<>();
    if (rwork.readSrcAsFilesList()) {
      // This flow is usually taken for REPL LOAD
      // Our input is the result of a _files listing, we should expand out _files.
      srcFiles = filesInFileListing(srcFs, fromPath);
      if (LOG.isDebugEnabled()) {
        LOG.debug("ReplCopyTask _files contains: {}", (srcFiles == null ? "null" : srcFiles.size()));
      }
      if ((srcFiles == null) || (srcFiles.isEmpty())) {
        if (work.isErrorOnSrcEmpty()) {
          console.printError("No _files entry found on source: " + fromPath.toString());
          return 5;
        } else {
          return 0;
        }
      }
    } else {
      // This flow is usually taken for IMPORT command
      FileStatus[] srcs = LoadSemanticAnalyzer.matchFilesOrDir(srcFs, fromPath);
      if (LOG.isDebugEnabled()) {
        LOG.debug("ReplCopyTasks srcs= {}", (srcs == null ? "null" : srcs.length));
      }
      if (srcs == null || srcs.length == 0) {
        if (work.isErrorOnSrcEmpty()) {
          console.printError("No files matching path: " + fromPath.toString());
          return 3;
        } else {
          return 0;
        }
      }
      for (FileStatus oneSrc : srcs) {
        console.printInfo("Copying file: " + oneSrc.getPath().toString());
        LOG.debug("ReplCopyTask :cp:{}=>{}", oneSrc.getPath(), toPath);
        srcFiles.add(new ReplChangeManager.FileInfo(oneSrc.getPath().getFileSystem(conf), oneSrc.getPath()));
      }
    }
    LOG.debug("ReplCopyTask numFiles: {}", srcFiles.size());
    if (!FileUtils.mkdir(dstFs, toPath, conf)) {
      console.printError("Cannot make target directory: " + toPath.toString());
      return 2;
    }
    // Copy the files from different source file systems to one destination directory
    new CopyUtils(rwork.distCpDoAsUser(), conf).copyAndVerify(dstFs, toPath, srcFiles);
    // For files copied from the CM path (fallback when the source path was gone),
    // the destination carries the CM (checksum) name; rename it back to the
    // original source file name so the loaded data matches expectations.
    for (ReplChangeManager.FileInfo srcFile : srcFiles) {
      if (srcFile.isUseSourcePath()) {
        continue;
      }
      String destFileName = srcFile.getCmPath().getName();
      Path destFile = new Path(toPath, destFileName);
      if (dstFs.exists(destFile)) {
        String destFileWithSourceName = srcFile.getSourcePath().getName();
        Path newDestFile = new Path(toPath, destFileWithSourceName);
        boolean result = dstFs.rename(destFile, newDestFile);
        if (!result) {
          throw new IllegalStateException("could not rename " + destFile.getName() + " to " + newDestFile.getName());
        }
      }
    }
    return 0;
  } catch (Exception e) {
    console.printError("Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
    return (1);
  }
}
Usage of org.apache.hadoop.hive.ql.plan.ReplCopyWork in the Apache Hive project: class ReplCopyTask, method getDumpCopyTask.
/**
 * Builds the copy task used on the dump (source) side of replication.
 * <p>
 * Inside a replication scope this returns a task backed by {@link ReplCopyWork};
 * when the spec is lazy, the work is flagged to emit a _files listing on output
 * instead of copying data eagerly. Outside replication scope a plain
 * {@code CopyWork}-backed task is returned.
 *
 * @param replicationSpec replication context; decides which work type to build
 * @param srcPath         source path to copy from
 * @param dstPath         destination path to copy to
 * @param conf            Hive configuration used to instantiate the task
 * @return a copy task appropriate for the replication scope
 */
public static Task<?> getDumpCopyTask(ReplicationSpec replicationSpec, Path srcPath, Path dstPath, HiveConf conf) {
  Task<?> copyTask = null;
  // Use parameterized logging for consistency with getLoadCopyTask and to
  // avoid string concatenation when debug logging is disabled.
  LOG.debug("ReplCopyTask:getDumpCopyTask: {}=>{}", srcPath, dstPath);
  if (replicationSpec.isInReplicationScope()) {
    ReplCopyWork rcwork = new ReplCopyWork(srcPath, dstPath, false);
    LOG.debug("ReplCopyTask:\trcwork");
    if (replicationSpec.isLazy()) {
      LOG.debug("ReplCopyTask:\tlazy");
      // Lazy dump: list files on output rather than copying them now.
      rcwork.setListFilesOnOutputBehaviour(true);
    }
    copyTask = TaskFactory.get(rcwork, conf);
  } else {
    LOG.debug("ReplCopyTask:\tcwork");
    copyTask = TaskFactory.get(new CopyWork(srcPath, dstPath, false), conf);
  }
  return copyTask;
}
Usage of org.apache.hadoop.hive.ql.plan.ReplCopyWork in the Apache Hive project: class ReplCopyTask, method getLoadCopyTask.
/**
 * Builds the copy task used on the load (target) side of replication.
 * <p>
 * Within a replication scope the returned task is backed by
 * {@link ReplCopyWork}; for a lazy spec (REPL LOAD) the work reads the source
 * as a _files listing and carries the configured distcp doAs user. Outside
 * replication scope (or for a null spec) a plain {@code CopyWork} task is
 * returned instead.
 *
 * @param replicationSpec replication context; may be null
 * @param srcPath         source path to copy from
 * @param dstPath         destination path to copy to
 * @param conf            Hive configuration used to instantiate the task
 * @return a copy task appropriate for the replication scope
 */
public static Task<?> getLoadCopyTask(ReplicationSpec replicationSpec, Path srcPath, Path dstPath, HiveConf conf) {
  LOG.debug("ReplCopyTask:getLoadCopyTask: {}=>{}", srcPath, dstPath);
  // Guard clause: outside replication scope a plain CopyWork suffices.
  if (replicationSpec == null || !replicationSpec.isInReplicationScope()) {
    LOG.debug("ReplCopyTask:\tcwork");
    return TaskFactory.get(new CopyWork(srcPath, dstPath, false), conf);
  }
  ReplCopyWork replCopyWork = new ReplCopyWork(srcPath, dstPath, false);
  LOG.debug("ReplCopyTask:\trcwork");
  if (replicationSpec.isLazy()) {
    LOG.debug("ReplCopyTask:\tlazy");
    replCopyWork.setReadSrcAsFilesList(true);
    // It is assumed isLazy flag is set only for REPL LOAD flow.
    // IMPORT always do deep copy. So, distCpDoAsUser will be null by default in ReplCopyWork.
    replCopyWork.setDistCpDoAsUser(conf.getVar(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER));
  }
  return TaskFactory.get(replCopyWork, conf);
}
Aggregations