Use of org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo in project hive by apache.
The class DDLTask, method archive.
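This example shows PartSpecInfo resolving a (possibly partial) partition specification before the matching partitions are packed into a Hadoop archive (data.har). In HiveQL this path is typically reached via a statement such as ALTER TABLE page_view ARCHIVE PARTITION (dt='2008-06-08'); the table name here is only illustrative.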
private int archive(Hive db, AlterTableSimpleDesc simpleDesc, DriverContext driverContext) throws HiveException {
Table tbl = db.getTable(simpleDesc.getTableName());
if (tbl.getTableType() != TableType.MANAGED_TABLE) {
throw new HiveException("ARCHIVE can only be performed on managed tables");
}
Map<String, String> partSpec = simpleDesc.getPartSpec();
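// Resolve the (possibly partial) partition spec: PartSpecInfo captures the prefix of partition
// columns/values it names, and getPartitions returns every partition matching that prefix.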
PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
List<Partition> partitions = db.getPartitions(tbl, partSpec);
Path originalDir = null;
// With a partial partition specification all matching partitions must sit in the standard
// location; custom locations are only tolerated for a full specification, to keep backward compatibility.
if (partitions.isEmpty()) {
throw new HiveException("No partition matches the specification");
} else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
// for partial specifications we need partitions to follow the scheme
for (Partition p : partitions) {
if (partitionInCustomLocation(tbl, p)) {
String message = String.format("ARCHIVE cannot run for partition " + "groups with custom locations like %s", p.getLocation());
throw new HiveException(message);
}
}
originalDir = partSpecInfo.createPath(tbl);
} else {
Partition p = partitions.get(0);
// the partition may already be marked archived if we are recovering from a failed run
if (ArchiveUtils.isArchived(p)) {
originalDir = new Path(getOriginalLocation(p));
} else {
originalDir = p.getDataLocation();
}
}
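// Two sibling staging directories next to originalDir make the swap recoverable: one will hold
// the new archive, the other the original files.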
Path intermediateArchivedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
Path intermediateOriginalDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
console.printInfo("intermediate.archived is " + intermediateArchivedDir.toString());
console.printInfo("intermediate.original is " + intermediateOriginalDir.toString());
String archiveName = "data.har";
FileSystem fs = null;
try {
fs = originalDir.getFileSystem(conf);
} catch (IOException e) {
throw new HiveException(e);
}
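// HarPathHelper maps locations under the original partition directory to the corresponding
// har:// URIs inside data.har.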
URI archiveUri = (new Path(originalDir, archiveName)).toUri();
URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);
// A partition already marked archived at the same level means a previous run got as far as
// updating the metadata; if the levels are different, we throw an error because it conflicts
// with an existing archive.
for (Partition p : partitions) {
if (ArchiveUtils.isArchived(p)) {
if (ArchiveUtils.getArchivingLevel(p) != partSpecInfo.values.size()) {
String name = ArchiveUtils.getPartialName(p, ArchiveUtils.getArchivingLevel(p));
String m = String.format("Conflict with existing archive %s", name);
throw new HiveException(m);
} else {
throw new HiveException("Partition(s) already archived");
}
}
}
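// If either intermediate directory is left over, a previous ARCHIVE was interrupted; the steps
// below are written so that rerunning them completes the job.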
boolean recovery = false;
if (pathExists(intermediateArchivedDir) || pathExists(intermediateOriginalDir)) {
recovery = true;
console.printInfo("Starting recovery after failed ARCHIVE");
}
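// Overall sequence: (1) build the archive in a tmp dir, (2) move it to intermediate-archived,
// (3) move the original files to intermediate-original, (4) move intermediate-archived into the
// partition location, (5) update the metastore, (6) delete intermediate-original.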
// Create the archived version in the intermediate-archived directory unless one of the
// intermediate directories already exists; an existing intermediate-archived dir is safe
// to use as the move operation that created it is atomic.
if (!pathExists(intermediateArchivedDir) && !pathExists(intermediateOriginalDir)) {
// First create the archive in a tmp dir so that if the job fails, the
// bad files don't pollute the filesystem
Path tmpPath = new Path(driverContext.getCtx().getExternalTmpPath(originalDir), "partlevel");
console.printInfo("Creating " + archiveName + " for " + originalDir.toString());
console.printInfo("in " + tmpPath);
console.printInfo("Please wait... (this may take a while)");
// Create the Hadoop archive
int ret = 0;
try {
int maxJobNameLen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
String jobname = String.format("Archiving %s@%s", tbl.getTableName(), partSpecInfo.getName());
jobname = Utilities.abbreviate(jobname, maxJobNameLen - 6);
conf.set(MRJobConfig.JOB_NAME, jobname);
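// Programmatic equivalent of `hadoop archive -archiveName data.har -p <originalDir> <tmpPath>`,
// run in-process via ToolRunner.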
HadoopArchives har = new HadoopArchives(conf);
List<String> args = new ArrayList<String>();
args.add("-archiveName");
args.add(archiveName);
args.add("-p");
args.add(originalDir.toString());
args.add(tmpPath.toString());
ret = ToolRunner.run(har, args.toArray(new String[0]));
} catch (Exception e) {
throw new HiveException(e);
}
if (ret != 0) {
throw new HiveException("Error while creating HAR");
}
// Move the archive from the tmp dir to an intermediate directory at the same level as
// the partition directory, e.g. .../hr=12-intermediate-archived
try {
console.printInfo("Moving " + tmpPath + " to " + intermediateArchivedDir);
if (pathExists(intermediateArchivedDir)) {
throw new HiveException("The intermediate archive directory already exists.");
}
fs.rename(tmpPath, intermediateArchivedDir);
} catch (IOException e) {
throw new HiveException("Error while moving tmp directory");
}
} else {
if (pathExists(intermediateArchivedDir)) {
console.printInfo("Intermediate archive directory " + intermediateArchivedDir + " already exists. Assuming it contains an archived version of the partition");
}
}
// Move the original partition directory to the intermediate original directory,
// if the move hasn't been made already
if (!pathExists(intermediateOriginalDir)) {
console.printInfo("Moving " + originalDir + " to " + intermediateOriginalDir);
moveDir(fs, originalDir, intermediateOriginalDir);
} else {
console.printInfo(intermediateOriginalDir + " already exists. " + "Assuming it contains the original files in the partition");
}
// Move the intermediate archived directory to the original parent directory
if (!pathExists(originalDir)) {
console.printInfo("Moving " + intermediateArchivedDir + " to " + originalDir);
moveDir(fs, intermediateArchivedDir, originalDir);
} else {
console.printInfo(originalDir + " already exists. " + "Assuming it contains the archived version of the partition");
}
// Record this change in the metastore
try {
for (Partition p : partitions) {
URI originalPartitionUri = ArchiveUtils.addSlash(p.getDataLocation().toUri());
URI harPartitionDir = harHelper.getHarUri(originalPartitionUri);
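// Rebuild the authority (user@host:port) by hand so the har path stored in the metastore is fully qualified.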
StringBuilder authority = new StringBuilder();
if (harPartitionDir.getUserInfo() != null) {
authority.append(harPartitionDir.getUserInfo()).append("@");
}
authority.append(harPartitionDir.getHost());
if (harPartitionDir.getPort() != -1) {
authority.append(":").append(harPartitionDir.getPort());
}
// made into a Path to ensure no slash at the end
Path harPath = new Path(harPartitionDir.getScheme(), authority.toString(), harPartitionDir.getPath());
setArchived(p, harPath, partSpecInfo.values.size());
db.alterPartition(simpleDesc.getTableName(), p, null);
}
} catch (Exception e) {
throw new HiveException("Unable to change the partition info for HAR", e);
}
// If a failure occurs past this point, the directory with the original files
// will not be deleted. The user will run ARCHIVE again to clear this up.
if (pathExists(intermediateOriginalDir)) {
deleteDir(intermediateOriginalDir);
}
if (recovery) {
console.printInfo("Recovery after ARCHIVE succeeded");
}
return 0;
}
Use of org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo in project hive by apache.
The class DDLTask, method unarchive.
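The companion method reverses archive(): it validates the partition spec with PartSpecInfo, extracts data.har back into a plain directory, and clears the archive flags in the metastore. In HiveQL this corresponds to a statement such as ALTER TABLE page_view UNARCHIVE PARTITION (dt='2008-06-08'); again, the table name is only illustrative.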
private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) throws HiveException, URISyntaxException {
Table tbl = db.getTable(simpleDesc.getTableName());
// Means user specified a table, not a partition
if (simpleDesc.getPartSpec() == null) {
throw new HiveException("UNARCHIVE is for partitions only");
}
if (tbl.getTableType() != TableType.MANAGED_TABLE) {
throw new HiveException("UNARCHIVE can only be performed on managed tables");
}
Map<String, String> partSpec = simpleDesc.getPartSpec();
PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
List<Partition> partitions = db.getPartitions(tbl, partSpec);
int partSpecLevel = partSpec.size();
Path originalDir = null;
// With a partial partition specification all matching partitions must sit in the standard
// location; custom locations are only tolerated for a full specification, to keep backward compatibility.
if (partitions.isEmpty()) {
throw new HiveException("No partition matches the specification");
} else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
// for partial specifications we need partitions to follow the scheme
for (Partition p : partitions) {
if (partitionInCustomLocation(tbl, p)) {
String message = String.format("UNARCHIVE cannot run for partition " + "groups with custom locations like %s", p.getLocation());
throw new HiveException(message);
}
}
originalDir = partSpecInfo.createPath(tbl);
} else {
Partition p = partitions.get(0);
if (ArchiveUtils.isArchived(p)) {
originalDir = new Path(getOriginalLocation(p));
} else {
originalDir = new Path(p.getLocation());
}
}
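// As in archive(), two sibling staging directories make the swap recoverable: one keeps the
// still-archived files, the other receives the extracted copy.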
URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
Path intermediateArchivedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
Path intermediateExtractedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_EXTRACTED_DIR_SUFFIX);
boolean recovery = false;
if (pathExists(intermediateArchivedDir) || pathExists(intermediateExtractedDir)) {
recovery = true;
console.printInfo("Starting recovery after failed UNARCHIVE");
}
for (Partition p : partitions) {
checkArchiveProperty(partSpecLevel, recovery, p);
}
String archiveName = "data.har";
FileSystem fs = null;
try {
fs = originalDir.getFileSystem(conf);
} catch (IOException e) {
throw new HiveException(e);
}
// assume the archive is in the original dir, check if it exists
Path archivePath = new Path(originalDir, archiveName);
URI archiveUri = archivePath.toUri();
ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);
URI sourceUri = harHelper.getHarUri(originalUri);
Path sourceDir = new Path(sourceUri.getScheme(), sourceUri.getAuthority(), sourceUri.getPath());
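// sourceDir is the har:// view of the partition's contents inside data.har; the files are copied out of it below.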
if (!pathExists(intermediateArchivedDir) && !pathExists(archivePath)) {
throw new HiveException("Haven't found any archive where it should be");
}
Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir);
try {
fs = tmpPath.getFileSystem(conf);
} catch (IOException e) {
throw new HiveException(e);
}
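// Step 1: extract the archive contents into tmpPath and rename to intermediate-extracted
// (skipped if a previous run already got this far).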
if (!pathExists(intermediateExtractedDir) && !pathExists(intermediateArchivedDir)) {
try {
// Copy the files out of the archive into the temporary directory
String copySource = sourceDir.toString();
String copyDest = tmpPath.toString();
List<String> args = new ArrayList<String>();
args.add("-cp");
args.add(copySource);
args.add(copyDest);
console.printInfo("Copying " + copySource + " to " + copyDest);
FileSystem srcFs = FileSystem.get(sourceDir.toUri(), conf);
srcFs.initialize(sourceDir.toUri(), conf);
FsShell fss = new FsShell(conf);
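// Programmatic equivalent of `hadoop fs -cp <har source> <tmp dest>`, run in-process via ToolRunner.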
int ret = 0;
try {
ret = ToolRunner.run(fss, args.toArray(new String[0]));
} catch (Exception e) {
e.printStackTrace();
throw new HiveException(e);
}
if (ret != 0) {
throw new HiveException("Error while copying files from archive, return code=" + ret);
} else {
console.printInfo("Successfully Copied " + copySource + " to " + copyDest);
}
console.printInfo("Moving " + tmpPath + " to " + intermediateExtractedDir);
if (fs.exists(intermediateExtractedDir)) {
throw new HiveException("Invalid state: the intermediate extracted " + "directory already exists.");
}
fs.rename(tmpPath, intermediateExtractedDir);
} catch (Exception e) {
throw new HiveException(e);
}
}
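// Step 2: move the still-archived partition directory out of the way to intermediate-archived.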
if (!pathExists(intermediateArchivedDir)) {
try {
console.printInfo("Moving " + originalDir + " to " + intermediateArchivedDir);
fs.rename(originalDir, intermediateArchivedDir);
} catch (IOException e) {
throw new HiveException(e);
}
} else {
console.printInfo(intermediateArchivedDir + " already exists. " + "Assuming it contains the archived version of the partition");
}
// Step 3: move the extracted files into the partition location. If originalDir already exists
// it must hold the extracted files, because the previous step moved the old original location
// (containing the archived version of the files) to intermediateArchivedDir
if (!pathExists(originalDir)) {
try {
console.printInfo("Moving " + intermediateExtractedDir + " to " + originalDir);
fs.rename(intermediateExtractedDir, originalDir);
} catch (IOException e) {
throw new HiveException(e);
}
} else {
console.printInfo(originalDir + " already exists. " + "Assuming it contains the extracted files in the partition");
}
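// Finally, clear the archived flag and HAR location from each partition in the metastore.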
for (Partition p : partitions) {
setUnArchived(p);
try {
db.alterPartition(simpleDesc.getTableName(), p, null);
} catch (InvalidOperationException e) {
throw new HiveException(e);
}
}
// If a failure occurs past this point, the intermediate archived directory will not be
// deleted. The user will need to call UNARCHIVE again to clear those up.
if (pathExists(intermediateArchivedDir)) {
deleteDir(intermediateArchivedDir);
}
if (recovery) {
console.printInfo("Recovery after UNARCHIVE succeeded");
}
return 0;
}