Use of org.apache.hadoop.tools.HadoopArchives in project hive by apache.
The class DDLTask, method archive:
private int archive(Hive db, AlterTableSimpleDesc simpleDesc, DriverContext driverContext) throws HiveException {
Table tbl = db.getTable(simpleDesc.getTableName());
if (tbl.getTableType() != TableType.MANAGED_TABLE) {
throw new HiveException("ARCHIVE can only be performed on managed tables");
}
Map<String, String> partSpec = simpleDesc.getPartSpec();
PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
List<Partition> partitions = db.getPartitions(tbl, partSpec);
Path originalDir = null;
// With a partial partition spec, partitions must follow the standard location
// scheme; a full spec allows custom locations, to keep backward compatibility.
if (partitions.isEmpty()) {
throw new HiveException("No partition matches the specification");
} else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
// for partial specifications we need partitions to follow the scheme
for (Partition p : partitions) {
if (partitionInCustomLocation(tbl, p)) {
String message = String.format("ARCHIVE cannot run for partition groups with custom locations like %s", p.getLocation());
throw new HiveException(message);
}
}
originalDir = partSpecInfo.createPath(tbl);
} else {
Partition p = partitions.get(0);
// the partition may already be marked as archived if we are recovering from a failed ARCHIVE
if (ArchiveUtils.isArchived(p)) {
originalDir = new Path(getOriginalLocation(p));
} else {
originalDir = p.getDataLocation();
}
}
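// Intermediate directories staged next to the partition directory make the
// operation recoverable: one holds the new archive, the other the original data.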
Path intermediateArchivedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
Path intermediateOriginalDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
console.printInfo("intermediate.archived is " + intermediateArchivedDir.toString());
console.printInfo("intermediate.original is " + intermediateOriginalDir.toString());
String archiveName = "data.har";
FileSystem fs = null;
try {
fs = originalDir.getFileSystem(conf);
} catch (IOException e) {
throw new HiveException(e);
}
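// Build the URI of the archive inside the partition directory and a helper
// that maps original partition locations to their har:// equivalents.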
URI archiveUri = (new Path(originalDir, archiveName)).toUri();
URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);
// Fail if any matching partition is already archived, either at a different
// level (a conflict with an existing archive) or at this level.
for (Partition p : partitions) {
if (ArchiveUtils.isArchived(p)) {
if (ArchiveUtils.getArchivingLevel(p) != partSpecInfo.values.size()) {
String name = ArchiveUtils.getPartialName(p, ArchiveUtils.getArchivingLevel(p));
String m = String.format("Conflict with existing archive %s", name);
throw new HiveException(m);
} else {
throw new HiveException("Partition(s) already archived");
}
}
}
boolean recovery = false;
if (pathExists(intermediateArchivedDir) || pathExists(intermediateOriginalDir)) {
recovery = true;
console.printInfo("Starting recovery after failed ARCHIVE");
}
// If either intermediate directory already exists it is safe to reuse,
// because the move operation that created it is atomic.
if (!pathExists(intermediateArchivedDir) && !pathExists(intermediateOriginalDir)) {
// First create the archive in a tmp dir so that if the job fails, the
// bad files don't pollute the filesystem
Path tmpPath = new Path(driverContext.getCtx().getExternalTmpPath(originalDir), "partlevel");
console.printInfo("Creating " + archiveName + " for " + originalDir.toString());
console.printInfo("in " + tmpPath);
console.printInfo("Please wait... (this may take a while)");
// Create the Hadoop archive
int ret = 0;
try {
int maxJobNameLen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
String jobname = String.format("Archiving %s@%s", tbl.getTableName(), partSpecInfo.getName());
jobname = Utilities.abbreviate(jobname, maxJobNameLen - 6);
conf.set(MRJobConfig.JOB_NAME, jobname);
HadoopArchives har = new HadoopArchives(conf);
List<String> args = new ArrayList<String>();
args.add("-archiveName");
args.add(archiveName);
args.add("-p");
args.add(originalDir.toString());
args.add(tmpPath.toString());
ret = ToolRunner.run(har, args.toArray(new String[0]));
} catch (Exception e) {
throw new HiveException(e);
}
if (ret != 0) {
throw new HiveException("Error while creating HAR");
}
// Move the new archive from the tmp dir to an intermediate directory at the
// same level as the partition directory, e.g. .../hr=12-intermediate-archived
try {
console.printInfo("Moving " + tmpPath + " to " + intermediateArchivedDir);
if (pathExists(intermediateArchivedDir)) {
throw new HiveException("The intermediate archive directory already exists.");
}
fs.rename(tmpPath, intermediateArchivedDir);
} catch (IOException e) {
throw new HiveException("Error while moving tmp directory");
}
} else {
if (pathExists(intermediateArchivedDir)) {
console.printInfo("Intermediate archive directory " + intermediateArchivedDir + " already exists. Assuming it contains an archived version of the partition");
}
}
// Move the original partition directory aside, if the move hasn't been made already
if (!pathExists(intermediateOriginalDir)) {
console.printInfo("Moving " + originalDir + " to " + intermediateOriginalDir);
moveDir(fs, originalDir, intermediateOriginalDir);
} else {
console.printInfo(intermediateOriginalDir + " already exists. " + "Assuming it contains the original files in the partition");
}
// Move the intermediate archived directory to the original parent directory
if (!pathExists(originalDir)) {
console.printInfo("Moving " + intermediateArchivedDir + " to " + originalDir);
moveDir(fs, intermediateArchivedDir, originalDir);
} else {
console.printInfo(originalDir + " already exists. " + "Assuming it contains the archived version of the partition");
}
// Record this change in the metastore
try {
for (Partition p : partitions) {
URI originalPartitionUri = ArchiveUtils.addSlash(p.getDataLocation().toUri());
URI harPartitionDir = harHelper.getHarUri(originalPartitionUri);
StringBuilder authority = new StringBuilder();
if (harPartitionDir.getUserInfo() != null) {
authority.append(harPartitionDir.getUserInfo()).append("@");
}
authority.append(harPartitionDir.getHost());
if (harPartitionDir.getPort() != -1) {
authority.append(":").append(harPartitionDir.getPort());
}
// build as a Path to ensure there is no trailing slash
Path harPath = new Path(harPartitionDir.getScheme(), authority.toString(), harPartitionDir.getPath());
setArchived(p, harPath, partSpecInfo.values.size());
db.alterPartition(simpleDesc.getTableName(), p, null);
}
} catch (Exception e) {
throw new HiveException("Unable to change the partition info for HAR", e);
}
// Delete the superseded original files. If this fails, the intermediate
// original directory will not be deleted; the user will run ARCHIVE again to clear this up
if (pathExists(intermediateOriginalDir)) {
deleteDir(intermediateOriginalDir);
}
if (recovery) {
console.printInfo("Recovery after ARCHIVE succeeded");
}
return 0;
}
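The invocation pattern used in archive() can be reduced to a small standalone sketch. This is only an illustration, not Hive code: the class name HarCreationExample and the two paths are made up, and it assumes the parent directory exists on the default filesystem.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;

public class HarCreationExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    HadoopArchives har = new HadoopArchives(conf);
    // Same argument shape as in DDLTask.archive above:
    // -archiveName <name> -p <parent of the files> <destination dir>
    String[] harArgs = {
        "-archiveName", "data.har",
        "-p", "/user/hive/warehouse/tbl/ds=2020-01-01", // hypothetical source parent
        "/tmp/har-staging"                              // hypothetical destination
    };
    int rc = ToolRunner.run(har, harArgs);
    if (rc != 0) {
      throw new RuntimeException("HadoopArchives returned error code " + rc);
    }
  }
}

As in the Hive code, the archive is written to a staging directory first and only moved into place once HadoopArchives reports success.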
Use of org.apache.hadoop.tools.HadoopArchives in project hive by apache.
The class HarOutputCommitterPostProcessor, method makeHar:
/**
* Creates a har file from the contents of a given directory, using that as root.
* @param dir Directory to archive
* @param harFile The HAR file to create
*/
public static void makeHar(JobContext context, String dir, String harFile) throws IOException {
// Configuration conf = context.getConfiguration();
// Credentials creds = context.getCredentials();
// HCatUtil.logAllTokens(LOG,context);
int lastSep = harFile.lastIndexOf(Path.SEPARATOR_CHAR);
Path archivePath = new Path(harFile.substring(0, lastSep));
final String[] args = { "-archiveName", harFile.substring(lastSep + 1, harFile.length()), "-p", dir, "*", archivePath.toString() };
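// HadoopArchives arguments: the archive name, -p <parent dir>, a glob of
// sources relative to that parent ("*"), and the destination directory.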
try {
Configuration newConf = new Configuration();
FileSystem fs = archivePath.getFileSystem(newConf);
String hadoopTokenFileLocationEnvSetting = System.getenv(HCatConstants.SYSENV_HADOOP_TOKEN_FILE_LOCATION);
if ((hadoopTokenFileLocationEnvSetting != null) && (!hadoopTokenFileLocationEnvSetting.isEmpty())) {
newConf.set(HCatConstants.CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocationEnvSetting);
// LOG.info("System.getenv(\"HADOOP_TOKEN_FILE_LOCATION\") =["+ System.getenv("HADOOP_TOKEN_FILE_LOCATION")+"]");
}
// for (FileStatus ds : fs.globStatus(new Path(dir, "*"))){
// LOG.info("src : "+ds.getPath().toUri().toString());
// }
final HadoopArchives har = new HadoopArchives(newConf);
int rc = ToolRunner.run(har, args);
if (rc != 0) {
throw new Exception("Har returned error code " + rc);
}
// for (FileStatus hs : fs.globStatus(new Path(harFile, "*"))){
// LOG.info("dest : "+hs.getPath().toUri().toString());
// }
// doHarCheck(fs,harFile);
// LOG.info("Nuking " + dir);
fs.delete(new Path(dir), true);
} catch (Exception e) {
throw new HCatException("Error creating Har [" + harFile + "] from [" + dir + "]", e);
}
}
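After a directory has been archived, its contents are read back through a har:// URI; this is what ArchiveUtils.HarPathHelper computes for each partition in the first snippet. The sketch below is illustrative only: the NameNode authority and the archive path are hypothetical, and it assumes the default har filesystem implementation shipped with Hadoop.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HarReadExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // A har URI encodes the underlying filesystem in its authority,
    // e.g. har://hdfs-namenode:8020/<path to data.har> (hypothetical values).
    Path harRoot = new Path("har://hdfs-namenode:8020/user/hive/warehouse/tbl/ds=1/data.har");
    FileSystem harFs = harRoot.getFileSystem(conf);
    for (FileStatus status : harFs.listStatus(harRoot)) {
      System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
    }
  }
}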