
Example 1 with PartSpecInfo

Use of org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo in project hive by apache.

From the class DDLTask, the method archive:

private int archive(Hive db, AlterTableSimpleDesc simpleDesc, DriverContext driverContext) throws HiveException {
    Table tbl = db.getTable(simpleDesc.getTableName());
    if (tbl.getTableType() != TableType.MANAGED_TABLE) {
        throw new HiveException("ARCHIVE can only be performed on managed tables");
    }
    Map<String, String> partSpec = simpleDesc.getPartSpec();
    PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
    List<Partition> partitions = db.getPartitions(tbl, partSpec);
    Path originalDir = null;
    // For a partial partition specification, all matching partitions must sit in the
    // standard location; fully specified partitions may use custom locations, which is
    // kept for backward compatibility.
    if (partitions.isEmpty()) {
        throw new HiveException("No partition matches the specification");
    } else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
        // for partial specifications we need partitions to follow the scheme
        for (Partition p : partitions) {
            if (partitionInCustomLocation(tbl, p)) {
                String message = String.format("ARCHIVE cannot run for partition " + "groups with custom locations like %s", p.getLocation());
                throw new HiveException(message);
            }
        }
        originalDir = partSpecInfo.createPath(tbl);
    } else {
        Partition p = partitions.get(0);
        // an already-archived partition is only expected here when recovering
        // from a previously failed ARCHIVE
        if (ArchiveUtils.isArchived(p)) {
            originalDir = new Path(getOriginalLocation(p));
        } else {
            originalDir = p.getDataLocation();
        }
    }
    Path intermediateArchivedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
    Path intermediateOriginalDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
    console.printInfo("intermediate.archived is " + intermediateArchivedDir.toString());
    console.printInfo("intermediate.original is " + intermediateOriginalDir.toString());
    String archiveName = "data.har";
    FileSystem fs = null;
    try {
        fs = originalDir.getFileSystem(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    URI archiveUri = (new Path(originalDir, archiveName)).toUri();
    URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
    ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);
    // Reject partitions that are already marked as archived: a different archiving level
    // means a conflicting existing archive, the same level means the data is already archived.
    for (Partition p : partitions) {
        if (ArchiveUtils.isArchived(p)) {
            if (ArchiveUtils.getArchivingLevel(p) != partSpecInfo.values.size()) {
                String name = ArchiveUtils.getPartialName(p, ArchiveUtils.getArchivingLevel(p));
                String m = String.format("Conflict with existing archive %s", name);
                throw new HiveException(m);
            } else {
                throw new HiveException("Partition(s) already archived");
            }
        }
    }
    boolean recovery = false;
    if (pathExists(intermediateArchivedDir) || pathExists(intermediateOriginalDir)) {
        recovery = true;
        console.printInfo("Starting recovery after failed ARCHIVE");
    }
    // Build the archive only if no intermediate directory exists yet; an existing
    // intermediate archived dir can be trusted because the move that created it is atomic.
    if (!pathExists(intermediateArchivedDir) && !pathExists(intermediateOriginalDir)) {
        // First create the archive in a tmp dir so that if the job fails, the
        // bad files don't pollute the filesystem
        Path tmpPath = new Path(driverContext.getCtx().getExternalTmpPath(originalDir), "partlevel");
        console.printInfo("Creating " + archiveName + " for " + originalDir.toString());
        console.printInfo("in " + tmpPath);
        console.printInfo("Please wait... (this may take a while)");
        // Create the Hadoop archive
        int ret = 0;
        try {
            int maxJobNameLen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
            String jobname = String.format("Archiving %s@%s", tbl.getTableName(), partSpecInfo.getName());
            jobname = Utilities.abbreviate(jobname, maxJobNameLen - 6);
            conf.set(MRJobConfig.JOB_NAME, jobname);
            HadoopArchives har = new HadoopArchives(conf);
            List<String> args = new ArrayList<String>();
            args.add("-archiveName");
            args.add(archiveName);
            args.add("-p");
            args.add(originalDir.toString());
            args.add(tmpPath.toString());
            ret = ToolRunner.run(har, args.toArray(new String[0]));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        if (ret != 0) {
            throw new HiveException("Error while creating HAR");
        }
        // Move the archive from the tmp dir to an intermediate dir at the same level as
        // the partition directory, e.g. .../hr=12-intermediate-archived
        try {
            console.printInfo("Moving " + tmpPath + " to " + intermediateArchivedDir);
            if (pathExists(intermediateArchivedDir)) {
                throw new HiveException("The intermediate archive directory already exists.");
            }
            fs.rename(tmpPath, intermediateArchivedDir);
        } catch (IOException e) {
            throw new HiveException("Error while moving tmp directory");
        }
    } else {
        if (pathExists(intermediateArchivedDir)) {
            console.printInfo("Intermediate archive directory " + intermediateArchivedDir + " already exists. Assuming it contains an archived version of the partition");
        }
    }
    // Move the original partition directory to the intermediate original directory,
    // if that move hasn't been made already
    if (!pathExists(intermediateOriginalDir)) {
        console.printInfo("Moving " + originalDir + " to " + intermediateOriginalDir);
        moveDir(fs, originalDir, intermediateOriginalDir);
    } else {
        console.printInfo(intermediateOriginalDir + " already exists. " + "Assuming it contains the original files in the partition");
    }
    // Move the intermediate archived directory to the original parent directory
    if (!pathExists(originalDir)) {
        console.printInfo("Moving " + intermediateArchivedDir + " to " + originalDir);
        moveDir(fs, intermediateArchivedDir, originalDir);
    } else {
        console.printInfo(originalDir + " already exists. " + "Assuming it contains the archived version of the partition");
    }
    // Record this change in the metastore
    try {
        for (Partition p : partitions) {
            URI originalPartitionUri = ArchiveUtils.addSlash(p.getDataLocation().toUri());
            URI harPartitionDir = harHelper.getHarUri(originalPartitionUri);
            StringBuilder authority = new StringBuilder();
            if (harPartitionDir.getUserInfo() != null) {
                authority.append(harPartitionDir.getUserInfo()).append("@");
            }
            authority.append(harPartitionDir.getHost());
            if (harPartitionDir.getPort() != -1) {
                authority.append(":").append(harPartitionDir.getPort());
            }
            // build as a Path to ensure there is no trailing slash
            Path harPath = new Path(harPartitionDir.getScheme(), authority.toString(), harPartitionDir.getPath());
            setArchived(p, harPath, partSpecInfo.values.size());
            db.alterPartition(simpleDesc.getTableName(), p, null);
        }
    } catch (Exception e) {
        throw new HiveException("Unable to change the partition info for HAR", e);
    }
    // If deletion fails here, the directory containing the original files
    // will not be removed; running ARCHIVE again will clear it up.
    if (pathExists(intermediateOriginalDir)) {
        deleteDir(intermediateOriginalDir);
    }
    if (recovery) {
        console.printInfo("Recovery after ARCHIVE succeeded");
    }
    return 0;
}
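
The method above drives everything from the PartSpecInfo it builds out of the user's partition specification: the archiving level is partSpecInfo.values.size(), the root directory to archive comes from createPath(tbl), and getName() feeds the MapReduce job name. The standalone sketch below replays that resolution using only the PartSpecInfo members exercised above (create, values, createPath, getName); the table name, partition column, and suffix constant are hypothetical placeholders, so read it as an illustrative sketch rather than the actual DDLTask code.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Table;

public class PartSpecInfoSketch {

    // Hypothetical suffix; DDLTask defines its own INTERMEDIATE_*_DIR_SUFFIX constants.
    private static final String INTERMEDIATE_ARCHIVED_DIR_SUFFIX = "_INTERMEDIATE_ARCHIVED";

    public static void main(String[] args) throws Exception {
        Hive db = Hive.get();                    // current Hive session (assumes a configured HiveConf)
        Table tbl = db.getTable("web_logs");     // hypothetical table partitioned by (ds, hr)

        // Partial specification: only the leading partition column is given.
        Map<String, String> partSpec = new LinkedHashMap<>();
        partSpec.put("ds", "2024-01-01");

        PartSpecInfo info = PartSpecInfo.create(tbl, partSpec);

        // With a partial spec, values.size() is smaller than the number of partition
        // columns, so archive() takes the "partition group" branch and archives the
        // directory returned by createPath(tbl).
        Path originalDir = info.createPath(tbl);
        Path intermediateArchivedDir = new Path(originalDir.getParent(),
                originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);

        System.out.println("spec name:        " + info.getName());
        System.out.println("archiving level:  " + info.values.size());
        System.out.println("original dir:     " + originalDir);
        System.out.println("intermediate dir: " + intermediateArchivedDir);
    }
}

For a fully specified partition, DDLTask skips createPath and uses the partition's own data location instead, as in the else branch of the method above.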
Also used: Path (org.apache.hadoop.fs.Path), Partition (org.apache.hadoop.hive.ql.metadata.Partition), AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition), Table (org.apache.hadoop.hive.ql.metadata.Table), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), ArrayList (java.util.ArrayList), IOException (java.io.IOException), URI (java.net.URI), AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException), InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException), NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException), MetaException (org.apache.hadoop.hive.metastore.api.MetaException), URISyntaxException (java.net.URISyntaxException), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), StringUtils.stringifyException (org.apache.hadoop.util.StringUtils.stringifyException), SQLException (java.sql.SQLException), FileNotFoundException (java.io.FileNotFoundException), HiveAuthzPluginException (org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException), InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException), PartSpecInfo (org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo), FileSystem (org.apache.hadoop.fs.FileSystem), HadoopArchives (org.apache.hadoop.tools.HadoopArchives)

Example 2 with PartSpecInfo

Use of org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo in project hive by apache.

From the class DDLTask, the method unarchive:

private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) throws HiveException, URISyntaxException {
    Table tbl = db.getTable(simpleDesc.getTableName());
    // Means user specified a table, not a partition
    if (simpleDesc.getPartSpec() == null) {
        throw new HiveException("UNARCHIVE is for partitions only");
    }
    if (tbl.getTableType() != TableType.MANAGED_TABLE) {
        throw new HiveException("UNARCHIVE can only be performed on managed tables");
    }
    Map<String, String> partSpec = simpleDesc.getPartSpec();
    PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
    List<Partition> partitions = db.getPartitions(tbl, partSpec);
    int partSpecLevel = partSpec.size();
    Path originalDir = null;
    // For a partial partition specification, all matching partitions must sit in the
    // standard location; fully specified partitions may use custom locations, which is
    // kept for backward compatibility.
    if (partitions.isEmpty()) {
        throw new HiveException("No partition matches the specification");
    } else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
        // for partial specifications we need partitions to follow the scheme
        for (Partition p : partitions) {
            if (partitionInCustomLocation(tbl, p)) {
                String message = String.format("UNARCHIVE cannot run for partition " + "groups with custom locations like %s", p.getLocation());
                throw new HiveException(message);
            }
        }
        originalDir = partSpecInfo.createPath(tbl);
    } else {
        Partition p = partitions.get(0);
        if (ArchiveUtils.isArchived(p)) {
            originalDir = new Path(getOriginalLocation(p));
        } else {
            originalDir = new Path(p.getLocation());
        }
    }
    URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
    Path intermediateArchivedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
    Path intermediateExtractedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_EXTRACTED_DIR_SUFFIX);
    boolean recovery = false;
    if (pathExists(intermediateArchivedDir) || pathExists(intermediateExtractedDir)) {
        recovery = true;
        console.printInfo("Starting recovery after failed UNARCHIVE");
    }
    for (Partition p : partitions) {
        checkArchiveProperty(partSpecLevel, recovery, p);
    }
    String archiveName = "data.har";
    FileSystem fs = null;
    try {
        fs = originalDir.getFileSystem(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    // assume the archive is in the original dir, check if it exists
    Path archivePath = new Path(originalDir, archiveName);
    URI archiveUri = archivePath.toUri();
    ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);
    URI sourceUri = harHelper.getHarUri(originalUri);
    Path sourceDir = new Path(sourceUri.getScheme(), sourceUri.getAuthority(), sourceUri.getPath());
    if (!pathExists(intermediateArchivedDir) && !pathExists(archivePath)) {
        throw new HiveException("Haven't found any archive where it should be");
    }
    Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir);
    try {
        fs = tmpPath.getFileSystem(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    if (!pathExists(intermediateExtractedDir) && !pathExists(intermediateArchivedDir)) {
        try {
            // Copy the files out of the archive into the temporary directory
            String copySource = sourceDir.toString();
            String copyDest = tmpPath.toString();
            List<String> args = new ArrayList<String>();
            args.add("-cp");
            args.add(copySource);
            args.add(copyDest);
            console.printInfo("Copying " + copySource + " to " + copyDest);
            FileSystem srcFs = FileSystem.get(sourceDir.toUri(), conf);
            srcFs.initialize(sourceDir.toUri(), conf);
            FsShell fss = new FsShell(conf);
            int ret = 0;
            try {
                ret = ToolRunner.run(fss, args.toArray(new String[0]));
            } catch (Exception e) {
                e.printStackTrace();
                throw new HiveException(e);
            }
            if (ret != 0) {
                throw new HiveException("Error while copying files from archive, return code=" + ret);
            } else {
                console.printInfo("Successfully Copied " + copySource + " to " + copyDest);
            }
            console.printInfo("Moving " + tmpPath + " to " + intermediateExtractedDir);
            if (fs.exists(intermediateExtractedDir)) {
                throw new HiveException("Invalid state: the intermediate extracted " + "directory already exists.");
            }
            fs.rename(tmpPath, intermediateExtractedDir);
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
    if (!pathExists(intermediateArchivedDir)) {
        try {
            console.printInfo("Moving " + originalDir + " to " + intermediateArchivedDir);
            fs.rename(originalDir, intermediateArchivedDir);
        } catch (IOException e) {
            throw new HiveException(e);
        }
    } else {
        console.printInfo(intermediateArchivedDir + " already exists. " + "Assuming it contains the archived version of the partition");
    }
    // Move the extracted files from the intermediate extracted dir back into the
    // original partition dir, unless that move has already been made.
    if (!pathExists(originalDir)) {
        try {
            console.printInfo("Moving " + intermediateExtractedDir + " to " + originalDir);
            fs.rename(intermediateExtractedDir, originalDir);
        } catch (IOException e) {
            throw new HiveException(e);
        }
    } else {
        console.printInfo(originalDir + " already exists. " + "Assuming it contains the extracted files in the partition");
    }
    for (Partition p : partitions) {
        setUnArchived(p);
        try {
            db.alterPartition(simpleDesc.getTableName(), p, null);
        } catch (InvalidOperationException e) {
            throw new HiveException(e);
        }
    }
    // If deletion fails here, the intermediate archived files will not be
    // deleted; the user will need to call UNARCHIVE again to clear those up.
    if (pathExists(intermediateArchivedDir)) {
        deleteDir(intermediateArchivedDir);
    }
    if (recovery) {
        console.printInfo("Recovery after UNARCHIVE succeeded");
    }
    return 0;
}
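
Both archive and unarchive lean on ArchiveUtils.HarPathHelper to map locations under the partition directory to har:// URIs inside data.har; in unarchive that mapping yields sourceDir, the path the files are copied out of. The following is a minimal, self-contained sketch of that translation using only the ArchiveUtils calls seen above; the HDFS warehouse path is a made-up example and a default HiveConf is assumed to be available on the classpath.

import java.net.URI;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ArchiveUtils;

public class HarPathSketch {
    public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();

        // Hypothetical partition directory; in DDLTask this comes from
        // PartSpecInfo.createPath(tbl) or the partition's data location.
        Path originalDir = new Path("hdfs://nn:8020/warehouse/web_logs/ds=2024-01-01");
        Path archivePath = new Path(originalDir, "data.har");

        URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
        URI archiveUri = archivePath.toUri();

        // Maps URIs under originalUri to har:// URIs rooted at data.har.
        ArchiveUtils.HarPathHelper harHelper =
                new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);

        // For unarchive, the root of the archived data is the har URI of the
        // partition directory itself; this becomes the copy source.
        URI sourceUri = harHelper.getHarUri(originalUri);
        Path sourceDir = new Path(sourceUri.getScheme(), sourceUri.getAuthority(), sourceUri.getPath());

        System.out.println("copy source inside the archive: " + sourceDir);
    }
}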
Also used: Path (org.apache.hadoop.fs.Path), Partition (org.apache.hadoop.hive.ql.metadata.Partition), AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition), Table (org.apache.hadoop.hive.ql.metadata.Table), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), ArrayList (java.util.ArrayList), IOException (java.io.IOException), URI (java.net.URI), AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException), InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException), NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException), MetaException (org.apache.hadoop.hive.metastore.api.MetaException), URISyntaxException (java.net.URISyntaxException), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), StringUtils.stringifyException (org.apache.hadoop.util.StringUtils.stringifyException), SQLException (java.sql.SQLException), FileNotFoundException (java.io.FileNotFoundException), HiveAuthzPluginException (org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException), InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException), PartSpecInfo (org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo), FsShell (org.apache.hadoop.fs.FsShell), FileSystem (org.apache.hadoop.fs.FileSystem)

Aggregations

FileNotFoundException (java.io.FileNotFoundException): 2 usages
IOException (java.io.IOException): 2 usages
URI (java.net.URI): 2 usages
URISyntaxException (java.net.URISyntaxException): 2 usages
SQLException (java.sql.SQLException): 2 usages
ArrayList (java.util.ArrayList): 2 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 2 usages
Path (org.apache.hadoop.fs.Path): 2 usages
AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException): 2 usages
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 2 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 2 usages
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 2 usages
PartSpecInfo (org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo): 2 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2 usages
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 2 usages
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 2 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 2 usages
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 2 usages
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 2 usages
HiveAuthzPluginException (org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException): 2 usages