
Example 6 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method unarchive.

private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) throws HiveException, URISyntaxException {
    Table tbl = db.getTable(simpleDesc.getTableName());
    // Means user specified a table, not a partition
    if (simpleDesc.getPartSpec() == null) {
        throw new HiveException("UNARCHIVE is for partitions only");
    }
    if (tbl.getTableType() != TableType.MANAGED_TABLE) {
        throw new HiveException("UNARCHIVE can only be performed on managed tables");
    }
    Map<String, String> partSpec = simpleDesc.getPartSpec();
    PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
    List<Partition> partitions = db.getPartitions(tbl, partSpec);
    int partSpecLevel = partSpec.size();
    Path originalDir = null;
    // to keep backward compatibility
    if (partitions.isEmpty()) {
        throw new HiveException("No partition matches the specification");
    } else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
        // for partial specifications we need partitions to follow the scheme
        for (Partition p : partitions) {
            if (partitionInCustomLocation(tbl, p)) {
                String message = String.format("UNARCHIVE cannot run for partition " + "groups with custom locations like %s", p.getLocation());
                throw new HiveException(message);
            }
        }
        originalDir = partSpecInfo.createPath(tbl);
    } else {
        Partition p = partitions.get(0);
        if (ArchiveUtils.isArchived(p)) {
            originalDir = new Path(getOriginalLocation(p));
        } else {
            originalDir = new Path(p.getLocation());
        }
    }
    URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
    Path intermediateArchivedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
    Path intermediateExtractedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_EXTRACTED_DIR_SUFFIX);
    boolean recovery = false;
    if (pathExists(intermediateArchivedDir) || pathExists(intermediateExtractedDir)) {
        recovery = true;
        console.printInfo("Starting recovery after failed UNARCHIVE");
    }
    for (Partition p : partitions) {
        checkArchiveProperty(partSpecLevel, recovery, p);
    }
    String archiveName = "data.har";
    FileSystem fs = null;
    try {
        fs = originalDir.getFileSystem(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    // assume the archive is in the original dir, check if it exists
    Path archivePath = new Path(originalDir, archiveName);
    URI archiveUri = archivePath.toUri();
    ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);
    URI sourceUri = harHelper.getHarUri(originalUri);
    Path sourceDir = new Path(sourceUri.getScheme(), sourceUri.getAuthority(), sourceUri.getPath());
    if (!pathExists(intermediateArchivedDir) && !pathExists(archivePath)) {
        throw new HiveException("Haven't found any archive where it should be");
    }
    Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir);
    try {
        fs = tmpPath.getFileSystem(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    if (!pathExists(intermediateExtractedDir) && !pathExists(intermediateArchivedDir)) {
        try {
            // Copy the files out of the archive into the temporary directory
            String copySource = sourceDir.toString();
            String copyDest = tmpPath.toString();
            List<String> args = new ArrayList<String>();
            args.add("-cp");
            args.add(copySource);
            args.add(copyDest);
            console.printInfo("Copying " + copySource + " to " + copyDest);
            FileSystem srcFs = FileSystem.get(sourceDir.toUri(), conf);
            srcFs.initialize(sourceDir.toUri(), conf);
            FsShell fss = new FsShell(conf);
            int ret = 0;
            try {
                ret = ToolRunner.run(fss, args.toArray(new String[0]));
            } catch (Exception e) {
                e.printStackTrace();
                throw new HiveException(e);
            }
            if (ret != 0) {
                throw new HiveException("Error while copying files from archive, return code=" + ret);
            } else {
                console.printInfo("Successfully Copied " + copySource + " to " + copyDest);
            }
            console.printInfo("Moving " + tmpPath + " to " + intermediateExtractedDir);
            if (fs.exists(intermediateExtractedDir)) {
                throw new HiveException("Invalid state: the intermediate extracted " + "directory already exists.");
            }
            fs.rename(tmpPath, intermediateExtractedDir);
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
    if (!pathExists(intermediateArchivedDir)) {
        try {
            console.printInfo("Moving " + originalDir + " to " + intermediateArchivedDir);
            fs.rename(originalDir, intermediateArchivedDir);
        } catch (IOException e) {
            throw new HiveException(e);
        }
    } else {
        console.printInfo(intermediateArchivedDir + " already exists. " + "Assuming it contains the archived version of the partition");
    }
    // The previous step moved the directory containing the archived version of the files
    // to intermediateArchivedDir, so the original location should now be free; move the
    // extracted files back into it.
    if (!pathExists(originalDir)) {
        try {
            console.printInfo("Moving " + intermediateExtractedDir + " to " + originalDir);
            fs.rename(intermediateExtractedDir, originalDir);
        } catch (IOException e) {
            throw new HiveException(e);
        }
    } else {
        console.printInfo(originalDir + " already exists. " + "Assuming it contains the extracted files in the partition");
    }
    for (Partition p : partitions) {
        setUnArchived(p);
        try {
            db.alterPartition(simpleDesc.getTableName(), p, null);
        } catch (InvalidOperationException e) {
            throw new HiveException(e);
        }
    }
    // If a failure happens past this point, the intermediate archived directory won't be
    // deleted. The user will need to call UNARCHIVE again to clear it up.
    if (pathExists(intermediateArchivedDir)) {
        deleteDir(intermediateArchivedDir);
    }
    if (recovery) {
        console.printInfo("Recovery after UNARCHIVE succeeded");
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) URI(java.net.URI) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) StringUtils.stringifyException(org.apache.hadoop.util.StringUtils.stringifyException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) PartSpecInfo(org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo) FsShell(org.apache.hadoop.fs.FsShell) FileSystem(org.apache.hadoop.fs.FileSystem) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException)
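For context, unarchive is not called directly; it runs when the compiler routes an ALTER TABLE ... UNARCHIVE PARTITION statement to DDLTask. Below is a minimal, hypothetical sketch of exercising it through Hive's embedded Driver; the table name and partition value are invented for illustration.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.session.SessionState;

public class UnarchiveExample {
    public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        SessionState.start(conf);
        Driver driver = new Driver(conf);
        // UNARCHIVE applies only to previously archived partitions of managed tables; the
        // intermediate "archived"/"extracted" directories handled above are what make the
        // command safe to re-run after a partial failure.
        driver.run("ALTER TABLE page_view UNARCHIVE PARTITION (ds='2010-01-01')");
    }
}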

Example 7 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method compact.

private int compact(Hive db, AlterTableSimpleDesc desc) throws HiveException {
    Table tbl = db.getTable(desc.getTableName());
    if (!AcidUtils.isAcidTable(tbl)) {
        throw new HiveException(ErrorMsg.NONACID_COMPACTION_NOT_SUPPORTED, tbl.getDbName(), tbl.getTableName());
    }
    String partName = null;
    if (desc.getPartSpec() == null) {
        // Compaction can only be done on the whole table if the table is non-partitioned.
        if (tbl.isPartitioned()) {
            throw new HiveException(ErrorMsg.NO_COMPACTION_PARTITION);
        }
    } else {
        Map<String, String> partSpec = desc.getPartSpec();
        List<Partition> partitions = db.getPartitions(tbl, partSpec);
        if (partitions.size() > 1) {
            throw new HiveException(ErrorMsg.TOO_MANY_COMPACTION_PARTITIONS);
        } else if (partitions.size() == 0) {
            throw new HiveException(ErrorMsg.INVALID_PARTITION_SPEC);
        }
        partName = partitions.get(0).getName();
    }
    CompactionResponse resp = db.compact2(tbl.getDbName(), tbl.getTableName(), partName, desc.getCompactionType(), desc.getProps());
    if (resp.isAccepted()) {
        console.printInfo("Compaction enqueued with id " + resp.getId());
    } else {
        console.printInfo("Compaction already enqueued with id " + resp.getId() + "; State is " + resp.getState());
    }
    if (desc.isBlocking() && resp.isAccepted()) {
        StringBuilder progressDots = new StringBuilder();
        long waitTimeMs = 1000;
        wait: while (true) {
            // double the wait time each iteration, capped at 5 minutes
            waitTimeMs = waitTimeMs * 2;
            waitTimeMs = waitTimeMs < 5 * 60 * 1000 ? waitTimeMs : 5 * 60 * 1000;
            try {
                Thread.sleep(waitTimeMs);
            } catch (InterruptedException ex) {
                console.printInfo("Interrupted while waiting for compaction with id=" + resp.getId());
                break;
            }
            //this could be expensive when there are a lot of compactions....
            //todo: update to search by ID once HIVE-13353 is done
            ShowCompactResponse allCompactions = db.showCompactions();
            for (ShowCompactResponseElement compaction : allCompactions.getCompacts()) {
                if (resp.getId() != compaction.getId()) {
                    continue;
                }
                switch(compaction.getState()) {
                    case TxnStore.WORKING_RESPONSE:
                    case TxnStore.INITIATED_RESPONSE:
                        //still working
                        console.printInfo(progressDots.toString());
                        progressDots.append(".");
                        continue wait;
                    default:
                        //done
                        console.printInfo("Compaction with id " + resp.getId() + " finished with status: " + compaction.getState());
                        break wait;
                }
            }
        }
    }
    return 0;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactionResponse(org.apache.hadoop.hive.metastore.api.CompactionResponse) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement)
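The blocking branch above (desc.isBlocking()) is driven by the AND WAIT clause of the compaction DDL. A hedged sketch, reusing the Driver setup from the note after Example 6 (the table name and partition value are invented):

// Enqueue a major compaction for one partition of an ACID table and block until the
// showCompactions() poll loop above sees it leave the initiated/working states.
driver.run("ALTER TABLE acid_events PARTITION (ds='2010-01-01') COMPACT 'major' AND WAIT");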

Example 8 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method touch.

/**
   * Rewrite the partition's metadata and force the pre/post execute hooks to
   * be fired.
   *
   * @param db        The database (Hive handle) containing the table.
   * @param touchDesc Descriptor naming the table and, optionally, the partition to touch.
   * @return Returns 0 when the metadata update succeeds.
   * @throws HiveException
   */
private int touch(Hive db, AlterTableSimpleDesc touchDesc) throws HiveException {
    Table tbl = db.getTable(touchDesc.getTableName());
    EnvironmentContext environmentContext = new EnvironmentContext();
    environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
    if (touchDesc.getPartSpec() == null) {
        try {
            db.alterTable(touchDesc.getTableName(), tbl, environmentContext);
        } catch (InvalidOperationException e) {
            throw new HiveException("Uable to update table");
        }
        work.getInputs().add(new ReadEntity(tbl));
        addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    } else {
        Partition part = db.getPartition(tbl, touchDesc.getPartSpec(), false);
        if (part == null) {
            throw new HiveException("Specified partition does not exist");
        }
        try {
            db.alterPartition(touchDesc.getTableName(), part, environmentContext);
        } catch (InvalidOperationException e) {
            throw new HiveException(e);
        }
        work.getInputs().add(new ReadEntity(part));
        addIfAbsentByName(new WriteEntity(part, WriteEntity.WriteType.DDL_NO_LOCK));
    }
    return 0;
}
Also used : EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
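TOUCH exists only to rewrite metadata so the pre/post execute hooks fire again, which is why stats updates are suppressed with DO_NOT_UPDATE_STATS. A minimal, hypothetical invocation, again reusing the earlier Driver setup (names invented):

// Re-fire the hooks for the whole table, then for a single partition.
driver.run("ALTER TABLE page_view TOUCH");
driver.run("ALTER TABLE page_view TOUCH PARTITION (ds='2010-01-01')");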

Example 9 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method exchangeTablePartition.

private int exchangeTablePartition(Hive db, AlterTableExchangePartition exchangePartition) throws HiveException {
    Map<String, String> partitionSpecs = exchangePartition.getPartitionSpecs();
    Table destTable = exchangePartition.getDestinationTable();
    Table sourceTable = exchangePartition.getSourceTable();
    List<Partition> partitions = db.exchangeTablePartitions(partitionSpecs, sourceTable.getDbName(), sourceTable.getTableName(), destTable.getDbName(), destTable.getTableName());
    for (Partition partition : partitions) {
        // Reuse the partition specs from dest partition since they should be the same
        work.getInputs().add(new ReadEntity(new Partition(sourceTable, partition.getSpec(), null)));
        addIfAbsentByName(new WriteEntity(new Partition(sourceTable, partition.getSpec(), null), WriteEntity.WriteType.DELETE));
        addIfAbsentByName(new WriteEntity(new Partition(destTable, partition.getSpec(), null), WriteEntity.WriteType.INSERT));
    }
    return 0;
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
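The exchange moves an existing partition's data and metadata from the source table into the destination table; the ReadEntity/WriteEntity objects registered above are what authorization and lock management see. A hedged sketch using the same Driver setup (table names invented):

// Move partition ds='2010-01-01' from staging_page_view into page_view.
// Both tables must have matching schemas and partition columns.
driver.run("ALTER TABLE page_view EXCHANGE PARTITION (ds='2010-01-01') WITH TABLE staging_page_view");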

Example 10 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method alterTable.

/**
   * Alter a given table.
   *
   * @param db
   *          The database in question.
   * @param alterTbl
   *          This is the table we're altering.
   * @return Returns 0 when execution succeeds and above 0 if it fails.
   * @throws HiveException
   *           Throws this exception if an unexpected error occurs.
   */
private int alterTable(Hive db, AlterTableDesc alterTbl) throws HiveException {
    // alter the table
    Table tbl = db.getTable(alterTbl.getOldName());
    List<Partition> allPartitions = null;
    if (alterTbl.getPartSpec() != null) {
        Map<String, String> partSpec = alterTbl.getPartSpec();
        if (DDLSemanticAnalyzer.isFullSpec(tbl, partSpec)) {
            allPartitions = new ArrayList<Partition>();
            Partition part = db.getPartition(tbl, partSpec, false);
            if (part == null) {
                // User provided a fully specified partition spec but it doesn't exist, fail.
                throw new HiveException(ErrorMsg.INVALID_PARTITION, StringUtils.join(alterTbl.getPartSpec().keySet(), ',') + " for table " + alterTbl.getOldName());
            }
            allPartitions.add(part);
        } else {
            // DDLSemanticAnalyzer has already checked if partial partition specs are allowed,
            // thus we should not need to check it here.
            allPartitions = db.getPartitions(tbl, alterTbl.getPartSpec());
        }
    }
    // Don't change the table object returned by the metastore, as we'll mess with its caches.
    Table oldTbl = tbl;
    tbl = oldTbl.copy();
    if (allPartitions != null) {
        // Alter all partitions
        for (Partition part : allPartitions) {
            alterTableOrSinglePartition(alterTbl, tbl, part);
        }
    } else {
        // Just alter the table
        alterTableOrSinglePartition(alterTbl, tbl, null);
    }
    if (allPartitions == null) {
        updateModifiedParameters(tbl.getTTable().getParameters(), conf);
        tbl.checkValidity(conf);
    } else {
        for (Partition tmpPart : allPartitions) {
            updateModifiedParameters(tmpPart.getParameters(), conf);
        }
    }
    try {
        if (allPartitions == null) {
            db.alterTable(alterTbl.getOldName(), tbl, alterTbl.getIsCascade(), alterTbl.getEnvironmentContext());
        } else {
            db.alterPartitions(tbl.getTableName(), allPartitions, alterTbl.getEnvironmentContext());
        }
    } catch (InvalidOperationException e) {
        LOG.error("alter table: " + stringifyException(e));
        throw new HiveException(e, ErrorMsg.GENERIC_ERROR);
    }
    // Don't acquire locks for any of these, we have already asked for them in DDLSemanticAnalyzer.
    if (allPartitions != null) {
        for (Partition tmpPart : allPartitions) {
            work.getInputs().add(new ReadEntity(tmpPart));
            addIfAbsentByName(new WriteEntity(tmpPart, WriteEntity.WriteType.DDL_NO_LOCK));
        }
    } else {
        work.getInputs().add(new ReadEntity(oldTbl));
        addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    }
    return 0;
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
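alterTable is the catch-all for the remaining ALTER TABLE variants: with no partition spec it alters only the Table object, with a full spec it alters exactly one Partition, and with a partial spec it alters every matching Partition. A hedged sketch of two statements that land here, using the same Driver setup (names and property values invented):

// Table-level alteration: allPartitions stays null and only the Table object is updated.
driver.run("ALTER TABLE page_view SET TBLPROPERTIES ('comment'='clickstream fact table')");
// Partition-level alteration: a full partition spec resolves to exactly one Partition.
driver.run("ALTER TABLE page_view PARTITION (ds='2010-01-01') SET FILEFORMAT ORC");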

Aggregations

Partition (org.apache.hadoop.hive.ql.metadata.Partition): 83
Table (org.apache.hadoop.hive.ql.metadata.Table): 48
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 42
ArrayList (java.util.ArrayList): 35
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 23
Path (org.apache.hadoop.fs.Path): 21
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 21
HashMap (java.util.HashMap): 17
LinkedHashMap (java.util.LinkedHashMap): 17
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 16
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 16
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 15
IOException (java.io.IOException): 13
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 13
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 12
FileNotFoundException (java.io.FileNotFoundException): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 10
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 10
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 10
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 10