
Example 11 with AddPartitionDesc

Use of org.apache.hadoop.hive.ql.plan.AddPartitionDesc in project hive by apache.

From the class LoadPartitions, method forNewTable.

private TaskTracker forNewTable() throws Exception {
    Iterator<AddPartitionDesc> iterator = event.partitionDescriptions(tableDesc).iterator();
    while (iterator.hasNext() && tracker.canAddMoreTasks()) {
        AddPartitionDesc currentPartitionDesc = iterator.next();
        /*
         * currentPartitionDesc cannot be inlined into the addPartition(...) call because
         * hasNext() must be evaluated after the current partition has been retrieved from
         * the iterator.
         */
        addPartition(iterator.hasNext(), currentPartitionDesc);
    }
    return tracker;
}
Also used : AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc)
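For orientation, the descriptor type that forNewTable iterates over is typically built and registered as in the following sketch. It uses only the constructor and methods that appear in the later examples on this page (the three-argument constructor, addPartition, and Hive.createPartitions); the database, table, and partition values are hypothetical placeholders, not Hive code.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;

public class AddPartitionSketch {

    // Adds a single date partition to a hypothetical table, mirroring
    // ALTER TABLE web_logs ADD IF NOT EXISTS PARTITION (ds='2018-01-01').
    static void addDatePartition(Hive db) throws Exception {
        AddPartitionDesc desc = new AddPartitionDesc("default", "web_logs", /* ifNotExists */ true);
        Map<String, String> partSpec = new LinkedHashMap<>();
        partSpec.put("ds", "2018-01-01");
        // A null location means no explicit LOCATION clause, as in
        // createPartitionsInBatches in Example 13 below.
        desc.addPartition(partSpec, null);
        db.createPartitions(desc);
    }
}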

Example 12 with AddPartitionDesc

Use of org.apache.hadoop.hive.ql.plan.AddPartitionDesc in project hive by apache.

From the class FSTableEvent, method partitionDesc.

private AddPartitionDesc partitionDesc(Path fromPath, ImportTableDesc tblDesc, Partition partition) throws SemanticException {
    try {
        AddPartitionDesc partsDesc = new AddPartitionDesc(tblDesc.getDatabaseName(), tblDesc.getTableName(), EximUtil.makePartSpec(tblDesc.getPartCols(), partition.getValues()), partition.getSd().getLocation(), partition.getParameters());
        AddPartitionDesc.OnePartitionDesc partDesc = partsDesc.getPartition(0);
        partDesc.setInputFormat(partition.getSd().getInputFormat());
        partDesc.setOutputFormat(partition.getSd().getOutputFormat());
        partDesc.setNumBuckets(partition.getSd().getNumBuckets());
        partDesc.setCols(partition.getSd().getCols());
        partDesc.setSerializationLib(partition.getSd().getSerdeInfo().getSerializationLib());
        partDesc.setSerdeParams(partition.getSd().getSerdeInfo().getParameters());
        partDesc.setBucketCols(partition.getSd().getBucketCols());
        partDesc.setSortCols(partition.getSd().getSortCols());
        partDesc.setLocation(new Path(fromPath, Warehouse.makePartName(tblDesc.getPartCols(), partition.getValues())).toString());
        partsDesc.setReplicationSpec(metadata.getReplicationSpec());
        return partsDesc;
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
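As a reading aid, here is a sketch of how a descriptor produced by a builder like partitionDesc above can be inspected. It is not Hive code; it only uses getters that also appear in Examples 14 and 15 (getPartition, getPartSpec, getLocation), and the wrapper class is made up.

import java.util.Map;

import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;

class InspectPartitionDescSketch {

    // The multi-argument AddPartitionDesc constructor already creates the first
    // OnePartitionDesc, which is why partitionDesc(...) can call getPartition(0)
    // immediately after construction; the same getters read the fields back.
    static void dump(AddPartitionDesc partsDesc) {
        AddPartitionDesc.OnePartitionDesc first = partsDesc.getPartition(0);
        Map<String, String> spec = first.getPartSpec();  // partition key/value pairs
        String location = first.getLocation();           // data directory set by setLocation(...)
        System.out.println(spec + " -> " + location);
    }
}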

Example 13 with AddPartitionDesc

Use of org.apache.hadoop.hive.ql.plan.AddPartitionDesc in project hive by apache.

From the class DDLTask, method createPartitionsInBatches.

@VisibleForTesting
void createPartitionsInBatches(Hive db, List<String> repairOutput, Set<CheckResult.PartitionResult> partsNotInMs, Table table, int batchSize, int decayingFactor, int maxRetries) throws Exception {
    String addMsgFormat = "Repair: Added partition to metastore " + table.getTableName() + ":%s";
    Set<CheckResult.PartitionResult> batchWork = new HashSet<>(partsNotInMs);
    new RetryUtilities.ExponentiallyDecayingBatchWork<Void>(batchSize, decayingFactor, maxRetries) {

        @Override
        public Void execute(int size) throws Exception {
            while (!batchWork.isEmpty()) {
                // get the current batch size
                int currentBatchSize = size;
                AddPartitionDesc apd = new AddPartitionDesc(table.getDbName(), table.getTableName(), true);
                // store the partitions temporarily until processed
                List<CheckResult.PartitionResult> lastBatch = new ArrayList<>(currentBatchSize);
                List<String> addMsgs = new ArrayList<>(currentBatchSize);
                // add the number of partitions given by the current batch size
                for (CheckResult.PartitionResult part : batchWork) {
                    if (currentBatchSize == 0) {
                        break;
                    }
                    apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
                    lastBatch.add(part);
                    addMsgs.add(String.format(addMsgFormat, part.getPartitionName()));
                    currentBatchSize--;
                }
                db.createPartitions(apd);
                // if last batch is successful remove it from partsNotInMs
                batchWork.removeAll(lastBatch);
                repairOutput.addAll(addMsgs);
            }
            return null;
        }
    }.run();
}
Also used : RetryUtilities(org.apache.hive.common.util.RetryUtilities) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) CheckResult(org.apache.hadoop.hive.ql.metadata.CheckResult) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ArrayList(java.util.ArrayList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) AbstractList(java.util.AbstractList) List(java.util.List) HashSet(java.util.HashSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
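The retry wrapper used above can be hard to read inline, so the following sketch isolates the pattern. It uses only the constructor and the execute(int) callback visible in the example; judging by the class name and its use in createPartitionsInBatches, a failing attempt is retried with a smaller batch size, up to maxRetries. The wrapper class below is hypothetical.

import org.apache.hive.common.util.RetryUtilities;

class BatchRetrySketch {

    // execute(size) receives the batch size to attempt; run() lets the framework
    // shrink that size on failure (hence "ExponentiallyDecayingBatchWork").
    static void run(int batchSize, int decayingFactor, int maxRetries) throws Exception {
        new RetryUtilities.ExponentiallyDecayingBatchWork<Void>(batchSize, decayingFactor, maxRetries) {
            @Override
            public Void execute(int size) throws Exception {
                System.out.println("processing a batch of at most " + size + " items");
                return null;
            }
        }.run();
    }
}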

Example 14 with AddPartitionDesc

Use of org.apache.hadoop.hive.ql.plan.AddPartitionDesc in project hive by apache.

From the class DDLSemanticAnalyzer, method analyzeAlterTableAddParts.

/**
 * Add one or more partitions to a table. Useful when the data has been copied
 * to the right location by some other process.
 *
 * @param ast
 *          The parsed command tree.
 *
 * @param expectView
 *          True for ALTER VIEW, false for ALTER TABLE.
 *
 * @throws SemanticException
 *           Parsing failed
 */
private void analyzeAlterTableAddParts(String[] qualified, CommonTree ast, boolean expectView) throws SemanticException {
    // ^(TOK_ALTERTABLE_ADDPARTS identifier ifNotExists? alterStatementSuffixAddPartitionsElement+)
    boolean ifNotExists = ast.getChild(0).getType() == HiveParser.TOK_IFNOTEXISTS;
    Table tab = getTable(qualified);
    boolean isView = tab.isView();
    validateAlterTableType(tab, AlterTableTypes.ADDPARTITION, expectView);
    outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED));
    int numCh = ast.getChildCount();
    int start = ifNotExists ? 1 : 0;
    String currentLocation = null;
    Map<String, String> currentPart = null;
    // Parser has done some verification, so the order of tokens doesn't need to be verified here.
    AddPartitionDesc addPartitionDesc = new AddPartitionDesc(tab.getDbName(), tab.getTableName(), ifNotExists);
    for (int num = start; num < numCh; num++) {
        ASTNode child = (ASTNode) ast.getChild(num);
        switch(child.getToken().getType()) {
            case HiveParser.TOK_PARTSPEC:
                if (currentPart != null) {
                    addPartitionDesc.addPartition(currentPart, currentLocation);
                    currentLocation = null;
                }
                currentPart = getValidatedPartSpec(tab, child, conf, true);
                // validate reserved values
                validatePartitionValues(currentPart);
                break;
            case HiveParser.TOK_PARTITIONLOCATION:
                // if location specified, set in partition
                if (isView) {
                    throw new SemanticException("LOCATION clause illegal for view partition");
                }
                currentLocation = unescapeSQLString(child.getChild(0).getText());
                inputs.add(toReadEntity(currentLocation));
                break;
            default:
                throw new SemanticException("Unknown child: " + child);
        }
    }
    // add the last one
    if (currentPart != null) {
        addPartitionDesc.addPartition(currentPart, currentLocation);
    }
    if (this.conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        for (int index = 0; index < addPartitionDesc.getPartitionCount(); index++) {
            OnePartitionDesc desc = addPartitionDesc.getPartition(index);
            if (desc.getLocation() == null) {
                if (desc.getPartParams() == null) {
                    desc.setPartParams(new HashMap<String, String>());
                }
                StatsSetupConst.setStatsStateForCreateTable(desc.getPartParams(), MetaStoreUtils.getColumnNames(tab.getCols()), StatsSetupConst.TRUE);
            }
        }
    }
    if (addPartitionDesc.getPartitionCount() == 0) {
        // nothing to do
        return;
    }
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), addPartitionDesc)));
    if (isView) {
        // Compile internal query to capture underlying table partition dependencies
        StringBuilder cmd = new StringBuilder();
        cmd.append("SELECT * FROM ");
        cmd.append(HiveUtils.unparseIdentifier(getDotName(qualified)));
        cmd.append(" WHERE ");
        boolean firstOr = true;
        for (int i = 0; i < addPartitionDesc.getPartitionCount(); ++i) {
            AddPartitionDesc.OnePartitionDesc partitionDesc = addPartitionDesc.getPartition(i);
            if (firstOr) {
                firstOr = false;
            } else {
                cmd.append(" OR ");
            }
            boolean firstAnd = true;
            cmd.append("(");
            for (Map.Entry<String, String> entry : partitionDesc.getPartSpec().entrySet()) {
                if (firstAnd) {
                    firstAnd = false;
                } else {
                    cmd.append(" AND ");
                }
                cmd.append(HiveUtils.unparseIdentifier(entry.getKey()));
                cmd.append(" = '");
                cmd.append(HiveUtils.escapeString(entry.getValue()));
                cmd.append("'");
            }
            cmd.append(")");
        }
        SessionState ss = SessionState.get();
        String uName = (ss == null ? null : ss.getUserName());
        Driver driver = new Driver(conf, uName, queryState.getLineageState());
        int rc = driver.compile(cmd.toString(), false);
        if (rc != 0) {
            throw new SemanticException(ErrorMsg.NO_VALID_PARTN.getMsg());
        }
        inputs.addAll(driver.getPlan().getInputs());
    }
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) Table(org.apache.hadoop.hive.ql.metadata.Table) Driver(org.apache.hadoop.hive.ql.Driver) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) OnePartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc.OnePartitionDesc) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap)
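The view branch at the end of analyzeAlterTableAddParts builds an internal query whose shape is easier to see on a concrete spec. The sketch below rebuilds that WHERE clause for one hypothetical two-key partition, using the same HiveUtils helpers as the example; the class and values are made up for illustration.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.metadata.HiveUtils;

class ViewDependencyQuerySketch {

    // Produces the query compiled internally for a partitioned view, e.g.
    // SELECT * FROM `db.v` WHERE (`ds` = '2018-01-01' AND `hr` = '08')
    static String queryFor(String dotName, Map<String, String> partSpec) {
        StringBuilder cmd = new StringBuilder("SELECT * FROM ");
        cmd.append(HiveUtils.unparseIdentifier(dotName)).append(" WHERE (");
        boolean firstAnd = true;
        for (Map.Entry<String, String> entry : partSpec.entrySet()) {
            if (!firstAnd) {
                cmd.append(" AND ");
            }
            firstAnd = false;
            cmd.append(HiveUtils.unparseIdentifier(entry.getKey()))
               .append(" = '")
               .append(HiveUtils.escapeString(entry.getValue()))
               .append("'");
        }
        return cmd.append(")").toString();
    }

    public static void main(String[] args) {
        Map<String, String> spec = new LinkedHashMap<>();
        spec.put("ds", "2018-01-01");
        spec.put("hr", "08");
        System.out.println(queryFor("db.v", spec));
    }
}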

Example 15 with AddPartitionDesc

Use of org.apache.hadoop.hive.ql.plan.AddPartitionDesc in project hive by apache.

From the class ImportSemanticAnalyzer, method addSinglePartition.

private static Task<?> addSinglePartition(URI fromURI, FileSystem fs, ImportTableDesc tblDesc, Table table, Warehouse wh, AddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x, Long writeId, int stmtId, boolean isSourceMm, Task<?> commitTask) throws MetaException, IOException, HiveException {
    AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0);
    if (tblDesc.isExternal() && tblDesc.getLocation() == null) {
        x.getLOG().debug("Importing in-place: adding AddPart for partition " + partSpecToString(partSpec.getPartSpec()));
        // addPartitionDesc already has the right partition location
        @SuppressWarnings("unchecked") Task<?> addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc));
        return addPartTask;
    } else {
        String srcLocation = partSpec.getLocation();
        fixLocationInPartSpec(fs, tblDesc, table, wh, replicationSpec, partSpec, x);
        x.getLOG().debug("adding dependent CopyWork/AddPart/MoveWork for partition " + partSpecToString(partSpec.getPartSpec()) + " with source location: " + srcLocation);
        Path tgtLocation = new Path(partSpec.getLocation());
        Path destPath = !AcidUtils.isInsertOnlyTable(table.getParameters()) ? x.getCtx().getExternalTmpPath(tgtLocation) : new Path(tgtLocation, AcidUtils.deltaSubdir(writeId, writeId, stmtId));
        Path moveTaskSrc = !AcidUtils.isInsertOnlyTable(table.getParameters()) ? destPath : tgtLocation;
        if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
            Utilities.FILE_OP_LOGGER.trace("adding import work for partition with source location: " + srcLocation + "; target: " + tgtLocation + "; copy dest " + destPath + "; mm " + writeId + " (src " + isSourceMm + ") for " + partSpecToString(partSpec.getPartSpec()));
        }
        Task<?> copyTask = null;
        if (replicationSpec.isInReplicationScope()) {
            if (isSourceMm || isAcid(writeId)) {
                // Note: this is replication gap, not MM gap... Repl V2 is not ready yet.
                throw new RuntimeException("Replicating MM and ACID tables is not supported");
            }
            copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, new Path(srcLocation), destPath, x.getConf());
        } else {
            CopyWork cw = new CopyWork(new Path(srcLocation), destPath, false);
            cw.setSkipSourceMmDirs(isSourceMm);
            copyTask = TaskFactory.get(cw);
        }
        Task<?> addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc));
        // Note: this sets LoadFileType incorrectly for ACID; is that relevant for import?
        // See setLoadFileType and setIsAcidIow calls elsewhere for an example.
        LoadTableDesc loadTableWork = new LoadTableDesc(moveTaskSrc, Utilities.getTableDesc(table), partSpec.getPartSpec(), replicationSpec.isReplace() ? LoadFileType.REPLACE_ALL : LoadFileType.OVERWRITE_EXISTING, writeId);
        loadTableWork.setStmtId(stmtId);
        loadTableWork.setInheritTableSpecs(false);
        Task<?> loadPartTask = TaskFactory.get(new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false));
        copyTask.addDependentTask(loadPartTask);
        addPartTask.addDependentTask(loadPartTask);
        x.getTasks().add(copyTask);
        if (commitTask != null) {
            loadPartTask.addDependentTask(commitTask);
        }
        return addPartTask;
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) CopyWork(org.apache.hadoop.hive.ql.plan.CopyWork) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc)
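The interesting part of addSinglePartition is the task graph it builds, which is easy to lose among the ACID and replication branches. The sketch below keeps only the wiring that appears in the example (TaskFactory.get and addDependentTask); the wrapper class and method names are hypothetical.

import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.plan.CopyWork;
import org.apache.hadoop.hive.ql.plan.DDLWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;

class ImportPartitionWiringSketch {

    // Both the data copy and the ADD PARTITION must complete before the
    // move/load task runs; the add-partition task is returned so the caller
    // can hang further dependencies off it, as addSinglePartition does.
    static Task<?> wire(CopyWork copyWork, DDLWork addPartWork, MoveWork moveWork) {
        Task<?> copyTask = TaskFactory.get(copyWork);
        Task<?> addPartTask = TaskFactory.get(addPartWork);
        Task<?> loadPartTask = TaskFactory.get(moveWork);
        copyTask.addDependentTask(loadPartTask);
        addPartTask.addDependentTask(loadPartTask);
        return addPartTask;
    }
}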

Aggregations

AddPartitionDesc (org.apache.hadoop.hive.ql.plan.AddPartitionDesc): 17 usages
Path (org.apache.hadoop.fs.Path): 11 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 7 usages
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 5 usages
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 5 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 4 usages
Database (org.apache.hadoop.hive.metastore.api.Database): 4 usages
IOException (java.io.IOException): 3 usages
URISyntaxException (java.net.URISyntaxException): 3 usages
ArrayList (java.util.ArrayList): 3 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 3 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3 usages
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 3 usages
URI (java.net.URI): 2 usages
HashSet (java.util.HashSet): 2 usages
Warehouse (org.apache.hadoop.hive.metastore.Warehouse): 2 usages
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 2 usages
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 2 usages
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 2 usages
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 2 usages