Search in sources:

Example 16 with ReplicationSpec

use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

the class RenamePartitionHandler method handle.

@Override
public List<Task<?>> handle(Context context) throws SemanticException {
    AlterPartitionMessage msg = deserializer.getAlterPartitionMessage(context.dmd.getPayload());
    String actualDbName = context.isDbNameEmpty() ? msg.getDB() : context.dbName;
    String actualTblName = msg.getTable();
    Map<String, String> newPartSpec = new LinkedHashMap<>();
    Map<String, String> oldPartSpec = new LinkedHashMap<>();
    TableName tableName = TableName.fromString(actualTblName, null, actualDbName);
    Table tableObj;
    ReplicationSpec replicationSpec = context.eventOnlyReplicationSpec();
    try {
        Iterator<String> beforeIterator = msg.getPtnObjBefore().getValuesIterator();
        Iterator<String> afterIterator = msg.getPtnObjAfter().getValuesIterator();
        tableObj = msg.getTableObj();
        for (FieldSchema fs : tableObj.getPartitionKeys()) {
            oldPartSpec.put(fs.getName(), beforeIterator.next());
            newPartSpec.put(fs.getName(), afterIterator.next());
        }
        AlterTableRenamePartitionDesc renamePtnDesc = new AlterTableRenamePartitionDesc(tableName, oldPartSpec, newPartSpec, replicationSpec, null);
        renamePtnDesc.setWriteId(msg.getWriteId());
        Task<DDLWork> renamePtnTask = TaskFactory.get(new DDLWork(readEntitySet, writeEntitySet, renamePtnDesc, true, context.getDumpDirectory(), context.getMetricCollector()), context.hiveConf);
        context.log.debug("Added rename ptn task : {}:{}->{}", renamePtnTask.getId(), oldPartSpec, newPartSpec);
        updatedMetadata.set(context.dmd.getEventTo().toString(), actualDbName, actualTblName, newPartSpec);
        return ReplUtils.addChildTask(renamePtnTask);
    } catch (Exception e) {
        throw (e instanceof SemanticException) ? (SemanticException) e : new SemanticException("Error reading message members", e);
    }
}
Also used : ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) Table(org.apache.hadoop.hive.metastore.api.Table) AlterTableRenamePartitionDesc(org.apache.hadoop.hive.ql.ddl.table.partition.rename.AlterTableRenamePartitionDesc) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) LinkedHashMap(java.util.LinkedHashMap) TableName(org.apache.hadoop.hive.common.TableName) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) AlterPartitionMessage(org.apache.hadoop.hive.metastore.messaging.AlterPartitionMessage)
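
The heart of this handler is the zipping step: partition key names from the table schema are paired positionally with the before/after value iterators carried by the event. Below is a minimal, Hive-free sketch of that pattern; the key list and iterators are hypothetical stand-ins for tableObj.getPartitionKeys() and the message's value iterators.

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class PartSpecZip {
    public static void main(String[] args) {
        // Hypothetical stand-ins for the table's partition keys and the
        // event's before/after partition values.
        List<String> partitionKeys = List.of("ds", "hr");
        Iterator<String> before = List.of("2024-01-01", "10").iterator();
        Iterator<String> after = List.of("2024-01-02", "10").iterator();

        // LinkedHashMap preserves partition-key order, which matters when the
        // spec is later rendered as PARTITION (ds='...', hr='...').
        Map<String, String> oldPartSpec = new LinkedHashMap<>();
        Map<String, String> newPartSpec = new LinkedHashMap<>();
        for (String key : partitionKeys) {
            oldPartSpec.put(key, before.next());
            newPartSpec.put(key, after.next());
        }
        System.out.println(oldPartSpec + " -> " + newPartSpec);
    }
}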

Example 17 with ReplicationSpec

use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

the class LoadPartitions method forExistingTable.

private TaskTracker forExistingTable(AddPartitionDesc lastPartitionReplicated) throws Exception {
    boolean encounteredTheLastReplicatedPartition = (lastPartitionReplicated == null);
    Map<String, String> lastReplicatedPartSpec = null;
    if (!encounteredTheLastReplicatedPartition) {
        lastReplicatedPartSpec = lastPartitionReplicated.getPartition(0).getPartSpec();
        LOG.info("Start processing from partition info spec : {}", StringUtils.mapToString(lastReplicatedPartSpec));
    }
    ReplicationSpec replicationSpec = event.replicationSpec();
    Iterator<AddPartitionDesc> partitionIterator = event.partitionDescriptions(tableDesc).iterator();
    while (!encounteredTheLastReplicatedPartition && partitionIterator.hasNext()) {
        AddPartitionDesc addPartitionDesc = partitionIterator.next();
        Map<String, String> currentSpec = addPartitionDesc.getPartition(0).getPartSpec();
        encounteredTheLastReplicatedPartition = lastReplicatedPartSpec.equals(currentSpec);
    }
    while (partitionIterator.hasNext() && tracker.canAddMoreTasks()) {
        AddPartitionDesc addPartitionDesc = partitionIterator.next();
        Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
        Partition ptn = context.hiveDb.getPartition(table, partSpec, false);
        if (ptn == null) {
            if (!replicationSpec.isMetadataOnly()) {
                addPartition(partitionIterator.hasNext(), addPartitionDesc);
            }
        } else {
            // Replace the existing partition only if the destination ptn's repl.last.id is older than the replacement's.
            if (replicationSpec.allowReplacementInto(ptn.getParameters())) {
                if (replicationSpec.isMetadataOnly()) {
                    tracker.addTask(alterSinglePartition(addPartitionDesc, replicationSpec, ptn));
                    if (!tracker.canAddMoreTasks()) {
                        tracker.setReplicationState(new ReplicationState(new PartitionState(table.getTableName(), addPartitionDesc)));
                    }
                } else {
                    addPartition(partitionIterator.hasNext(), addPartitionDesc);
                }
            } else {
                // ignore this ptn, do nothing, not an error.
            }
        }
    }
    return tracker;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc) PartitionState(org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.ReplicationState.PartitionState) ImportSemanticAnalyzer.partSpecToString(org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer.partSpecToString) ReplicationState(org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.ReplicationState)
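
The two while-loops implement a resume pattern: the first fast-forwards the iterator past everything up to and including the last checkpointed partition, and the second schedules the remainder while the tracker still has task budget. A generic, self-contained sketch of the same idea follows; the checkpoint value and capacity counter are hypothetical stand-ins for lastReplicatedPartSpec and tracker.canAddMoreTasks().

import java.util.Iterator;
import java.util.List;

public class ResumeFromCheckpoint {
    public static void main(String[] args) {
        List<String> partitions = List.of("p=1", "p=2", "p=3", "p=4", "p=5");
        String lastReplicated = "p=2"; // hypothetical checkpoint; null means start fresh
        int capacity = 2;              // stand-in for tracker.canAddMoreTasks()

        Iterator<String> it = partitions.iterator();
        // Fast-forward: consume items until the checkpointed one has been passed.
        boolean reached = (lastReplicated == null);
        while (!reached && it.hasNext()) {
            reached = lastReplicated.equals(it.next());
        }
        // Process the remainder, stopping when the task budget is exhausted.
        while (it.hasNext() && capacity > 0) {
            System.out.println("scheduling load for " + it.next());
            capacity--;
        }
    }
}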

Example 18 with ReplicationSpec

use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

the class LoadTable method tasks.

public TaskTracker tasks() throws SemanticException {
    // Path being passed to us is a table dump location. We go ahead and load it in as needed.
    // If tblName is null, then we default to the table name specified in _metadata, which is good.
    // Or both are specified, in which case that's what we are intended to create the new table as.
    try {
        if (event.shouldNotReplicate()) {
            return tracker;
        }
        // This can never be null or empty.
        String dbName = tableContext.dbNameToLoadIn;
        // Create table associated with the import
        // Executed if relevant, and used to contain all the other details about the table if not.
        ImportTableDesc tableDesc = tableContext.overrideProperties(event.tableDesc(dbName));
        Table table = ImportSemanticAnalyzer.tableIfExists(tableDesc, context.hiveDb);
        ReplicationSpec replicationSpec = event.replicationSpec();
        // Normally, on import, trying to create a table or a partition in a db that does not yet exist
        // is an error condition. However, in the case of a REPL LOAD, it is possible that we are trying
        // to create tasks to create a table inside a db that as-of-now does not exist, but there is
        // a precursor Task waiting that will create it before this is encountered. Thus, we instantiate
        // defaults and do not error out in that case.
        // The above will change now, since we are going to split the replication load into multiple
        // execution tasks; the database may therefore have been created earlier, in which case
        // waitOnPrecursor will be false and, if the db is not found, we should error out.
        Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName());
        if (parentDb == null) {
            if (!tableContext.waitOnPrecursor()) {
                throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tableDesc.getDatabaseName()));
            }
        }
        if (table == null) {
            // If table doesn't exist, allow creating a new one only if the database state is older than the update.
            if ((parentDb != null) && (!replicationSpec.allowReplacementInto(parentDb.getParameters()))) {
                // The target database is newer than or the same as this update (per repl.last.id), so just noop it.
                return tracker;
            }
        } else {
            if (!replicationSpec.allowReplacementInto(table.getParameters())) {
                // If the target table exists and is newer or same as current update based on repl.last.id, then just noop it.
                return tracker;
            }
        }
        if (tableDesc.getLocation() == null) {
            tableDesc.setLocation(location(tableDesc, parentDb));
        }
        /* Note: In the following section, Metadata-only import handling logic is
           interleaved with regular repl-import logic. The rule of thumb being
           followed here is that MD-only imports are essentially ALTERs. They do
           not load data, and should not be "creating" any metadata - they should
           be replacing instead. The only place it makes sense for a MD-only import
           to create is in the case of a table that's been dropped and recreated,
           or in the case of an unpartitioned table. In all other cases, it should
           behave like a noop or a pure MD alter.
        */
        if (table == null) {
            newTableTasks(tableDesc);
        } else {
            existingTableTasks(tableDesc, table, replicationSpec);
        }
        if (!isPartitioned(tableDesc)) {
            createTableReplLogTask(tableDesc.getTableName(), tableDesc.tableType());
        }
        return tracker;
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
Also used : ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) Table(org.apache.hadoop.hive.ql.metadata.Table) ImportTableDesc(org.apache.hadoop.hive.ql.plan.ImportTableDesc) Database(org.apache.hadoop.hive.metastore.api.Database) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) IOException(java.io.IOException)
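
Both branches hinge on ReplicationSpec.allowReplacementInto(), which consults the repl.last.id recorded in the target's parameters so that older updates become noops. Below is a simplified, standalone sketch of that decision; the parameter key matches the one Hive's replication uses, but the numeric comparison is an illustrative assumption, not Hive's exact implementation.

import java.util.Map;

public class ReplacementCheck {
    // Hypothetical simplification: replace only when the incoming event id is
    // strictly newer than the id recorded on the target object.
    static boolean allowReplacementInto(Map<String, String> targetParams, long incomingEventId) {
        String last = targetParams.get("repl.last.id");
        if (last == null) {
            return true; // never replicated into before; safe to replace
        }
        return incomingEventId > Long.parseLong(last);
    }

    public static void main(String[] args) {
        System.out.println(allowReplacementInto(Map.of("repl.last.id", "100"), 90));  // false: noop
        System.out.println(allowReplacementInto(Map.of("repl.last.id", "100"), 101)); // true: replace
    }
}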

Example 19 with ReplicationSpec

use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

the class DDLTask method dropPartitions.

private void dropPartitions(Hive db, Table tbl, DropTableDesc dropTbl) throws HiveException {
    ReplicationSpec replicationSpec = dropTbl.getReplicationSpec();
    if (replicationSpec.isInReplicationScope()) {
        // In replication scope, drop only the partitions this event is allowed to replace,
        // as determined from each partition's repl.last.id parameter key values.
        for (DropTableDesc.PartSpec partSpec : dropTbl.getPartSpecs()) {
            List<Partition> partitions = new ArrayList<>();
            try {
                db.getPartitionsByExpr(tbl, partSpec.getPartSpec(), conf, partitions);
                for (Partition p : Iterables.filter(partitions, replicationSpec.allowEventReplacementInto())) {
                    db.dropPartition(tbl.getDbName(), tbl.getTableName(), p.getValues(), true);
                }
            } catch (NoSuchObjectException e) {
            // ignore NSOE because that means there's nothing to drop.
            } catch (Exception e) {
                throw new HiveException(e.getMessage(), e);
            }
        }
        return;
    }
    // ifExists is currently verified in DDLSemanticAnalyzer
    List<Partition> droppedParts = db.dropPartitions(dropTbl.getTableName(), dropTbl.getPartSpecs(), PartitionDropOptions.instance().deleteData(true).ifExists(true).purgeData(dropTbl.getIfPurge()));
    for (Partition partition : droppedParts) {
        console.printInfo("Dropped the partition " + partition.getName());
        // We have already locked the table, don't lock the partitions.
        addIfAbsentByName(new WriteEntity(partition, WriteEntity.WriteType.DDL_NO_LOCK));
    }
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DropTableDesc(org.apache.hadoop.hive.ql.plan.DropTableDesc) ArrayList(java.util.ArrayList) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException)
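
In the replication-scope branch, candidate partitions flow through Iterables.filter with the Guava Predicate returned by replicationSpec.allowEventReplacementInto(), so only partitions the event may replace are dropped. A self-contained sketch of that filter idiom follows; it requires Guava on the classpath, and the partition stand-ins and droppable predicate are hypothetical.

import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import java.util.List;
import java.util.Map;

public class FilteredDrop {
    public static void main(String[] args) {
        // Hypothetical partitions represented by their parameter maps.
        List<Map<String, String>> partitions = List.of(
                Map.of("name", "p=1", "repl.last.id", "90"),
                Map.of("name", "p=2", "repl.last.id", "120"));
        long eventId = 100;

        // Same shape as replicationSpec.allowEventReplacementInto(): a Guava
        // Predicate deciding which partitions the event may replace (drop).
        Predicate<Map<String, String>> droppable =
                p -> Long.parseLong(p.get("repl.last.id")) < eventId;

        for (Map<String, String> p : Iterables.filter(partitions, droppable)) {
            System.out.println("dropping " + p.get("name")); // only p=1
        }
    }
}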

Example 20 with ReplicationSpec

use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

the class RenameTableHandler method handle.

@Override
public List<Task<?>> handle(Context context) throws SemanticException {
    AlterTableMessage msg = deserializer.getAlterTableMessage(context.dmd.getPayload());
    try {
        Table tableObjBefore = msg.getTableObjBefore();
        Table tableObjAfter = msg.getTableObjAfter();
        String oldDbName = tableObjBefore.getDbName();
        String newDbName = tableObjAfter.getDbName();
        if (!context.isDbNameEmpty()) {
            // For a db-level load, the old and new db names must be the same.
            if (!oldDbName.equalsIgnoreCase(newDbName)) {
                throw new SemanticException("Cannot replicate an event renaming a table across" + " databases into a db level load " + oldDbName + "->" + newDbName);
            } else {
                // both were the same, and can be replaced by the new db we're loading into.
                oldDbName = context.dbName;
                newDbName = context.dbName;
            }
        }
        TableName oldName = TableName.fromString(tableObjBefore.getTableName(), null, oldDbName);
        TableName newName = TableName.fromString(tableObjAfter.getTableName(), null, newDbName);
        ReplicationSpec replicationSpec = context.eventOnlyReplicationSpec();
        AlterTableRenameDesc renameTableDesc = new AlterTableRenameDesc(oldName, replicationSpec, false, newName.getNotEmptyDbTable());
        renameTableDesc.setWriteId(msg.getWriteId());
        Task<DDLWork> renameTableTask = TaskFactory.get(new DDLWork(readEntitySet, writeEntitySet, renameTableDesc, true, context.getDumpDirectory(), context.getMetricCollector()), context.hiveConf);
        context.log.debug("Added rename table task : {}:{}->{}", renameTableTask.getId(), oldName.getNotEmptyDbTable(), newName.getNotEmptyDbTable());
        // oldDbName and newDbName *will* be the same if we're here, so the metadata update
        // targets a single db; if that should ever change, this will need reworking.
        updatedMetadata.set(context.dmd.getEventTo().toString(), newDbName, tableObjAfter.getTableName(), null);
        return ReplUtils.addChildTask(renameTableTask);
    } catch (Exception e) {
        throw (e instanceof SemanticException) ? (SemanticException) e : new SemanticException("Error reading message members", e);
    }
}
Also used : TableName(org.apache.hadoop.hive.common.TableName) ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) Table(org.apache.hadoop.hive.metastore.api.Table) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) AlterTableMessage(org.apache.hadoop.hive.metastore.messaging.AlterTableMessage) AlterTableRenameDesc(org.apache.hadoop.hive.ql.ddl.table.misc.rename.AlterTableRenameDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
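
The only branching in this handler is the database-remap guard: under a db-level load, a rename event may be applied only if it stays within one database, and both sides are then remapped to the db being loaded into. Here is a minimal sketch of that guard in isolation; the method and names are hypothetical.

public class RenameDbGuard {
    // Returns the single database the renamed table lives in after remapping,
    // or throws if the event renames across databases.
    static String remapDb(String oldDbName, String newDbName, String loadTargetDb) {
        if (loadTargetDb == null) {
            return oldDbName; // no db-level remap requested; keep the event's own db name
        }
        if (!oldDbName.equalsIgnoreCase(newDbName)) {
            throw new IllegalArgumentException(
                    "Cannot replicate a cross-database rename into a db-level load: "
                            + oldDbName + "->" + newDbName);
        }
        return loadTargetDb; // both sides collapse to the db being loaded into
    }

    public static void main(String[] args) {
        System.out.println(remapDb("sales", "SALES", "sales_replica")); // sales_replica
    }
}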

Aggregations

ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec) 24
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 11
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException) 9
Table (org.apache.hadoop.hive.ql.metadata.Table) 8
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork) 7
Database (org.apache.hadoop.hive.metastore.api.Database) 6
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity) 6
ArrayList (java.util.ArrayList) 5
Partition (org.apache.hadoop.hive.ql.metadata.Partition) 5
IOException (java.io.IOException) 4
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity) 4
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException) 4
Path (org.apache.hadoop.fs.Path) 3
TableName (org.apache.hadoop.hive.common.TableName) 3
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException) 3
Task (org.apache.hadoop.hive.ql.exec.Task) 3
FileNotFoundException (java.io.FileNotFoundException) 2
HashMap (java.util.HashMap) 2
LinkedHashMap (java.util.LinkedHashMap) 2
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 2