Search in sources :

Example 1 with ReplLoadOpType

use of org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.ReplLoadOpType in project hive by apache.

The method forExistingTable of the class LoadPartitions.

/**
 * Prepares load work for the partitions of a table that already exists on the replica.
 *
 * <p>When {@code lastPartitionReplicated} is non-null, the partition descriptions of the
 * event are first consumed up to and including the one whose partition spec equals the spec
 * of that last replicated partition. If the recorded stage of that partition is
 * {@code COPY}, its tasks are added again. Each remaining partition is then visited for as
 * long as the tracker reports that it can take more tasks: the checkpoint parameter and the
 * replica warehouse location are set on the partition description, and a drop-partition task
 * is registered when the partition's load type is {@code LOAD_REPLACE}.
 *
 * @param lastPartitionReplicated description of the partition processed last by an earlier
 *                                run, or {@code null} to start from the first partition
 * @return the task tracker of this loader
 * @throws Exception propagated from any of the helpers invoked here
 */
private TaskTracker forExistingTable(AlterTableAddPartitionDesc lastPartitionReplicated) throws Exception {
    final boolean resuming = (lastPartitionReplicated != null);
    Map<String, String> resumeSpec = null;
    if (resuming) {
        resumeSpec = lastPartitionReplicated.getPartitions().get(0).getPartSpec();
        LOG.info("Start processing from partition info spec : {}", StringUtils.mapToString(resumeSpec));
    }

    Iterator<AlterTableAddPartitionDesc> descriptions = event.partitionDescriptions(tableDesc).iterator();
    if (resuming) {
        // Consume descriptions until the one matching the resume point has been read.
        while (descriptions.hasNext()) {
            Map<String, String> candidateSpec = descriptions.next().getPartitions().get(0).getPartSpec();
            if (resumeSpec.equals(candidateSpec)) {
                break;
            }
        }
    }

    // The previous partition still has its copy pending, so add its tasks again.
    if (PartitionState.Stage.COPY.equals(lastReplicatedStage)) {
        addTasksForPartition(table, lastPartitionReplicated, lastReplicatedPartitionDesc);
    }

    boolean sawPendingPartition = false;
    while (descriptions.hasNext() && tracker.canAddMoreTasks()) {
        sawPendingPartition = true;
        AlterTableAddPartitionDesc.PartitionDesc partition = descriptions.next().getPartitions().get(0);

        // The checkpoint parameter travels with the add-partition description.
        Map<String, String> checkpointParams = new HashMap<>();
        checkpointParams.put(REPL_CHECKPOINT_KEY, context.dumpDirectory);
        Path replicaLocation = locationOnReplicaWarehouse(table, partition);
        partition.setLocation(replicaLocation.toString());
        partition.addPartParams(checkpointParams);

        Map<String, String> spec = partition.getPartSpec();
        ReplLoadOpType loadType = getLoadPartitionType(spec);
        switch (loadType) {
            case LOAD_REPLACE:
                // The existing partition has to be dropped before it is loaded again.
                tracker.addDependentTask(dropPartitionTask(table, spec));
                break;
            case LOAD_NEW:
            case LOAD_SKIP:
            default:
                // Nothing further to schedule for this partition in this loop.
                break;
        }
    }

    if (sawPendingPartition) {
        addConsolidatedPartitionDesc(lastPartitionReplicated);
    }
    return tracker;
}
Also used : Path(org.apache.hadoop.fs.Path) AlterTableAddPartitionDesc(org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc) HashMap(java.util.HashMap) ImportSemanticAnalyzer.partSpecToString(org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer.partSpecToString) ReplLoadOpType(org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.ReplLoadOpType)

Example 2 with ReplLoadOpType

use of org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.ReplLoadOpType in project hive by apache.

The method tasks of the class LoadDatabase.

/**
 * Builds the root task for loading the database described by the dump metadata.
 *
 * <p>A create task is used for {@code LOAD_NEW} and an alter task for {@code LOAD_REPLACE};
 * any other load type yields no task. When a root task exists, the owner-info task is
 * chained to it as a dependent and the root task is added to the tracker.
 *
 * @return the task tracker of this loader
 * @throws Exception propagated from the metadata read or from task creation
 */
public TaskTracker tasks() throws Exception {
    final Database sourceDb = readDbMetadata();
    final ReplLoadOpType opType = getLoadDbType(sourceDb.getName());

    final Task<?> rootTask;
    switch (opType) {
        case LOAD_NEW:
            rootTask = createDbTask(sourceDb);
            break;
        case LOAD_REPLACE:
            rootTask = alterDbTask(sourceDb);
            break;
        default:
            rootTask = null;
            break;
    }

    // Without a root task there is nothing to register.
    if (rootTask == null) {
        return tracker;
    }
    rootTask.addDependentTask(setOwnerInfoTask(sourceDb));
    tracker.addTask(rootTask);
    return tracker;
}
Also used : Database(org.apache.hadoop.hive.metastore.api.Database) ReplLoadOpType(org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.ReplLoadOpType)

Example 3 with ReplLoadOpType

use of org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.ReplLoadOpType in project hive by apache.

The method tasks of the class LoadTable.

/**
 * Builds the tasks needed to load one table from the dump.
 *
 * <p>Returns the tracker untouched when the event says the table should not be replicated
 * or when the computed load type is {@code LOAD_SKIP}. For {@code LOAD_REPLACE} a drop-table
 * task becomes the root task that is handed to {@code newTableTasks}. A checkpoint task is
 * always added to the tracker as a dependent task; for a table that is not partitioned, a
 * replication-log task is chained behind the checkpoint task.
 *
 * @param isBootstrapDuringInc passed through to {@code getLoadTableType} when the load type
 *                             is decided
 * @return the task tracker of this loader
 * @throws SemanticException if the parent database does not exist and the table context is
 *                           not waiting on a precursor task
 * @throws Exception propagated from any of the helpers invoked here
 */
public TaskTracker tasks(boolean isBootstrapDuringInc) throws Exception {
    if (event.shouldNotReplicate()) {
        return tracker;
    }

    // The descriptor is built for the target database name, which is expected to be set
    // (never null or empty). It describes the table to create, if a create is relevant, and
    // otherwise serves as the holder of the table's details.
    ImportTableDesc tableDesc = event.tableDesc(tableContext.dbNameToLoadIn);
    Table table = ImportSemanticAnalyzer.tableIfExists(tableDesc, context.hiveDb);

    // On a plain import, creating a table or partition in a database that does not exist yet
    // is an error. During REPL LOAD the database may be missing only because a precursor task
    // that creates it has not run yet; in that situation defaults are used and no error is
    // raised. Because the replication load is split across several executions, the database
    // may also have been created by an earlier one, in which case waitOnPrecursor is false
    // and a missing database is reported as an error.
    Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName());
    if (parentDb == null && !tableContext.waitOnPrecursor()) {
        throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tableDesc.getDatabaseName()));
    }

    Task<?> tblRootTask = null;
    ReplLoadOpType loadTblType = getLoadTableType(table, isBootstrapDuringInc);
    switch (loadTblType) {
        case LOAD_SKIP:
            return tracker;
        case LOAD_REPLACE:
            tblRootTask = dropTableTask(table);
            break;
        case LOAD_NEW:
        default:
            break;
    }

    TableLocationTuple tableLocationTuple = tableLocation(tableDesc, parentDb, tableContext, context);
    tableDesc.setLocation(tableLocationTuple.location);

    /* Note: metadata-only import handling is interleaved with the regular repl-import logic
       below. The rule of thumb is that metadata-only imports are essentially ALTERs: they
       load no data and should replace metadata rather than "create" it. A metadata-only
       import should create only for a table that has been dropped and recreated, or for an
       unpartitioned table. In every other case it should behave like a noop or a pure
       metadata alter.
    */
    newTableTasks(tableDesc, tblRootTask, tableLocationTuple);

    // The checkpoint task is made dependent on the create-table task, so that a retry of the
    // same dump for bootstrap skips updating this table.
    Task<?> ckptTask = ReplUtils.getTableCheckpointTask(
        tableDesc, null, context.dumpDirectory, this.metricCollector, context.hiveConf);
    if (!isPartitioned(tableDesc)) {
        String dumpRoot = (new Path(context.dumpDirectory)).getParent().toString();
        Task<?> replLogTask = ReplUtils.getTableReplLogTask(
            tableDesc, replLogger, context.hiveConf, metricCollector, dumpRoot);
        ckptTask.addDependentTask(replLogTask);
    }
    tracker.addDependentTask(ckptTask);
    return tracker;
}
Also used : Path(org.apache.hadoop.fs.Path) Table(org.apache.hadoop.hive.ql.metadata.Table) ImportTableDesc(org.apache.hadoop.hive.ql.plan.ImportTableDesc) Database(org.apache.hadoop.hive.metastore.api.Database) ReplLoadOpType(org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.ReplLoadOpType) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Aggregations

ReplLoadOpType (org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.ReplLoadOpType): 3, Path (org.apache.hadoop.fs.Path): 2, Database (org.apache.hadoop.hive.metastore.api.Database): 2, HashMap (java.util.HashMap): 1, AlterTableAddPartitionDesc (org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc): 1, Table (org.apache.hadoop.hive.ql.metadata.Table): 1, ImportSemanticAnalyzer.partSpecToString (org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer.partSpecToString): 1, SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 1, ImportTableDesc (org.apache.hadoop.hive.ql.plan.ImportTableDesc): 1