Use of org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.ReplLoadOpType in project hive by apache.
The class LoadPartitions, method forExistingTable.
private TaskTracker forExistingTable(AlterTableAddPartitionDesc lastPartitionReplicated) throws Exception {
  boolean encounteredTheLastReplicatedPartition = (lastPartitionReplicated == null);
  Map<String, String> lastReplicatedPartSpec = null;
  if (!encounteredTheLastReplicatedPartition) {
    lastReplicatedPartSpec = lastPartitionReplicated.getPartitions().get(0).getPartSpec();
    LOG.info("Start processing from partition info spec : {}", StringUtils.mapToString(lastReplicatedPartSpec));
  }
  Iterator<AlterTableAddPartitionDesc> partitionIterator = event.partitionDescriptions(tableDesc).iterator();
  while (!encounteredTheLastReplicatedPartition && partitionIterator.hasNext()) {
    AlterTableAddPartitionDesc addPartitionDesc = partitionIterator.next();
    Map<String, String> currentSpec = addPartitionDesc.getPartitions().get(0).getPartSpec();
    encounteredTheLastReplicatedPartition = lastReplicatedPartSpec.equals(currentSpec);
  }
  // Add Copy task pending for previous partition
  if (PartitionState.Stage.COPY.equals(lastReplicatedStage)) {
    addTasksForPartition(table, lastPartitionReplicated, lastReplicatedPartitionDesc);
  }
  boolean pendingPartitions = false;
  while (partitionIterator.hasNext() && tracker.canAddMoreTasks()) {
    pendingPartitions = true;
    AlterTableAddPartitionDesc addPartitionDesc = partitionIterator.next();
    AlterTableAddPartitionDesc.PartitionDesc src = addPartitionDesc.getPartitions().get(0);
    // Add check point task as part of add partition
    Map<String, String> partParams = new HashMap<>();
    partParams.put(REPL_CHECKPOINT_KEY, context.dumpDirectory);
    Path replicaWarehousePartitionLocation = locationOnReplicaWarehouse(table, src);
    src.setLocation(replicaWarehousePartitionLocation.toString());
    src.addPartParams(partParams);
    Map<String, String> partSpec = src.getPartSpec();
    ReplLoadOpType loadPtnType = getLoadPartitionType(partSpec);
    switch (loadPtnType) {
      case LOAD_NEW:
        break;
      case LOAD_REPLACE:
        tracker.addDependentTask(dropPartitionTask(table, partSpec));
        break;
      case LOAD_SKIP:
        continue;
      default:
        break;
    }
  }
  if (pendingPartitions) {
    addConsolidatedPartitionDesc(lastPartitionReplicated);
  }
  return tracker;
}
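For reference, ReplUtils.ReplLoadOpType is the three-way decision enum driving the switch above; its constants LOAD_NEW, LOAD_SKIP and LOAD_REPLACE all appear in these snippets. The partition-level choice comes from getLoadPartitionType. Below is a minimal sketch of how such a helper decides, assuming the checkpoint convention visible above (REPL_CHECKPOINT_KEY written into the partition parameters); helper and field names not visible in the snippet (replCkptStatus, the context/tableContext fields) are assumptions, not the verbatim Hive implementation.

// The enum as used above, nested in org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.
public enum ReplLoadOpType {
  LOAD_NEW, LOAD_SKIP, LOAD_REPLACE
}

// Sketch of the partition-level decision (treat names beyond those in the
// snippets as assumptions, and verify against the actual Hive source).
private ReplLoadOpType getLoadPartitionType(Map<String, String> partSpec)
    throws InvalidOperationException, HiveException {
  // Does the partition already exist on the replica?
  Partition ptn = context.hiveDb.getPartition(table, partSpec, false);
  if (ptn == null) {
    return ReplLoadOpType.LOAD_NEW;     // absent: create it
  }
  // Was it already loaded from this very dump (i.e. a retried bootstrap)?
  if (ReplUtils.replCkptStatus(tableContext.dbNameToLoadIn, ptn.getParameters(),
      context.dumpDirectory)) {
    return ReplLoadOpType.LOAD_SKIP;    // checkpoint matches: nothing to do
  }
  return ReplLoadOpType.LOAD_REPLACE;   // stale copy: drop, then re-add
}

Note how the switch above uses the result: LOAD_REPLACE queues a dropPartitionTask before the partition is re-added, while LOAD_SKIP short-circuits the loop iteration entirely with continue.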
Use of org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.ReplLoadOpType in project hive by apache.
The class LoadDatabase, method tasks.
public TaskTracker tasks() throws Exception {
  Database dbInMetadata = readDbMetadata();
  String dbName = dbInMetadata.getName();
  Task<?> dbRootTask = null;
  ReplLoadOpType loadDbType = getLoadDbType(dbName);
  switch (loadDbType) {
    case LOAD_NEW:
      dbRootTask = createDbTask(dbInMetadata);
      break;
    case LOAD_REPLACE:
      dbRootTask = alterDbTask(dbInMetadata);
      break;
    default:
      break;
  }
  if (dbRootTask != null) {
    dbRootTask.addDependentTask(setOwnerInfoTask(dbInMetadata));
    tracker.addTask(dbRootTask);
  }
  return tracker;
}
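The database-level variant follows the same shape: create when the database is missing, skip when its parameters already carry the checkpoint for the current dump, otherwise alter it in place. A minimal sketch, under the same assumptions as the partition-level helper above:

// Sketch of a database-level decision mirroring getLoadDbType above.
// replCkptStatus and the context fields are assumed from the snippets.
private ReplLoadOpType getLoadDbType(String dbName)
    throws InvalidOperationException, HiveException {
  Database db = context.hiveDb.getDatabase(dbName);
  if (db == null) {
    return ReplLoadOpType.LOAD_NEW;     // db absent: createDbTask will make it
  }
  if (ReplUtils.replCkptStatus(dbName, db.getParameters(), context.dumpDirectory)) {
    return ReplLoadOpType.LOAD_SKIP;    // already bootstrapped from this dump
  }
  return ReplLoadOpType.LOAD_REPLACE;   // exists but stale: alterDbTask updates it
}

Notice that tasks() above has no explicit LOAD_SKIP case: a skipped database falls through the default branch, dbRootTask stays null, and no task is queued.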
Use of org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.ReplLoadOpType in project hive by apache.
The class LoadTable, method tasks.
public TaskTracker tasks(boolean isBootstrapDuringInc) throws Exception {
  // Path being passed to us is a table dump location. We go ahead and load it in as needed.
  // If tblName is null, we default to the table name specified in _metadata, which is good.
  // Or both are specified, in which case, that's what we are intended to create the new table as.
  if (event.shouldNotReplicate()) {
    return tracker;
  }
  String dbName = tableContext.dbNameToLoadIn; // this can never be null or empty
  // Create table associated with the import
  // Executed if relevant, and used to contain all the other details about the table if not.
  ImportTableDesc tableDesc = event.tableDesc(dbName);
  Table table = ImportSemanticAnalyzer.tableIfExists(tableDesc, context.hiveDb);
  // Normally, on import, trying to create a table or a partition in a db that does not yet exist
  // is an error condition. However, in the case of a REPL LOAD, it is possible that we are trying
  // to create tasks to create a table inside a db that as-of-now does not exist, but there is
  // a precursor Task waiting that will create it before this is encountered. Thus, we instantiate
  // defaults and do not error out in that case.
  // The above will change now since we are going to split the replication load into multiple
  // execution tasks, and hence we could have created the database earlier, in which case
  // waitOnPrecursor will be false and we should error out if the db is not found.
  Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName());
  if (parentDb == null) {
    if (!tableContext.waitOnPrecursor()) {
      throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tableDesc.getDatabaseName()));
    }
  }
  Task<?> tblRootTask = null;
  ReplLoadOpType loadTblType = getLoadTableType(table, isBootstrapDuringInc);
  switch (loadTblType) {
    case LOAD_NEW:
      break;
    case LOAD_REPLACE:
      tblRootTask = dropTableTask(table);
      break;
    case LOAD_SKIP:
      return tracker;
    default:
      break;
  }
  TableLocationTuple tableLocationTuple = tableLocation(tableDesc, parentDb, tableContext, context);
  tableDesc.setLocation(tableLocationTuple.location);
  /* Note: In the following section, metadata-only import handling logic is
     interleaved with regular repl-import logic. The rule of thumb being
     followed here is that MD-only imports are essentially ALTERs. They do
     not load data, and should not be "creating" any metadata - they should
     be replacing instead. The only place it makes sense for an MD-only import
     to create is in the case of a table that's been dropped and recreated,
     or in the case of an unpartitioned table. In all other cases, it should
     behave like a noop or a pure MD alter.
  */
  newTableTasks(tableDesc, tblRootTask, tableLocationTuple);
  // Set the checkpoint task as dependent on the create-table task, so that if the same dump
  // is retried for bootstrap, we skip the current table update.
  Task<?> ckptTask = ReplUtils.getTableCheckpointTask(tableDesc, null, context.dumpDirectory, this.metricCollector, context.hiveConf);
  if (!isPartitioned(tableDesc)) {
    Task<?> replLogTask = ReplUtils.getTableReplLogTask(tableDesc, replLogger, context.hiveConf, metricCollector, (new Path(context.dumpDirectory)).getParent().toString());
    ckptTask.addDependentTask(replLogTask);
  }
  tracker.addDependentTask(ckptTask);
  return tracker;
}
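The table-level decision is the same pattern once more. The real getLoadTableType also consults the isBootstrapDuringInc flag passed to tasks(); this simplified sketch omits that and shows only the three-way choice, with the same assumed helper names as above:

// Sketch: table-level load decision (simplified; isBootstrapDuringInc omitted).
private ReplLoadOpType getLoadTableType(Table table)
    throws InvalidOperationException, HiveException {
  if (table == null) {
    return ReplLoadOpType.LOAD_NEW;     // table absent on the replica
  }
  if (ReplUtils.replCkptStatus(table.getDbName(), table.getParameters(),
      context.dumpDirectory)) {
    return ReplLoadOpType.LOAD_SKIP;    // checkpoint shows this dump was already applied
  }
  return ReplLoadOpType.LOAD_REPLACE;   // exists but stale: drop, then re-create
}

In tasks() above, LOAD_REPLACE sets tblRootTask to a dropTableTask so the subsequent create runs after the drop, while LOAD_SKIP returns the tracker immediately without queuing any work for this table.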