
Example 6 with AddDependencyToLeaves

Use of org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves in project hive by apache.

Class LoadFunction, method createFunctionReplLogTask:

private void createFunctionReplLogTask(List<Task<?>> functionTasks, String functionName) {
    ReplStateLogWork replLogWork = new ReplStateLogWork(replLogger, functionName, dumpDirectory, metricCollector);
    Task<ReplStateLogWork> replLogTask = TaskFactory.get(replLogWork, context.hiveConf);
    DAGTraversal.traverse(functionTasks, new AddDependencyToLeaves(replLogTask));
}
Also used : ReplStateLogWork(org.apache.hadoop.hive.ql.exec.repl.ReplStateLogWork) AddDependencyToLeaves(org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves)
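
For readers new to this utility, the pattern in Example 6 is: build the work object, wrap it in a Task via TaskFactory, then let DAGTraversal append that task to every leaf of the existing task graph. Below is a minimal, self-contained sketch of the underlying "append to leaves" idea only; the Node type, the addToLeaves method, and the node names are invented for illustration and are not Hive APIs, and a real implementation would also need to guard against visiting shared nodes more than once.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class AddToLeavesSketch {

    // A toy DAG node standing in for org.apache.hadoop.hive.ql.exec.Task.
    static class Node {
        final String name;
        final List<Node> children = new ArrayList<>();
        Node(String name) { this.name = name; }
    }

    // Attach 'toAppend' as a child of every node that currently has no children.
    static void addToLeaves(List<Node> roots, Node toAppend) {
        for (Node n : roots) {
            if (n.children.isEmpty()) {
                n.children.add(toAppend);              // leaf: now depends on the appended node
            } else {
                addToLeaves(n.children, toAppend);     // interior node: keep descending
            }
        }
    }

    public static void main(String[] args) {
        Node root = new Node("loadFunction");
        Node leafA = new Node("copyJar");
        Node leafB = new Node("createFunction");
        root.children.add(leafA);
        root.children.add(leafB);
        addToLeaves(Collections.singletonList(root), new Node("replLog"));
        // Both leaves now point at "replLog", mirroring what
        // DAGTraversal.traverse(functionTasks, new AddDependencyToLeaves(replLogTask)) achieves.
        System.out.println(leafA.children.get(0).name + " " + leafB.children.get(0).name);
    }
}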

Example 7 with AddDependencyToLeaves

Use of org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves in project hive by apache.

Class AddDependencyToLeavesTest, method shouldNotSkipIntermediateDependencyCollectionTasks:

@Test
public void shouldNotSkipIntermediateDependencyCollectionTasks() {
    Task<DependencyCollectionWork> collectionWorkTaskOne = TaskFactory.get(new DependencyCollectionWork());
    Task<DependencyCollectionWork> collectionWorkTaskTwo = TaskFactory.get(new DependencyCollectionWork());
    Task<DependencyCollectionWork> collectionWorkTaskThree = TaskFactory.get(new DependencyCollectionWork());
    @SuppressWarnings("unchecked") Task<?> rootTask = mock(Task.class);
    when(rootTask.getDependentTasks()).thenReturn(Arrays.asList(collectionWorkTaskOne, collectionWorkTaskTwo, collectionWorkTaskThree));
    @SuppressWarnings("unchecked") List<Task<?>> tasksPostCurrentGraph = Arrays.asList(mock(Task.class), mock(Task.class));
    DAGTraversal.traverse(Collections.singletonList(rootTask), new AddDependencyToLeaves(tasksPostCurrentGraph));
    List<Task<?>> dependentTasksForOne = collectionWorkTaskOne.getDependentTasks();
    List<Task<?>> dependentTasksForTwo = collectionWorkTaskTwo.getDependentTasks();
    List<Task<?>> dependentTasksForThree = collectionWorkTaskThree.getDependentTasks();
    assertEquals(dependentTasksForOne.size(), 2);
    assertEquals(dependentTasksForTwo.size(), 2);
    assertEquals(dependentTasksForThree.size(), 2);
    assertTrue(tasksPostCurrentGraph.containsAll(dependentTasksForOne));
    assertTrue(tasksPostCurrentGraph.containsAll(dependentTasksForTwo));
    assertTrue(tasksPostCurrentGraph.containsAll(dependentTasksForThree));
// assertTrue(dependentTasksForOne.iterator().next() instanceof DependencyCollectionTask);
// assertTrue(dependentTasksForTwo.iterator().next() instanceof DependencyCollectionTask);
// assertTrue(dependentTasksForThree.iterator().next() instanceof DependencyCollectionTask);
}
Also used : Task(org.apache.hadoop.hive.ql.exec.Task) DependencyCollectionWork(org.apache.hadoop.hive.ql.plan.DependencyCollectionWork) AddDependencyToLeaves(org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves) Test(org.junit.Test)
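
The test above mocks the root task and checks that each of the three intermediate dependency-collection leaves picks up both post-graph tasks. A stripped-down variant with a single real leaf could look as follows; it uses only calls that appear in the examples on this page (TaskFactory.get, DAGTraversal.traverse, getDependentTasks), and the import paths for TaskFactory and DAGTraversal are assumed here rather than confirmed.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;               // package assumed for this sketch
import org.apache.hadoop.hive.ql.exec.util.DAGTraversal;         // package assumed for this sketch
import org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves;
import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;

public class SingleLeafSketch {
    public static void main(String[] args) {
        // One leaf task plus two tasks that should run after the current graph.
        Task<DependencyCollectionWork> leaf = TaskFactory.get(new DependencyCollectionWork());
        List<Task<?>> followers = Arrays.asList(
                TaskFactory.get(new DependencyCollectionWork()),
                TaskFactory.get(new DependencyCollectionWork()));

        DAGTraversal.traverse(Collections.singletonList(leaf), new AddDependencyToLeaves(followers));

        // The leaf is the only node in the graph, so it should now depend on both followers.
        System.out.println(leaf.getDependentTasks().size());      // expected: 2
    }
}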

Example 8 with AddDependencyToLeaves

Use of org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves in project hive by apache.

Class CommitTxnHandler, method handle:

@Override
public List<Task<?>> handle(Context context) throws SemanticException {
    if (!AcidUtils.isAcidEnabled(context.hiveConf)) {
        context.log.error("Cannot load transaction events as acid is not enabled");
        throw new SemanticException("Cannot load transaction events as acid is not enabled");
    }
    CommitTxnMessage msg = deserializer.getCommitTxnMessage(context.dmd.getPayload());
    int numEntry = (msg.getTables() == null ? 0 : msg.getTables().size());
    List<Task<?>> tasks = new ArrayList<>();
    String dbName = context.dbName;
    String tableNamePrev = null;
    String tblName = null;
    ReplTxnWork work = new ReplTxnWork(HiveUtils.getReplPolicy(context.dbName), context.dbName, null, msg.getTxnId(), ReplTxnWork.OperationType.REPL_COMMIT_TXN, context.eventOnlyReplicationSpec(), context.getDumpDirectory(), context.getMetricCollector());
    if (numEntry > 0) {
        context.log.debug("Commit txn handler for txnid " + msg.getTxnId() + " databases : " + msg.getDatabases() + " tables : " + msg.getTables() + " partitions : " + msg.getPartitions() + " files : " + msg.getFilesList() + " write ids : " + msg.getWriteIds());
    }
    for (int idx = 0; idx < numEntry; idx++) {
        String actualTblName = msg.getTables().get(idx);
        String actualDBName = msg.getDatabases().get(idx);
        String completeName = Table.getCompleteName(actualDBName, actualTblName);
        // Entries for the same table are grouped together in the commit txn message.
        if (tableNamePrev == null || !(completeName.equals(tableNamePrev))) {
            // The data location is created by source, so the location should be formed based on the table name in msg.
            Path location = HiveUtils.getDumpPath(new Path(context.location), actualDBName, actualTblName);
            tblName = actualTblName;
            // for warehouse level dump, use db name from write event
            dbName = (context.isDbNameEmpty() ? actualDBName : context.dbName);
            Context currentContext = new Context(context, dbName, context.getDumpDirectory(), context.getMetricCollector());
            currentContext.setLocation(location.toUri().toString());
            // Piggybacking in Import logic for now
            TableHandler tableHandler = new TableHandler();
            tasks.addAll((tableHandler.handle(currentContext)));
            readEntitySet.addAll(tableHandler.readEntities());
            writeEntitySet.addAll(tableHandler.writeEntities());
            getUpdatedMetadata().copyUpdatedMetadata(tableHandler.getUpdatedMetadata());
            tableNamePrev = completeName;
        }
        try {
            WriteEventInfo writeEventInfo = new WriteEventInfo(msg.getWriteIds().get(idx), dbName, tblName, msg.getFiles(idx));
            if (msg.getPartitions().get(idx) != null && !msg.getPartitions().get(idx).isEmpty()) {
                writeEventInfo.setPartition(msg.getPartitions().get(idx));
            }
            work.addWriteEventInfo(writeEventInfo);
        } catch (Exception e) {
            throw new SemanticException("Failed to extract write event info from commit txn message : " + e.getMessage());
        }
    }
    Task<ReplTxnWork> commitTxnTask = TaskFactory.get(work, context.hiveConf);
    // Anyways, if this event gets executed again, it is taken care of.
    if (!context.isDbNameEmpty()) {
        updatedMetadata.set(context.dmd.getEventTo().toString(), context.dbName, null, null);
    }
    context.log.debug("Added Commit txn task : {}", commitTxnTask.getId());
    if (tasks.isEmpty()) {
        // will be used for setting the last repl id.
        return Collections.singletonList(commitTxnTask);
    }
    DAGTraversal.traverse(tasks, new AddDependencyToLeaves(commitTxnTask));
    return tasks;
}
Also used : Path(org.apache.hadoop.fs.Path) Task(org.apache.hadoop.hive.ql.exec.Task) ArrayList(java.util.ArrayList) CommitTxnMessage(org.apache.hadoop.hive.metastore.messaging.CommitTxnMessage) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) WriteEventInfo(org.apache.hadoop.hive.metastore.api.WriteEventInfo) ReplTxnWork(org.apache.hadoop.hive.ql.plan.ReplTxnWork) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) AddDependencyToLeaves(org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves)
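
One detail worth calling out in Example 8 is the tableNamePrev check: entries in the commit txn message arrive grouped by table, so the handler creates the import tasks once per table and then adds a WriteEventInfo per entry. A small, self-contained sketch of that grouping idiom; the data and types here are invented for illustration and are not taken from Hive.

import java.util.Arrays;
import java.util.List;

public class GroupByTableSketch {
    public static void main(String[] args) {
        // Consecutive entries for the same table, as they would appear in a commit txn message.
        List<String> completeNames = Arrays.asList("db1.t1", "db1.t1", "db1.t2", "db2.t1", "db2.t1");
        String tableNamePrev = null;
        for (String completeName : completeNames) {
            if (tableNamePrev == null || !completeName.equals(tableNamePrev)) {
                // Per-table setup: in CommitTxnHandler this is where TableHandler builds the import tasks.
                System.out.println("new table group: " + completeName);
                tableNamePrev = completeName;
            }
            // Per-entry work: in CommitTxnHandler this is where a WriteEventInfo is added to ReplTxnWork.
            System.out.println("  write event for: " + completeName);
        }
    }
}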

Example 9 with AddDependencyToLeaves

Use of org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves in project hive by apache.

Class ReplLoadTask, method createReplLoadCompleteAckTask:

private void createReplLoadCompleteAckTask() {
    if (!work.hasBootstrapLoadTasks() && (work.isIncrementalLoad() ? !work.incrementalLoadTasksBuilder().hasMoreWork() : true)) {
        // All repl load tasks are executed and status is 0, create the task to add the acknowledgement
        List<PreAckTask> listOfPreAckTasks = new LinkedList<>();
        listOfPreAckTasks.add(new PreAckTask() {

            @Override
            public void run() throws SemanticException {
                try {
                    HiveMetaStoreClient metaStoreClient = new HiveMetaStoreClient(conf);
                    long currentNotificationID = metaStoreClient.getCurrentNotificationEventId().getEventId();
                    Path loadMetadataFilePath = new Path(work.dumpDirectory, LOAD_METADATA.toString());
                    Utils.writeOutput(String.valueOf(currentNotificationID), loadMetadataFilePath, conf);
                    LOG.info("Created LOAD Metadata file : {} with NotificationID : {}", loadMetadataFilePath, currentNotificationID);
                } catch (TException ex) {
                    throw new SemanticException(ex);
                }
            }
        });
        if (work.shouldFailover()) {
            listOfPreAckTasks.add(new PreAckTask() {

                @Override
                public void run() throws SemanticException {
                    try {
                        Database db = getHive().getDatabase(work.getTargetDatabase());
                        if (MetaStoreUtils.isDbBeingFailedOverAtEndpoint(db, MetaStoreUtils.FailoverEndpoint.TARGET)) {
                            return;
                        }
                        Map<String, String> params = db.getParameters();
                        if (params == null) {
                            params = new HashMap<>();
                            db.setParameters(params);
                        }
                        LOG.info("Setting failover endpoint:{} to TARGET for database: {}", ReplConst.REPL_FAILOVER_ENDPOINT, db.getName());
                        params.put(ReplConst.REPL_FAILOVER_ENDPOINT, MetaStoreUtils.FailoverEndpoint.TARGET.toString());
                        getHive().alterDatabase(work.getTargetDatabase(), db);
                    } catch (HiveException e) {
                        throw new SemanticException(e);
                    }
                }
            });
        }
        if (work.isSecondFailover) {
            // If this is the second load of optimised bootstrap, it marks the end of the cycle;
            // add tasks to sort out the database properties.
            listOfPreAckTasks.add(new PreAckTask() {

                @Override
                public void run() throws SemanticException {
                    try {
                        Hive hiveDb = getHive();
                        Database db = hiveDb.getDatabase(work.getTargetDatabase());
                        LinkedHashMap<String, String> params = new LinkedHashMap<>(db.getParameters());
                        LOG.debug("Database {} properties before removal {}", work.getTargetDatabase(), params);
                        params.remove(SOURCE_OF_REPLICATION);
                        db.setParameters(params);
                        LOG.info("Removed {} property from database {} after successful optimised bootstrap load.", SOURCE_OF_REPLICATION, work.getTargetDatabase());
                        hiveDb.alterDatabase(work.getTargetDatabase(), db);
                        LOG.debug("Database {} poperties after removal {}", work.getTargetDatabase(), params);
                    } catch (HiveException e) {
                        throw new SemanticException(e);
                    }
                }
            });
        }
        AckWork replLoadAckWork = new AckWork(new Path(work.dumpDirectory, LOAD_ACKNOWLEDGEMENT.toString()), work.getMetricCollector(), listOfPreAckTasks);
        Task<AckWork> loadAckWorkTask = TaskFactory.get(replLoadAckWork, conf);
        if (childTasks.isEmpty()) {
            childTasks.add(loadAckWorkTask);
        } else {
            DAGTraversal.traverse(childTasks, new AddDependencyToLeaves(Collections.singletonList(loadAckWorkTask)));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TException(org.apache.thrift.TException) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) LinkedList(java.util.LinkedList) LinkedHashMap(java.util.LinkedHashMap) Hive(org.apache.hadoop.hive.ql.metadata.Hive) LoadDatabase(org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.LoadDatabase) Database(org.apache.hadoop.hive.metastore.api.Database) AlterDatabase(org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.LoadDatabase.AlterDatabase) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) AddDependencyToLeaves(org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves)
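
The method above follows a "run the pre-ack hooks, then drop an acknowledgement marker" shape: each PreAckTask records something (the notification id, failover properties) before the AckWork task writes the load acknowledgement. Below is a hypothetical, Hive-free sketch of that shape; the Hook interface, the acknowledge method, and the file layout are invented for this illustration.

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.List;

public class AckSketch {

    // Stand-in for PreAckTask: something that must succeed before the ack marker is written.
    interface Hook {
        void run() throws Exception;
    }

    static void acknowledge(List<Hook> preAckHooks, Path ackFile) throws Exception {
        for (Hook hook : preAckHooks) {
            hook.run();                          // e.g. persist a notification id, flip failover flags
        }
        Files.createFile(ackFile);               // marker meaning "load completed successfully"
    }

    public static void main(String[] args) throws Exception {
        Path dumpDir = Files.createTempDirectory("dump");
        Path ack = dumpDir.resolve("_finished_load");
        acknowledge(Collections.singletonList(() -> System.out.println("pre-ack work done")), ack);
        System.out.println("ack written: " + Files.exists(ack));
    }
}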

Example 10 with AddDependencyToLeaves

Use of org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves in project hive by apache.

Class ReplLoadTask, method updateDatabaseLastReplID:

/**
 * A database update was done earlier, and we want to make sure the last repl id on that
 * database is updated before we switch to processing a new database.
 * This has to be the last task in the graph: if the last.repl.id update were a root-level
 * task, it would be executed first in the execution phase, and if any child task of the
 * bootstrap load subsequently failed, the last repl status of the target database would
 * still return a valid value even though the bootstrap had failed, which would not
 * represent the actual state of the database.
 */
private TaskTracker updateDatabaseLastReplID(int maxTasks, Context context, Scope scope) throws SemanticException {
    /*
    we don't want to put any limits on this task as this is essential before we start
    processing new database events.
   */
    TaskTracker taskTracker = new AlterDatabase(context, work.databaseEvent(context.hiveConf), work.dbNameToLoadIn, new TaskTracker(maxTasks), work.getMetricCollector()).tasks();
    AddDependencyToLeaves function = new AddDependencyToLeaves(taskTracker.tasks());
    DAGTraversal.traverse(scope.rootTasks, function);
    return taskTracker;
}
Also used : TaskTracker(org.apache.hadoop.hive.ql.exec.repl.util.TaskTracker) AlterDatabase(org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.LoadDatabase.AlterDatabase) AddDependencyToLeaves(org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves)
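
The javadoc above is the key point of Example 10: the AlterDatabase (last repl id) tasks are attached to the leaves of scope.rootTasks rather than added as extra roots, so they can only run after everything in the current graph has succeeded. The following is a hypothetical sketch of why that ordering matters; the Node type and the tiny runner are invented for illustration and do not model Hive's actual scheduler.

import java.util.ArrayList;
import java.util.List;

public class OrderingSketch {

    static class Node {
        final String name;
        final boolean fails;
        final List<Node> dependents = new ArrayList<>();
        Node(String name, boolean fails) { this.name = name; this.fails = fails; }
    }

    // Run a node, then its dependents; a failure stops that branch.
    static void run(Node n) {
        if (n.fails) {
            System.out.println(n.name + " FAILED, dependents skipped");
            return;
        }
        System.out.println(n.name + " ok");
        for (Node d : n.dependents) {
            run(d);
        }
    }

    public static void main(String[] args) {
        Node bootstrapLoad = new Node("bootstrap-load", true);       // simulate a failing load task
        Node updateReplId = new Node("update-last-repl-id", false);

        // Wired as a leaf dependency: never runs when the load fails, so no bogus repl id is recorded.
        bootstrapLoad.dependents.add(updateReplId);
        run(bootstrapLoad);

        // Wired as a sibling root instead, it would run regardless of the failure and advertise
        // a last repl id that does not reflect the real state of the target database.
        run(updateReplId);
    }
}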

Aggregations

AddDependencyToLeaves (org.apache.hadoop.hive.ql.exec.repl.util.AddDependencyToLeaves): 11
Task (org.apache.hadoop.hive.ql.exec.Task): 6
Path (org.apache.hadoop.fs.Path): 5
HashMap (java.util.HashMap): 4
AlterDatabase (org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.LoadDatabase.AlterDatabase): 4
ArrayList (java.util.ArrayList): 3
Database (org.apache.hadoop.hive.metastore.api.Database): 3
LoadDatabase (org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.LoadDatabase): 3
TaskTracker (org.apache.hadoop.hive.ql.exec.repl.util.TaskTracker): 3
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 3
DependencyCollectionWork (org.apache.hadoop.hive.ql.plan.DependencyCollectionWork): 3
LinkedHashMap (java.util.LinkedHashMap): 2
Map (java.util.Map): 2
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 2
ReplStateLogWork (org.apache.hadoop.hive.ql.exec.repl.ReplStateLogWork): 2
Hive (org.apache.hadoop.hive.ql.metadata.Hive): 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2
TException (org.apache.thrift.TException): 2
FileNotFoundException (java.io.FileNotFoundException): 1
IOException (java.io.IOException): 1