
Example 1 with DagNode

use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.

From class DagScheduler, method execute.

/**
 * Method to start executing the nodes in workflow DAGs.
 *
 * @param service ExecutorService
 * @param workflowDag instance of workflow dag that needs to be executed
 * @throws Exception will be thrown if any error occurs
 */
private void execute(ExecutorService service, WorkflowDag workflowDag) throws Exception {
    // Nodes at the same level are executed in parallel
    log.info("Running workloads");
    List<DagNode> nodes = workflowDag.getNodeList();
    int curRound = 1;
    do {
        log.warn("===================================================================");
        log.warn("Running workloads for round num " + curRound);
        log.warn("===================================================================");
        Queue<DagNode> queue = new PriorityQueue<>();
        for (DagNode dagNode : nodes) {
            queue.add(dagNode.clone());
        }
        // Each pass over this loop executes one level of the DAG: run everything
        // currently queued in parallel, then enqueue the children for the next pass.
        do {
            List<Future> futures = new ArrayList<>();
            Set<DagNode> childNodes = new HashSet<>();
            while (queue.size() > 0) {
                DagNode nodeToExecute = queue.poll();
                log.warn("Executing node \"" + nodeToExecute.getConfig().getOtherConfigs().get(CONFIG_NAME) + "\" :: " + nodeToExecute.getConfig());
                int finalCurRound = curRound;
                futures.add(service.submit(() -> executeNode(nodeToExecute, finalCurRound)));
                if (nodeToExecute.getChildNodes().size() > 0) {
                    childNodes.addAll(nodeToExecute.getChildNodes());
                }
            }
            queue.addAll(childNodes);
            childNodes.clear();
            for (Future future : futures) {
                future.get(1, TimeUnit.HOURS);
            }
        } while (queue.size() > 0);
        log.info("Finished workloads for round num " + curRound);
        if (curRound < workflowDag.getRounds()) {
            new DelayNode(workflowDag.getIntermittentDelayMins()).execute(executionContext, curRound);
        }
        // After each round, report and flush the metrics
        Metrics.flush();
    } while (curRound++ < workflowDag.getRounds());
    log.info("Finished workloads");
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) PriorityQueue(java.util.PriorityQueue) DelayNode(org.apache.hudi.integ.testsuite.dag.nodes.DelayNode) HashSet(java.util.HashSet)
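
DagScheduler.execute walks the DAG level by level: every node currently in the queue is submitted to the executor, the resulting futures are awaited, and only then do the children form the next level's queue. The sketch below is a minimal, self-contained illustration of that traversal pattern using a hypothetical Node class and plain Runnables; it is not the Hudi DagScheduler API, just the shape of the loop.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class LevelOrderRunner {

    // Hypothetical node type: a unit of work plus its children in the DAG.
    static class Node {
        final String name;
        final List<Node> children = new ArrayList<>();

        Node(String name) {
            this.name = name;
        }

        Node addChild(Node child) {
            children.add(child);
            return this;
        }

        void run() {
            System.out.println("executing " + name);
        }
    }

    // Run all nodes of one level in parallel, wait for them, then move on to their children.
    static void execute(ExecutorService service, List<Node> roots) throws Exception {
        Queue<Node> queue = new ArrayDeque<>(roots);
        while (!queue.isEmpty()) {
            List<Future<?>> futures = new ArrayList<>();
            Set<Node> nextLevel = new HashSet<>();
            while (!queue.isEmpty()) {
                Node node = queue.poll();
                futures.add(service.submit(node::run));
                nextLevel.addAll(node.children);
            }
            // Barrier: the next level starts only after every node of this level has finished.
            for (Future<?> future : futures) {
                future.get(1, TimeUnit.HOURS);
            }
            queue.addAll(nextLevel);
        }
    }

    public static void main(String[] args) throws Exception {
        Node root = new Node("insert").addChild(new Node("upsert")).addChild(new Node("validate"));
        ExecutorService service = Executors.newFixedThreadPool(2);
        try {
            execute(service, List.of(root));
        } finally {
            service.shutdown();
        }
    }
}

The key property is the barrier between levels: a child never starts before every node of the previous level has completed (or the one-hour wait expires).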

Example 2 with DagNode

use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.

From class DagUtils, method convertJsonToDagNode.

private static DagNode convertJsonToDagNode(Map<String, DagNode> allNodes, String name, JsonNode node) throws IOException {
    String type = node.get(DeltaConfig.Config.TYPE).asText();
    final DagNode retNode = convertJsonToDagNode(node, type, name);
    // Register the new node as a child of every parent named in the comma-separated deps list.
    Arrays.asList(node.get(DeltaConfig.Config.DEPENDENCIES).textValue().split(",")).stream().forEach(dep -> {
        DagNode parentNode = allNodes.get(dep);
        if (parentNode != null) {
            parentNode.addChildNode(retNode);
        }
    });
    return retNode;
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode)
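
convertJsonToDagNode resolves the node's comma-separated dependency list against the nodes parsed so far and registers the new node as a child of each named parent; names that do not resolve are simply skipped. Below is a minimal sketch of that wiring, using a hypothetical Node class and plain collections instead of Hudi's DagNode and Jackson types; the node names are illustrative only.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DependencyWiring {

    // Hypothetical stand-in for DagNode: a name plus child links.
    static class Node {
        final String name;
        final List<Node> children = new ArrayList<>();

        Node(String name) {
            this.name = name;
        }
    }

    // Attach 'node' as a child of every parent named in the comma-separated deps string.
    static void wire(Map<String, Node> allNodes, Node node, String deps) {
        for (String dep : deps.split(",")) {
            Node parent = allNodes.get(dep.trim());
            if (parent != null) { // unknown parent names are simply ignored
                parent.children.add(node);
            }
        }
    }

    public static void main(String[] args) {
        Map<String, Node> allNodes = new HashMap<>();
        Node insert = new Node("first_insert");
        allNodes.put(insert.name, insert);

        Node upsert = new Node("first_upsert");
        wire(allNodes, upsert, "first_insert"); // the upsert depends on the insert
        allNodes.put(upsert.name, upsert);

        System.out.println(insert.children.size()); // prints 1
    }
}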

Example 3 with DagNode

use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.

From class SimpleWorkflowDagGenerator, method build.

@Override
public WorkflowDag build() {
    DagNode root = new InsertNode(DeltaConfig.Config.newBuilder().withNumRecordsToInsert(100).withNumInsertPartitions(1).withNumTimesToRepeat(2).withRecordSize(1000).build());
    DagNode child1 = new InsertNode(DeltaConfig.Config.newBuilder().withNumRecordsToInsert(100).withNumInsertPartitions(1).withNumTimesToRepeat(2).withRecordSize(1000).build());
    root.addChildNode(child1);
    DagNode child1OfChild1 = new UpsertNode(DeltaConfig.Config.newBuilder().withNumRecordsToUpdate(100).withNumUpsertPartitions(2).withNumTimesToRepeat(1).withRecordSize(1000).build());
    // child1 gets two children below (an upsert and a Hive query), exercising 2 nodes running in parallel
    child1.addChildNode(child1OfChild1);
    List<Pair<String, Integer>> queryAndResult = new ArrayList<>();
    queryAndResult.add(Pair.of("select " + "count(*) from testdb1.table1 group " + "by rider having count(*) < 1", 0));
    DagNode child2OfChild1 = new HiveQueryNode(DeltaConfig.Config.newBuilder().withHiveQueryAndResults(queryAndResult).withHiveLocal(true).build());
    child1.addChildNode(child2OfChild1);
    List<DagNode> rootNodes = new ArrayList<>();
    rootNodes.add(root);
    return new WorkflowDag(rootNodes);
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) ArrayList(java.util.ArrayList) HiveQueryNode(org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode) InsertNode(org.apache.hudi.integ.testsuite.dag.nodes.InsertNode) UpsertNode(org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode) Pair(org.apache.hudi.common.util.collection.Pair)

Example 4 with DagNode

use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.

From class ComplexDagGenerator, method build.

@Override
public WorkflowDag build() {
    // root node
    DagNode root = new InsertNode(Config.newBuilder().withNumRecordsToInsert(1000).withNumInsertPartitions(3).withRecordSize(1000).build());
    // child node1
    DagNode child1 = new UpsertNode(Config.newBuilder().withNumRecordsToUpdate(999).withNumRecordsToInsert(1000).withNumUpsertFiles(1).withNumUpsertPartitions(1).withNumInsertPartitions(1).withRecordSize(10000).build());
    // function used to build ValidateNode
    Function<List<DagNode<JavaRDD<WriteStatus>>>, Boolean> function = (dagNodes) -> {
        // parent1 is the upsert node; its results must cover all configured inserts + updates
        DagNode<JavaRDD<WriteStatus>> parent1 = dagNodes.get(0);
        List<WriteStatus> statuses = parent1.getResult().collect();
        long totalRecordsTouched = statuses.stream().map(st -> st.getStat().getNumUpdateWrites() + st.getStat().getNumInserts()).reduce((a, b) -> a + b).get();
        boolean b1 = totalRecordsTouched == parent1.getConfig().getNumRecordsInsert() + parent1.getConfig().getNumRecordsUpsert();
        // expect more write statuses (files written) than the configured number of upsert files
        boolean b2 = statuses.size() > parent1.getConfig().getNumUpsertFiles();
        // parent2 is the root insert node (the upsert node's parent); check its counts as well
        DagNode<JavaRDD<WriteStatus>> parent2 = parent1.getParentNodes().get(0);
        statuses = parent2.getResult().collect();
        totalRecordsTouched = statuses.stream().map(st -> st.getStat().getNumUpdateWrites() + st.getStat().getNumInserts()).reduce((a, b) -> a + b).get();
        boolean b3 = totalRecordsTouched == parent2.getConfig().getNumRecordsInsert() * parent2.getConfig().getNumInsertPartitions() + parent2.getConfig().getNumRecordsUpsert();
        return b1 & b2 & b3;
    };
    // child node2
    DagNode child2 = new ValidateNode(Config.newBuilder().build(), function);
    // create relationship between nodes
    root.addChildNode(child1);
    // child1.addParentNode(root);
    child1.addChildNode(child2);
    // child2.addParentNode(child1);
    List<DagNode> rootNodes = new ArrayList<>();
    rootNodes.add(root);
    return new WorkflowDag(rootNodes);
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) InsertNode(org.apache.hudi.integ.testsuite.dag.nodes.InsertNode) UpsertNode(org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode) ValidateNode(org.apache.hudi.integ.testsuite.dag.nodes.ValidateNode) Config(org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config) WriteStatus(org.apache.hudi.client.WriteStatus) JavaRDD(org.apache.spark.api.java.JavaRDD) Function(java.util.function.Function) List(java.util.List) ArrayList(java.util.ArrayList)
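
The validator in this example is pure bookkeeping over the parents' write statistics: it sums inserts plus updates per parent and compares the totals with the parents' configured record counts. Below is a stripped-down sketch of that check, using hypothetical stat and config records in place of Hudi's WriteStatus and DeltaConfig.Config; the numbers are illustrative only.

import java.util.List;
import java.util.function.Function;

public class RecordCountCheck {

    // Hypothetical per-file write statistics (stand-in for WriteStatus and its write stat).
    record WriteStat(long numInserts, long numUpdateWrites) {}

    // Hypothetical node configuration (stand-in for DeltaConfig.Config).
    record NodeConfig(long numRecordsInsert, long numRecordsUpsert) {}

    // True if the records actually touched match what the node was configured to write.
    static Function<List<WriteStat>, Boolean> countsMatch(NodeConfig config) {
        return stats -> {
            long touched = stats.stream()
                    .mapToLong(st -> st.numInserts() + st.numUpdateWrites())
                    .sum();
            return touched == config.numRecordsInsert() + config.numRecordsUpsert();
        };
    }

    public static void main(String[] args) {
        NodeConfig config = new NodeConfig(1000, 999);
        List<WriteStat> stats = List.of(new WriteStat(1000, 500), new WriteStat(0, 499));
        System.out.println(countsMatch(config).apply(stats)); // prints true
    }
}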

Example 5 with DagNode

use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.

From class TestDagUtils, method testConvertYamlToDag.

@Test
public void testConvertYamlToDag() throws Exception {
    WorkflowDag dag = DagUtils.convertYamlToDag(UtilitiesTestBase.Helpers.readFileFromAbsolutePath((System.getProperty("user.dir") + "/.." + COW_DAG_DOCKER_DEMO_RELATIVE_PATH)));
    assertEquals(dag.getDagName(), "unit-test-cow-dag");
    assertEquals(dag.getRounds(), 1);
    assertEquals(dag.getIntermittentDelayMins(), 10);
    assertEquals(dag.getNodeList().size(), 1);
    assertEquals(((DagNode) dag.getNodeList().get(0)).getParentNodes().size(), 0);
    assertEquals(((DagNode) dag.getNodeList().get(0)).getChildNodes().size(), 1);
    DagNode firstChild = (DagNode) ((DagNode) dag.getNodeList().get(0)).getChildNodes().get(0);
    assertEquals(firstChild.getParentNodes().size(), 1);
    assertEquals(firstChild.getChildNodes().size(), 1);
    assertEquals(((DagNode) firstChild.getChildNodes().get(0)).getChildNodes().size(), 1);
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) Test(org.junit.jupiter.api.Test)
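
The assertions above describe a single-root chain: one root node with no parents, a first child with one parent and one child, and a grandchild that again has exactly one child. For comparison, the sketch below builds a chain of the same shape programmatically with the builder calls shown in the earlier examples; the node types and config values are illustrative only and are not necessarily what the docker-demo YAML declares.

import java.util.ArrayList;
import java.util.List;
import org.apache.hudi.integ.testsuite.configuration.DeltaConfig;
import org.apache.hudi.integ.testsuite.dag.WorkflowDag;
import org.apache.hudi.integ.testsuite.dag.nodes.DagNode;
import org.apache.hudi.integ.testsuite.dag.nodes.InsertNode;
import org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode;

public class ChainDagSketch {

    public static WorkflowDag build() {
        DagNode root = new InsertNode(DeltaConfig.Config.newBuilder()
                .withNumRecordsToInsert(100).withNumInsertPartitions(1).withRecordSize(1000).build());
        DagNode firstChild = new UpsertNode(DeltaConfig.Config.newBuilder()
                .withNumRecordsToUpdate(100).withNumUpsertPartitions(1).withRecordSize(1000).build());
        DagNode grandChild = new UpsertNode(DeltaConfig.Config.newBuilder()
                .withNumRecordsToUpdate(100).withNumUpsertPartitions(1).withRecordSize(1000).build());
        DagNode greatGrandChild = new UpsertNode(DeltaConfig.Config.newBuilder()
                .withNumRecordsToUpdate(100).withNumUpsertPartitions(1).withRecordSize(1000).build());
        // addChildNode evidently wires the parent link too: the YAML conversion in Example 2
        // only calls addChildNode, yet the test sees non-empty getParentNodes().
        root.addChildNode(firstChild);
        firstChild.addChildNode(grandChild);
        grandChild.addChildNode(greatGrandChild);
        List<DagNode> rootNodes = new ArrayList<>();
        rootNodes.add(root);
        return new WorkflowDag(rootNodes);
    }
}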

Aggregations

DagNode (org.apache.hudi.integ.testsuite.dag.nodes.DagNode): 12
ArrayList (java.util.ArrayList): 6
InsertNode (org.apache.hudi.integ.testsuite.dag.nodes.InsertNode): 5
HiveQueryNode (org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode): 3
UpsertNode (org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode): 3
Test (org.junit.jupiter.api.Test): 3
WorkflowDag (org.apache.hudi.integ.testsuite.dag.WorkflowDag): 2
HiveSyncNode (org.apache.hudi.integ.testsuite.dag.nodes.HiveSyncNode): 2
JsonNode (com.fasterxml.jackson.databind.JsonNode): 1
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1
ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode): 1
YAMLFactory (com.fasterxml.jackson.dataformat.yaml.YAMLFactory): 1
IOException (java.io.IOException): 1
HashMap (java.util.HashMap): 1
HashSet (java.util.HashSet): 1
List (java.util.List): 1
Entry (java.util.Map.Entry): 1
PriorityQueue (java.util.PriorityQueue): 1
Future (java.util.concurrent.Future): 1
Function (java.util.function.Function): 1