use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.
the class DagScheduler method execute.
/**
 * Executes the workflow DAG one level at a time, repeating the whole DAG once per round.
 *
 * <p>All nodes of a level are submitted to {@code service} before any of them is awaited, so they
 * may run concurrently. The next level is started only after every future of the current level
 * has completed.
 *
 * @param service executor the node tasks are submitted to
 * @param workflowDag workflow DAG providing the root nodes, the number of rounds and the delay
 *     applied between rounds
 * @throws Exception if any error occurs, e.g. a submitted node fails or does not complete within
 *     one hour
 */
private void execute(ExecutorService service, WorkflowDag workflowDag) throws Exception {
  final String banner = "===================================================================";
  log.info("Running workloads");
  List<DagNode> rootNodes = workflowDag.getNodeList();
  int curRound = 1;
  do {
    // The round counter is mutated by the loop condition, so the lambda needs a final snapshot.
    final int round = curRound;
    log.warn(banner);
    log.warn("Running workloads for round num " + curRound);
    log.warn(banner);

    // Each round starts from fresh clones of the root nodes.
    Queue<DagNode> pending = new PriorityQueue<>();
    for (DagNode rootNode : rootNodes) {
      pending.add(rootNode.clone());
    }

    do {
      List<Future<?>> levelFutures = new ArrayList<>();
      Set<DagNode> nextLevel = new HashSet<>();
      // Submit every node of the current level and remember its children for the next level.
      while (!pending.isEmpty()) {
        DagNode current = pending.poll();
        log.warn("Executing node \"" + current.getConfig().getOtherConfigs().get(CONFIG_NAME) + "\" :: " + current.getConfig());
        levelFutures.add(service.submit(() -> executeNode(current, round)));
        nextLevel.addAll(current.getChildNodes());
      }
      pending.addAll(nextLevel);
      // Wait for the whole level to finish before moving on to the children.
      for (Future<?> levelFuture : levelFutures) {
        levelFuture.get(1, TimeUnit.HOURS);
      }
    } while (!pending.isEmpty());

    log.info("Finished workloads for round num " + curRound);
    // Pause between rounds, but not after the last one.
    if (curRound < workflowDag.getRounds()) {
      new DelayNode(workflowDag.getIntermittentDelayMins()).execute(executionContext, curRound);
    }
    // Flush the metrics once the round is complete.
    Metrics.flush();
  } while (curRound++ < workflowDag.getRounds());
  log.info("Finished workloads");
}
use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.
the class DagUtils method convertJsonToDagNode.
/**
 * Builds the node described by {@code node} and registers it as a child of each of its
 * dependencies that is already present in {@code allNodes}.
 *
 * <p>Dependencies are read from the {@code DeltaConfig.Config.DEPENDENCIES} field as a
 * comma-separated string. Names that are not found in {@code allNodes} are skipped.
 *
 * <p>NOTE(review): names are looked up exactly as split, without trimming, and a missing or
 * non-textual dependencies field would raise a NullPointerException here - confirm the input
 * format guarantees neither case occurs.
 *
 * @param allNodes previously built nodes, keyed by name
 * @param name name of the node being built
 * @param node JSON description of the node
 * @return the newly built node
 * @throws IOException declared by the signature; presumably raised while building the node
 */
private static DagNode convertJsonToDagNode(Map<String, DagNode> allNodes, String name, JsonNode node) throws IOException {
  String type = node.get(DeltaConfig.Config.TYPE).asText();
  DagNode retNode = convertJsonToDagNode(node, type, name);
  String[] dependencyNames = node.get(DeltaConfig.Config.DEPENDENCIES).textValue().split(",");
  for (String dependencyName : dependencyNames) {
    DagNode parentNode = allNodes.get(dependencyName);
    if (parentNode == null) {
      // Unknown dependency name: nothing to link.
      continue;
    }
    parentNode.addChildNode(retNode);
  }
  return retNode;
}
use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.
the class SimpleWorkflowDagGenerator method build.
/**
 * Builds a small workflow DAG: an insert root, a second insert beneath it, and an upsert plus a
 * Hive query node that both hang off the second insert.
 *
 * @return a workflow DAG whose only root node is the first insert
 */
@Override
public WorkflowDag build() {
  DagNode root = new InsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToInsert(100)
      .withNumInsertPartitions(1)
      .withNumTimesToRepeat(2)
      .withRecordSize(1000)
      .build());
  DagNode child1 = new InsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToInsert(100)
      .withNumInsertPartitions(1)
      .withNumTimesToRepeat(2)
      .withRecordSize(1000)
      .build());
  DagNode upsertChild = new UpsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToUpdate(100)
      .withNumUpsertPartitions(2)
      .withNumTimesToRepeat(1)
      .withRecordSize(1000)
      .build());

  // Query paired with the result it is expected to produce.
  List<Pair<String, Integer>> queryAndResult = new ArrayList<>();
  queryAndResult.add(Pair.of("select count(*) from testdb1.table1 group by rider having count(*) < 1", 0));
  DagNode hiveQueryChild = new HiveQueryNode(DeltaConfig.Config.newBuilder()
      .withHiveQueryAndResults(queryAndResult)
      .withHiveLocal(true)
      .build());

  // Wire the nodes together. child1 gets two children, so both sit on the same DAG level
  // (this is what exercises running 2 nodes in parallel).
  root.addChildNode(child1);
  child1.addChildNode(upsertChild);
  child1.addChildNode(hiveQueryChild);

  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(root);
  return new WorkflowDag(rootNodes);
}
use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.
the class ComplexDagGenerator method build.
/**
 * Builds a three-node chain: an insert root, an upsert child, and a validate node that checks
 * the write statuses of the first node it is handed and of that node's first parent.
 *
 * @return a workflow DAG whose only root node is the insert node
 */
@Override
public WorkflowDag build() {
  // Root of the DAG.
  DagNode root = new InsertNode(Config.newBuilder()
      .withNumRecordsToInsert(1000)
      .withNumInsertPartitions(3)
      .withRecordSize(1000)
      .build());
  // First child.
  DagNode child1 = new UpsertNode(Config.newBuilder()
      .withNumRecordsToUpdate(999)
      .withNumRecordsToInsert(1000)
      .withNumUpsertFiles(1)
      .withNumUpsertPartitions(1)
      .withNumInsertPartitions(1)
      .withRecordSize(10000)
      .build());

  // Validation callback handed to the ValidateNode.
  Function<List<DagNode<JavaRDD<WriteStatus>>>, Boolean> validator = (nodesToCheck) -> {
    // First node: records touched must equal configured inserts + upserts, and it must have
    // produced more write statuses than the configured number of upsert files.
    DagNode<JavaRDD<WriteStatus>> firstNode = nodesToCheck.get(0);
    List<WriteStatus> firstStatuses = firstNode.getResult().collect();
    long firstTotal = firstStatuses.stream()
        .map(status -> status.getStat().getNumUpdateWrites() + status.getStat().getNumInserts())
        .reduce((left, right) -> left + right)
        .get();
    boolean recordCountMatches = firstTotal == firstNode.getConfig().getNumRecordsInsert() + firstNode.getConfig().getNumRecordsUpsert();
    boolean enoughStatuses = firstStatuses.size() > firstNode.getConfig().getNumUpsertFiles();

    // Its first parent: records touched must equal inserts * insert partitions + upserts.
    DagNode<JavaRDD<WriteStatus>> parentNode = firstNode.getParentNodes().get(0);
    List<WriteStatus> parentStatuses = parentNode.getResult().collect();
    long parentTotal = parentStatuses.stream()
        .map(status -> status.getStat().getNumUpdateWrites() + status.getStat().getNumInserts())
        .reduce((left, right) -> left + right)
        .get();
    boolean parentCountMatches = parentTotal == parentNode.getConfig().getNumRecordsInsert() * parentNode.getConfig().getNumInsertPartitions() + parentNode.getConfig().getNumRecordsUpsert();

    return recordCountMatches && enoughStatuses && parentCountMatches;
  };
  // Second child.
  DagNode child2 = new ValidateNode(Config.newBuilder().build(), validator);

  // Link the chain root -> child1 -> child2.
  // NOTE(review): parents are not registered explicitly; presumably addChildNode also records
  // the parent link - confirm against DagNode.
  root.addChildNode(child1);
  child1.addChildNode(child2);

  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(root);
  return new WorkflowDag(rootNodes);
}
use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.
the class TestDagUtils method testConvertYamlToDag.
/**
 * Verifies that the docker-demo COW DAG YAML is converted into a {@link WorkflowDag} with the
 * expected name, round count, delay, and parent/child structure for the first three levels.
 *
 * <p>Assertions pass the expected value first, then the actual value, so that a failure message
 * reports the two the right way round.
 */
@Test
public void testConvertYamlToDag() throws Exception {
  WorkflowDag dag = DagUtils.convertYamlToDag(UtilitiesTestBase.Helpers.readFileFromAbsolutePath((System.getProperty("user.dir") + "/.." + COW_DAG_DOCKER_DEMO_RELATIVE_PATH)));
  assertEquals("unit-test-cow-dag", dag.getDagName());
  assertEquals(1, dag.getRounds());
  assertEquals(10, dag.getIntermittentDelayMins());
  assertEquals(1, dag.getNodeList().size());

  // Single root: no parents, exactly one child.
  DagNode root = (DagNode) dag.getNodeList().get(0);
  Assertions.assertEquals(0, root.getParentNodes().size());
  assertEquals(1, root.getChildNodes().size());

  // First child: one parent (the root) and one child of its own.
  DagNode firstChild = (DagNode) root.getChildNodes().get(0);
  assertEquals(1, firstChild.getParentNodes().size());
  assertEquals(1, firstChild.getChildNodes().size());

  // Second-level child also has exactly one child.
  DagNode secondChild = (DagNode) firstChild.getChildNodes().get(0);
  assertEquals(1, secondChild.getChildNodes().size());
}
Aggregations