Usage example of org.apache.hudi.integ.testsuite.dag.nodes.InsertNode from the Apache Hudi project: the build method of the SimpleWorkflowDagGenerator class.
@Override
public WorkflowDag build() {
  // Root of the DAG: an initial batch of inserts.
  DagNode rootInsert = new InsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToInsert(100)
      .withNumInsertPartitions(1)
      .withNumTimesToRepeat(2)
      .withRecordSize(1000)
      .build());
  // Second insert round, chained directly under the root.
  DagNode secondInsert = new InsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToInsert(100)
      .withNumInsertPartitions(1)
      .withNumTimesToRepeat(2)
      .withRecordSize(1000)
      .build());
  rootInsert.addChildNode(secondInsert);
  // Upsert step hanging off the second insert. Giving the second insert two
  // children below exercises running two nodes in parallel.
  DagNode upsertChild = new UpsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToUpdate(100)
      .withNumUpsertPartitions(2)
      .withNumTimesToRepeat(1)
      .withRecordSize(1000)
      .build());
  secondInsert.addChildNode(upsertChild);
  // Hive validation query: expects zero rider groups with fewer than one row.
  List<Pair<String, Integer>> queryAndResult = new ArrayList<>();
  queryAndResult.add(Pair.of("select " + "count(*) from testdb1.table1 group " + "by rider having count(*) < 1", 0));
  DagNode hiveQueryChild = new HiveQueryNode(DeltaConfig.Config.newBuilder()
      .withHiveQueryAndResults(queryAndResult)
      .withHiveLocal(true)
      .build());
  secondInsert.addChildNode(hiveQueryChild);
  // The DAG is anchored by a single root node.
  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(rootInsert);
  return new WorkflowDag(rootNodes);
}
Usage example of org.apache.hudi.integ.testsuite.dag.nodes.InsertNode from the Apache Hudi project: the build method of the ComplexDagGenerator class.
@Override
public WorkflowDag build() {
  // Root node: insert 1000 records across 3 partitions.
  DagNode root = new InsertNode(Config.newBuilder()
      .withNumRecordsToInsert(1000)
      .withNumInsertPartitions(3)
      .withRecordSize(1000)
      .build());
  // Child node 1: a mixed upsert/insert batch against a single partition.
  DagNode child1 = new UpsertNode(Config.newBuilder()
      .withNumRecordsToUpdate(999)
      .withNumRecordsToInsert(1000)
      .withNumUpsertFiles(1)
      .withNumUpsertPartitions(1)
      .withNumInsertPartitions(1)
      .withRecordSize(10000)
      .build());
  // Validation function used to build the ValidateNode. It checks, for the
  // upsert node and its insert parent, that the number of records touched
  // (updates + inserts reported in the write statuses) matches what the
  // node's config requested.
  Function<List<DagNode<JavaRDD<WriteStatus>>>, Boolean> function = (dagNodes) -> {
    DagNode<JavaRDD<WriteStatus>> parent1 = dagNodes.get(0);
    List<WriteStatus> statuses = parent1.getResult().collect();
    // mapToLong(...).sum() is safe on an empty status list, unlike the
    // previous reduce(...).get() which would throw NoSuchElementException.
    long totalRecordsTouched = statuses.stream()
        .mapToLong(st -> st.getStat().getNumUpdateWrites() + st.getStat().getNumInserts())
        .sum();
    boolean b1 = totalRecordsTouched == parent1.getConfig().getNumRecordsInsert() + parent1.getConfig().getNumRecordsUpsert();
    boolean b2 = statuses.size() > parent1.getConfig().getNumUpsertFiles();
    // Walk up to the insert node that feeds the upsert node.
    DagNode<JavaRDD<WriteStatus>> parent2 = parent1.getParentNodes().get(0);
    statuses = parent2.getResult().collect();
    totalRecordsTouched = statuses.stream()
        .mapToLong(st -> st.getStat().getNumUpdateWrites() + st.getStat().getNumInserts())
        .sum();
    // The insert node writes its record count into each of its partitions.
    boolean b3 = totalRecordsTouched == parent2.getConfig().getNumRecordsInsert() * parent2.getConfig().getNumInsertPartitions() + parent2.getConfig().getNumRecordsUpsert();
    // Logical AND (&&) instead of bitwise & — short-circuits and is the
    // idiomatic operator for boolean operands.
    return b1 && b2 && b3;
  };
  // Child node 2: validates the results of the two write nodes above.
  DagNode child2 = new ValidateNode(Config.newBuilder().build(), function);
  // Wire up the DAG: root -> child1 -> child2. addChildNode also records the
  // parent link, so no explicit addParentNode calls are needed.
  root.addChildNode(child1);
  child1.addChildNode(child2);
  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(root);
  return new WorkflowDag(rootNodes);
}
Usage example of org.apache.hudi.integ.testsuite.dag.nodes.InsertNode from the Apache Hudi project: the testWorkloadOperationSequenceBuilder method of the TestWorkflowBuilder class.
@Test
public void testWorkloadOperationSequenceBuilder() {
  // Build a two-node DAG: an insert root with a single upsert child.
  DagNode root = new InsertNode(DeltaConfig.Config.newBuilder().withNumRecordsToInsert(10000).withNumInsertPartitions(1).withNumTimesToRepeat(2).withRecordSize(1000).build());
  DagNode child1 = new UpsertNode(DeltaConfig.Config.newBuilder().withNumRecordsToUpdate(10000).withNumInsertPartitions(1).withNumTimesToRepeat(2).withRecordSize(1000).build());
  root.addChildNode(child1);
  child1.addParentNode(root);
  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(root);
  WorkflowDag workflowDag = new WorkflowDag(rootNodes);
  // JUnit's assertEquals contract is (expected, actual); the original had
  // the arguments reversed, which inverts failure messages.
  assertEquals(1, workflowDag.getNodeList().size());
  assertEquals(1, ((DagNode) workflowDag.getNodeList().get(0)).getChildNodes().size());
  // Verify the root node type and that its config round-trips the builder values.
  DagNode dagNode = (DagNode) workflowDag.getNodeList().get(0);
  assertTrue(dagNode instanceof InsertNode);
  DeltaConfig.Config config = dagNode.getConfig();
  assertEquals(1, config.getNumInsertPartitions());
  assertEquals(1000, config.getRecordSize());
  assertEquals(2, config.getRepeatCount());
  assertEquals(10000, config.getNumRecordsInsert());
  assertEquals(0, config.getNumRecordsUpsert());
  // Verify the child node type and its config: upsert-only record counts.
  dagNode = (DagNode) ((DagNode) workflowDag.getNodeList().get(0)).getChildNodes().get(0);
  assertTrue(dagNode instanceof UpsertNode);
  config = dagNode.getConfig();
  assertEquals(1, config.getNumInsertPartitions());
  assertEquals(1000, config.getRecordSize());
  assertEquals(2, config.getRepeatCount());
  assertEquals(0, config.getNumRecordsInsert());
  assertEquals(10000, config.getNumRecordsUpsert());
}
Usage example of org.apache.hudi.integ.testsuite.dag.nodes.InsertNode from the Apache Hudi project: the build method of the HiveSyncDagGenerator class.
@Override
public WorkflowDag build() {
  // Linear DAG: insert -> hive sync -> hive validation query.
  DagNode insertRoot = new InsertNode(Config.newBuilder()
      .withNumRecordsToInsert(100)
      .withNumInsertPartitions(1)
      .withNumTimesToRepeat(1)
      .withRecordSize(1000)
      .build());
  // Sync the written table to the Hive metastore.
  DagNode syncNode = new HiveSyncNode(Config.newBuilder().build());
  insertRoot.addChildNode(syncNode);
  // Validation query: expects zero rider groups with fewer than one row.
  String validationQuery = "select " + "count(*) from testdb1.table1 group " + "by rider having count(*) < 1";
  DagNode queryNode = new HiveQueryNode(Config.newBuilder()
      .withHiveQueryAndResults(Arrays.asList(Pair.of(validationQuery, 0)))
      .build());
  syncNode.addChildNode(queryNode);
  // Single-root DAG.
  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(insertRoot);
  return new WorkflowDag(rootNodes);
}
Usage example of org.apache.hudi.integ.testsuite.dag.nodes.InsertNode from the Apache Hudi project: the build method of the HiveSyncDagGeneratorMOR class.
@Override
public WorkflowDag build() {
  // Linear DAG: insert -> local hive sync -> hive validation queries.
  DagNode insertRoot = new InsertNode(Config.newBuilder()
      .withNumRecordsToInsert(100)
      .withNumInsertPartitions(1)
      .withNumTimesToRepeat(1)
      .withRecordSize(1000)
      .build());
  // Sync the written table to a local Hive metastore.
  DagNode syncNode = new HiveSyncNode(Config.newBuilder().withHiveLocal(true).build());
  insertRoot.addChildNode(syncNode);
  // Two validation queries: no rider group may have fewer than one row, and
  // the table must contain exactly the 100 inserted records. The hive
  // property forces the plain HiveInputFormat for the MOR read path.
  String groupQuery = "select " + "count(*) from testdb1.hive_trips group " + "by rider having count(*) < 1";
  String countQuery = "select " + "count(*) from testdb1.hive_trips ";
  DagNode queryNode = new HiveQueryNode(Config.newBuilder()
      .withHiveLocal(true)
      .withHiveQueryAndResults(Arrays.asList(Pair.of(groupQuery, 0), Pair.of(countQuery, 100)))
      .withHiveProperties(Arrays.asList("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat"))
      .build());
  syncNode.addChildNode(queryNode);
  // Single-root DAG.
  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(insertRoot);
  return new WorkflowDag(rootNodes);
}
Aggregated usage examples.