Search in sources :

Example 1 with UpsertNode

use of org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode in project hudi by apache.

the class SimpleWorkflowDagGenerator method build.

/**
 * Assembles a small workflow: an insert root, followed by a second insert, which
 * then fans out to an upsert node and a Hive query node.
 *
 * @return a workflow DAG whose only root is the first insert node
 */
@Override
public WorkflowDag build() {
    // First insert stage; it becomes the single root of the DAG.
    DeltaConfig.Config rootInsertConfig = DeltaConfig.Config.newBuilder()
            .withNumRecordsToInsert(100)
            .withNumInsertPartitions(1)
            .withNumTimesToRepeat(2)
            .withRecordSize(1000)
            .build();
    DagNode root = new InsertNode(rootInsertConfig);

    // Second insert stage: same settings, but built as its own Config instance.
    DeltaConfig.Config secondInsertConfig = DeltaConfig.Config.newBuilder()
            .withNumRecordsToInsert(100)
            .withNumInsertPartitions(1)
            .withNumTimesToRepeat(2)
            .withRecordSize(1000)
            .build();
    DagNode secondInsert = new InsertNode(secondInsertConfig);
    root.addChildNode(secondInsert);

    // The second insert gets two children (upsert, then Hive query); per the
    // original note this is what exercises running 2 nodes in parallel.
    DeltaConfig.Config upsertConfig = DeltaConfig.Config.newBuilder()
            .withNumRecordsToUpdate(100)
            .withNumUpsertPartitions(2)
            .withNumTimesToRepeat(1)
            .withRecordSize(1000)
            .build();
    DagNode upsert = new UpsertNode(upsertConfig);
    secondInsert.addChildNode(upsert);

    // Single Hive query paired with the result value 0.
    List<Pair<String, Integer>> hiveQueries = new ArrayList<>();
    String riderQuery = "select " + "count(*) from testdb1.table1 group " + "by rider having count(*) < 1";
    hiveQueries.add(Pair.of(riderQuery, 0));
    DeltaConfig.Config hiveConfig = DeltaConfig.Config.newBuilder()
            .withHiveQueryAndResults(hiveQueries)
            .withHiveLocal(true)
            .build();
    DagNode hiveQuery = new HiveQueryNode(hiveConfig);
    secondInsert.addChildNode(hiveQuery);

    List<DagNode> roots = new ArrayList<>();
    roots.add(root);
    return new WorkflowDag(roots);
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) ArrayList(java.util.ArrayList) HiveQueryNode(org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode) InsertNode(org.apache.hudi.integ.testsuite.dag.nodes.InsertNode) UpsertNode(org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode) Pair(org.apache.hudi.common.util.collection.Pair)

Example 2 with UpsertNode

use of org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode in project hudi by apache.

the class ComplexDagGenerator method build.

/**
 * Builds a three-node workflow: an insert root, an upsert child, and a validation
 * node whose function checks the write statuses produced by the two nodes above it.
 *
 * @return a {@link WorkflowDag} whose only root is the insert node
 */
@Override
public WorkflowDag build() {
    // root node
    DagNode root = new InsertNode(Config.newBuilder().withNumRecordsToInsert(1000).withNumInsertPartitions(3).withRecordSize(1000).build());
    // child node1
    DagNode child1 = new UpsertNode(Config.newBuilder().withNumRecordsToUpdate(999).withNumRecordsToInsert(1000).withNumUpsertFiles(1).withNumUpsertPartitions(1).withNumInsertPartitions(1).withRecordSize(10000).build());
    // function used to build ValidateNode
    Function<List<DagNode<JavaRDD<WriteStatus>>>, Boolean> function = (dagNodes) -> {
        // First node handed to the validator, and that node's own first parent.
        DagNode<JavaRDD<WriteStatus>> parent1 = dagNodes.get(0);
        List<WriteStatus> parent1Statuses = parent1.getResult().collect();
        // Summing through a primitive stream avoids boxing and, unlike reduce(...).get(),
        // yields 0 for an empty status list instead of throwing NoSuchElementException,
        // so validation simply fails the equality check below.
        long parent1RecordsTouched = parent1Statuses.stream().mapToLong(st -> st.getStat().getNumUpdateWrites() + st.getStat().getNumInserts()).sum();
        // Records written must equal the configured inserts plus upserts.
        boolean b1 = parent1RecordsTouched == parent1.getConfig().getNumRecordsInsert() + parent1.getConfig().getNumRecordsUpsert();
        // More write statuses than the configured number of upsert files.
        boolean b2 = parent1Statuses.size() > parent1.getConfig().getNumUpsertFiles();
        DagNode<JavaRDD<WriteStatus>> parent2 = parent1.getParentNodes().get(0);
        List<WriteStatus> parent2Statuses = parent2.getResult().collect();
        long parent2RecordsTouched = parent2Statuses.stream().mapToLong(st -> st.getStat().getNumUpdateWrites() + st.getStat().getNumInserts()).sum();
        // For the grandparent, the insert count is scaled by the number of insert partitions.
        boolean b3 = parent2RecordsTouched == parent2.getConfig().getNumRecordsInsert() * parent2.getConfig().getNumInsertPartitions() + parent2.getConfig().getNumRecordsUpsert();
        return b1 && b2 && b3;
    };
    // child node2
    DagNode child2 = new ValidateNode(Config.newBuilder().build(), function);
    // create relationship between nodes
    // NOTE(review): the validation function reads parent1.getParentNodes(), so parent links
    // must be populated; presumably addChildNode registers the parent as well - confirm.
    root.addChildNode(child1);
    child1.addChildNode(child2);
    List<DagNode> rootNodes = new ArrayList<>();
    rootNodes.add(root);
    return new WorkflowDag(rootNodes);
}
Also used : UpsertNode(org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode) WriteStatus(org.apache.hudi.client.WriteStatus) DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) List(java.util.List) InsertNode(org.apache.hudi.integ.testsuite.dag.nodes.InsertNode) Config(org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config) Function(java.util.function.Function) JavaRDD(org.apache.spark.api.java.JavaRDD) ArrayList(java.util.ArrayList) ValidateNode(org.apache.hudi.integ.testsuite.dag.nodes.ValidateNode)

Example 3 with UpsertNode

use of org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode in project hudi by apache.

the class TestWorkflowBuilder method testWorkloadOperationSequenceBuilder.

/**
 * Verifies that a hand-built two-node DAG (insert root with a single upsert child)
 * exposes the expected structure and per-node configuration through WorkflowDag.
 */
@Test
public void testWorkloadOperationSequenceBuilder() {
    DagNode root = new InsertNode(DeltaConfig.Config.newBuilder().withNumRecordsToInsert(10000).withNumInsertPartitions(1).withNumTimesToRepeat(2).withRecordSize(1000).build());
    DagNode child1 = new UpsertNode(DeltaConfig.Config.newBuilder().withNumRecordsToUpdate(10000).withNumInsertPartitions(1).withNumTimesToRepeat(2).withRecordSize(1000).build());
    root.addChildNode(child1);
    child1.addParentNode(root);
    List<DagNode> rootNodes = new ArrayList<>();
    rootNodes.add(root);
    WorkflowDag workflowDag = new WorkflowDag(rootNodes);

    // assertEquals takes (expected, actual); keeping that order makes failure messages
    // report the expected and observed values the right way round.
    assertEquals(1, workflowDag.getNodeList().size());

    // Root node: the insert stage and its configuration.
    DagNode rootNode = (DagNode) workflowDag.getNodeList().get(0);
    assertEquals(1, rootNode.getChildNodes().size());
    assertTrue(rootNode instanceof InsertNode);
    DeltaConfig.Config rootConfig = rootNode.getConfig();
    assertEquals(1, rootConfig.getNumInsertPartitions());
    assertEquals(1000, rootConfig.getRecordSize());
    assertEquals(2, rootConfig.getRepeatCount());
    assertEquals(10000, rootConfig.getNumRecordsInsert());
    assertEquals(0, rootConfig.getNumRecordsUpsert());

    // Only child of the root: the upsert stage and its configuration.
    DagNode childNode = (DagNode) rootNode.getChildNodes().get(0);
    assertTrue(childNode instanceof UpsertNode);
    DeltaConfig.Config childConfig = childNode.getConfig();
    assertEquals(1, childConfig.getNumInsertPartitions());
    assertEquals(1000, childConfig.getRecordSize());
    assertEquals(2, childConfig.getRepeatCount());
    assertEquals(0, childConfig.getNumRecordsInsert());
    assertEquals(10000, childConfig.getNumRecordsUpsert());
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) ArrayList(java.util.ArrayList) WorkflowDag(org.apache.hudi.integ.testsuite.dag.WorkflowDag) InsertNode(org.apache.hudi.integ.testsuite.dag.nodes.InsertNode) UpsertNode(org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode) Test(org.junit.jupiter.api.Test)

Aggregations

ArrayList (java.util.ArrayList): 3, DagNode (org.apache.hudi.integ.testsuite.dag.nodes.DagNode): 3, InsertNode (org.apache.hudi.integ.testsuite.dag.nodes.InsertNode): 3, UpsertNode (org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode): 3, List (java.util.List): 1, Function (java.util.function.Function): 1, WriteStatus (org.apache.hudi.client.WriteStatus): 1, Pair (org.apache.hudi.common.util.collection.Pair): 1, Config (org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config): 1, WorkflowDag (org.apache.hudi.integ.testsuite.dag.WorkflowDag): 1, HiveQueryNode (org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode): 1, ValidateNode (org.apache.hudi.integ.testsuite.dag.nodes.ValidateNode): 1, JavaRDD (org.apache.spark.api.java.JavaRDD): 1, Test (org.junit.jupiter.api.Test): 1