Search in sources :

Example 6 with DagNode

use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.

the class TestDagUtils method testConvertDagToYamlHiveQuery.

/**
 * Round-trips the DAG produced by {@code HiveSyncDagGenerator} through YAML and
 * checks that the first Hive query and the first Hive property of the
 * third-level node are equal before and after the conversion.
 */
@Test
public void testConvertDagToYamlHiveQuery() throws Exception {
    // Walk the generated DAG: first root -> its first child -> that child's first child.
    WorkflowDag originalDag = new HiveSyncDagGenerator().build();
    DagNode originalInsert = (DagNode) originalDag.getNodeList().get(0);
    DagNode originalSync = (DagNode) originalInsert.getChildNodes().get(0);
    DagNode originalQuery = (DagNode) originalSync.getChildNodes().get(0);

    // Serialize to YAML and parse the text straight back into a DAG.
    String serialized = DagUtils.convertDagToYaml(originalDag);
    WorkflowDag restoredDag = DagUtils.convertYamlToDag(serialized);

    // Same walk on the restored DAG.
    DagNode restoredInsert = (DagNode) restoredDag.getNodeList().get(0);
    DagNode restoredSync = (DagNode) restoredInsert.getChildNodes().get(0);
    DagNode restoredQuery = (DagNode) restoredSync.getChildNodes().get(0);

    // The first Hive query must be unchanged by the round trip.
    Object expectedHiveQuery = originalQuery.getConfig().getHiveQueries().get(0);
    Object actualHiveQuery = restoredQuery.getConfig().getHiveQueries().get(0);
    assertEquals(expectedHiveQuery, actualHiveQuery);

    // The first Hive property must be unchanged as well.
    Object expectedHiveProperty = originalQuery.getConfig().getHiveProperties().get(0);
    Object actualHiveProperty = restoredQuery.getConfig().getHiveProperties().get(0);
    assertEquals(expectedHiveProperty, actualHiveProperty);
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) Test(org.junit.jupiter.api.Test)

Example 7 with DagNode

use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.

the class TestWorkflowBuilder method testWorkloadOperationSequenceBuilder.

/**
 * Builds a two-node DAG (an {@code InsertNode} root with one {@code UpsertNode}
 * child), wraps it in a {@code WorkflowDag}, and checks both the DAG shape and
 * the config values readable from each node.
 */
@Test
public void testWorkloadOperationSequenceBuilder() {
    DagNode root = new InsertNode(DeltaConfig.Config.newBuilder()
        .withNumRecordsToInsert(10000)
        .withNumInsertPartitions(1)
        .withNumTimesToRepeat(2)
        .withRecordSize(1000)
        .build());
    DagNode child1 = new UpsertNode(DeltaConfig.Config.newBuilder()
        .withNumRecordsToUpdate(10000)
        .withNumInsertPartitions(1)
        .withNumTimesToRepeat(2)
        .withRecordSize(1000)
        .build());
    // Link the two nodes in both directions.
    root.addChildNode(child1);
    child1.addParentNode(root);

    List<DagNode> rootNodes = new ArrayList<>();
    rootNodes.add(root);
    WorkflowDag workflowDag = new WorkflowDag(rootNodes);

    // assertEquals takes (expected, actual); keeping that order makes failure
    // messages report the expected and actual values under the right labels.
    assertEquals(1, workflowDag.getNodeList().size());
    DagNode insertNode = (DagNode) workflowDag.getNodeList().get(0);
    assertEquals(1, insertNode.getChildNodes().size());

    // Root node: the insert settings are present and the upsert count is 0.
    assertTrue(insertNode instanceof InsertNode);
    DeltaConfig.Config config = insertNode.getConfig();
    assertEquals(1, config.getNumInsertPartitions());
    assertEquals(1000, config.getRecordSize());
    assertEquals(2, config.getRepeatCount());
    assertEquals(10000, config.getNumRecordsInsert());
    assertEquals(0, config.getNumRecordsUpsert());

    // Child node: the upsert settings are present and the insert count is 0.
    DagNode upsertNode = (DagNode) insertNode.getChildNodes().get(0);
    assertTrue(upsertNode instanceof UpsertNode);
    config = upsertNode.getConfig();
    assertEquals(1, config.getNumInsertPartitions());
    assertEquals(1000, config.getRecordSize());
    assertEquals(2, config.getRepeatCount());
    assertEquals(0, config.getNumRecordsInsert());
    assertEquals(10000, config.getNumRecordsUpsert());
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) ArrayList(java.util.ArrayList) WorkflowDag(org.apache.hudi.integ.testsuite.dag.WorkflowDag) InsertNode(org.apache.hudi.integ.testsuite.dag.nodes.InsertNode) UpsertNode(org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode) Test(org.junit.jupiter.api.Test)

Example 8 with DagNode

use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.

the class HiveSyncDagGenerator method build.

/**
 * Assembles a linear three-node workflow: an insert node, followed by a Hive
 * sync node, followed by a Hive query node.
 *
 * @return a {@code WorkflowDag} whose only root is the insert node
 */
@Override
public WorkflowDag build() {
    // Query handed to the Hive query node together with the value 0.
    String riderCountQuery = "select count(*) from testdb1.table1 group by rider having count(*) < 1";

    DagNode insertNode = new InsertNode(Config.newBuilder()
        .withNumRecordsToInsert(100)
        .withNumInsertPartitions(1)
        .withNumTimesToRepeat(1)
        .withRecordSize(1000)
        .build());

    DagNode hiveSyncNode = new HiveSyncNode(Config.newBuilder().build());
    insertNode.addChildNode(hiveSyncNode);

    DagNode hiveQueryNode = new HiveQueryNode(Config.newBuilder()
        .withHiveQueryAndResults(Arrays.asList(Pair.of(riderCountQuery, 0)))
        .build());
    hiveSyncNode.addChildNode(hiveQueryNode);

    // Only the insert node is registered as a root; the rest hang off it.
    List<DagNode> roots = new ArrayList<>();
    roots.add(insertNode);
    return new WorkflowDag(roots);
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) HiveSyncNode(org.apache.hudi.integ.testsuite.dag.nodes.HiveSyncNode) ArrayList(java.util.ArrayList) HiveQueryNode(org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode) InsertNode(org.apache.hudi.integ.testsuite.dag.nodes.InsertNode)

Example 9 with DagNode

use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.

the class HiveSyncDagGeneratorMOR method build.

/**
 * Assembles a linear three-node workflow: an insert node, followed by a Hive
 * sync node, followed by a Hive query node. Both Hive nodes are configured with
 * {@code withHiveLocal(true)}, and the query node carries two query/value pairs
 * plus one Hive property.
 *
 * @return a {@code WorkflowDag} whose only root is the insert node
 */
@Override
public WorkflowDag build() {
    // Queries handed to the Hive query node, paired below with 0 and 100 respectively.
    String riderCountQuery = "select count(*) from testdb1.hive_trips group by rider having count(*) < 1";
    String totalCountQuery = "select count(*) from testdb1.hive_trips ";
    String inputFormatProperty = "set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat";

    DagNode insertNode = new InsertNode(Config.newBuilder()
        .withNumRecordsToInsert(100)
        .withNumInsertPartitions(1)
        .withNumTimesToRepeat(1)
        .withRecordSize(1000)
        .build());

    DagNode hiveSyncNode = new HiveSyncNode(Config.newBuilder()
        .withHiveLocal(true)
        .build());
    insertNode.addChildNode(hiveSyncNode);

    DagNode hiveQueryNode = new HiveQueryNode(Config.newBuilder()
        .withHiveLocal(true)
        .withHiveQueryAndResults(Arrays.asList(Pair.of(riderCountQuery, 0), Pair.of(totalCountQuery, 100)))
        .withHiveProperties(Arrays.asList(inputFormatProperty))
        .build());
    hiveSyncNode.addChildNode(hiveQueryNode);

    // Only the insert node is registered as a root; the rest hang off it.
    List<DagNode> roots = new ArrayList<>();
    roots.add(insertNode);
    return new WorkflowDag(roots);
}
Also used : DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) HiveSyncNode(org.apache.hudi.integ.testsuite.dag.nodes.HiveSyncNode) ArrayList(java.util.ArrayList) HiveQueryNode(org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode) InsertNode(org.apache.hudi.integ.testsuite.dag.nodes.InsertNode)

Example 10 with DagNode

use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.

the class HoodieTestSuiteJob method runTestSuite.

/**
 * Runs the test suite: creates the workflow DAG, logs it as YAML, initializes a
 * {@code WriterContext}, starts any other required services, and schedules the
 * DAG with either a {@code SaferSchemaDagScheduler} (when
 * {@code cfg.saferSchemaEvolution} is set) or a plain {@code DagScheduler}.
 *
 * <p>{@code stopQuietly()} is always invoked afterwards, whether or not the run
 * succeeded.
 *
 * @throws HoodieException wrapping any exception raised while building or
 *     scheduling the DAG; the failure is also logged before being rethrown
 */
public void runTestSuite() {
    try {
        WorkflowDag workflowDag = createWorkflowDag();
        // Placeholder form, consistent with the timing log at the end of this method.
        log.info("Workflow Dag => {}", DagUtils.convertDagToYaml(workflowDag));
        long startTime = System.currentTimeMillis();
        WriterContext writerContext = new WriterContext(jsc, props, cfg, keyGenerator, sparkSession);
        writerContext.initContext(jsc);
        startOtherServicesIfNeeded(writerContext);
        if (this.cfg.saferSchemaEvolution) {
            // rollback most recent upsert/insert, by default.
            int numRollbacks = 2;
            // if the first root node is a RollbackNode, take num_rollbacks from its config
            List<DagNode> rootNodes = workflowDag.getNodeList();
            if (!rootNodes.isEmpty()) {
                DagNode firstRoot = rootNodes.get(0);
                if (firstRoot instanceof RollbackNode) {
                    numRollbacks = firstRoot.getConfig().getNumRollbacks();
                }
            }
            int version = getSchemaVersionFromCommit(numRollbacks - 1);
            SaferSchemaDagScheduler dagScheduler = new SaferSchemaDagScheduler(workflowDag, writerContext, jsc, version);
            dagScheduler.schedule();
        } else {
            DagScheduler dagScheduler = new DagScheduler(workflowDag, writerContext, jsc);
            dagScheduler.schedule();
        }
        log.info("Finished scheduling all tasks, Time taken {}", System.currentTimeMillis() - startTime);
    } catch (Exception e) {
        log.error("Failed to run Test Suite ", e);
        throw new HoodieException("Failed to run Test Suite ", e);
    } finally {
        // Cleanup runs on both the success and the failure path.
        stopQuietly();
    }
}
Also used : WriterContext(org.apache.hudi.integ.testsuite.dag.WriterContext) DagNode(org.apache.hudi.integ.testsuite.dag.nodes.DagNode) SaferSchemaDagScheduler(org.apache.hudi.integ.testsuite.dag.scheduler.SaferSchemaDagScheduler) DagScheduler(org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler) RollbackNode(org.apache.hudi.integ.testsuite.dag.nodes.RollbackNode) WorkflowDag(org.apache.hudi.integ.testsuite.dag.WorkflowDag) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException)

Aggregations

DagNode (org.apache.hudi.integ.testsuite.dag.nodes.DagNode)12 ArrayList (java.util.ArrayList)6 InsertNode (org.apache.hudi.integ.testsuite.dag.nodes.InsertNode)5 HiveQueryNode (org.apache.hudi.integ.testsuite.dag.nodes.HiveQueryNode)3 UpsertNode (org.apache.hudi.integ.testsuite.dag.nodes.UpsertNode)3 Test (org.junit.jupiter.api.Test)3 WorkflowDag (org.apache.hudi.integ.testsuite.dag.WorkflowDag)2 HiveSyncNode (org.apache.hudi.integ.testsuite.dag.nodes.HiveSyncNode)2 JsonNode (com.fasterxml.jackson.databind.JsonNode)1 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)1 YAMLFactory (com.fasterxml.jackson.dataformat.yaml.YAMLFactory)1 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Entry (java.util.Map.Entry)1 PriorityQueue (java.util.PriorityQueue)1 Future (java.util.concurrent.Future)1 Function (java.util.function.Function)1