Use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.
In class TestDagUtils, the method testConvertDagToYamlHiveQuery:
@Test
public void testConvertDagToYamlHiveQuery() throws Exception {
  // Build the sample DAG: insert -> hive sync -> hive query
  WorkflowDag dag = new HiveSyncDagGenerator().build();
  DagNode insert1 = (DagNode) dag.getNodeList().get(0);
  DagNode hiveSync1 = (DagNode) insert1.getChildNodes().get(0);
  DagNode hiveQuery1 = (DagNode) hiveSync1.getChildNodes().get(0);
  // Round-trip the DAG through its YAML representation
  String yaml = DagUtils.convertDagToYaml(dag);
  WorkflowDag dag2 = DagUtils.convertYamlToDag(yaml);
  DagNode insert2 = (DagNode) dag2.getNodeList().get(0);
  DagNode hiveSync2 = (DagNode) insert2.getChildNodes().get(0);
  DagNode hiveQuery2 = (DagNode) hiveSync2.getChildNodes().get(0);
  // The hive query node's queries and properties must survive the round trip
  assertEquals(hiveQuery1.getConfig().getHiveQueries().get(0), hiveQuery2.getConfig().getHiveQueries().get(0));
  assertEquals(hiveQuery1.getConfig().getHiveProperties().get(0), hiveQuery2.getConfig().getHiveProperties().get(0));
}
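
The two DagUtils entry points exercised above are all that is needed to persist a DAG and load it back. A minimal usage sketch under the same imports as the test; writing the YAML string to a file, which a real job would likely do, is omitted here:

  WorkflowDag original = new HiveSyncDagGenerator().build();
  String yaml = DagUtils.convertDagToYaml(original);       // DAG -> YAML string
  WorkflowDag restored = DagUtils.convertYamlToDag(yaml);  // YAML string -> DAG
  // restored is structurally equivalent to original, as the assertions above verify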
Use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.
In class TestWorkflowBuilder, the method testWorkloadOperationSequenceBuilder:
@Test
public void testWorkloadOperationSequenceBuilder() {
  // Root: insert 10000 records of ~1KB each, repeated twice; child: upsert 10000 records
  DagNode root = new InsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToInsert(10000).withNumInsertPartitions(1)
      .withNumTimesToRepeat(2).withRecordSize(1000).build());
  DagNode child1 = new UpsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToUpdate(10000).withNumInsertPartitions(1)
      .withNumTimesToRepeat(2).withRecordSize(1000).build());
  // Wire the parent/child link in both directions
  root.addChildNode(child1);
  child1.addParentNode(root);
  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(root);
  WorkflowDag workflowDag = new WorkflowDag(rootNodes);
  // Only the root appears in the top-level node list; the upsert hangs off it
  assertEquals(1, workflowDag.getNodeList().size());
  assertEquals(1, ((DagNode) workflowDag.getNodeList().get(0)).getChildNodes().size());
  DagNode dagNode = (DagNode) workflowDag.getNodeList().get(0);
  assertTrue(dagNode instanceof InsertNode);
  DeltaConfig.Config config = dagNode.getConfig();
  assertEquals(1, config.getNumInsertPartitions());
  assertEquals(1000, config.getRecordSize());
  assertEquals(2, config.getRepeatCount());
  assertEquals(10000, config.getNumRecordsInsert());
  assertEquals(0, config.getNumRecordsUpsert());
  // The child carries the upsert config instead
  dagNode = (DagNode) ((DagNode) workflowDag.getNodeList().get(0)).getChildNodes().get(0);
  assertTrue(dagNode instanceof UpsertNode);
  config = dagNode.getConfig();
  assertEquals(1, config.getNumInsertPartitions());
  assertEquals(1000, config.getRecordSize());
  assertEquals(2, config.getRepeatCount());
  assertEquals(0, config.getNumRecordsInsert());
  assertEquals(10000, config.getNumRecordsUpsert());
}
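
The same parent/child wiring scales to longer chains. A hedged sketch using only the node types and builder methods that appear in this test; the record counts are arbitrary illustration values:

  DagNode root = new InsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToInsert(1000).withNumInsertPartitions(1)
      .withNumTimesToRepeat(1).withRecordSize(1000).build());
  DagNode firstUpsert = new UpsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToUpdate(500).withNumInsertPartitions(1)
      .withNumTimesToRepeat(1).withRecordSize(1000).build());
  DagNode secondUpsert = new UpsertNode(DeltaConfig.Config.newBuilder()
      .withNumRecordsToUpdate(250).withNumInsertPartitions(1)
      .withNumTimesToRepeat(1).withRecordSize(1000).build());
  // Link each level in both directions, as the test does for its single child
  root.addChildNode(firstUpsert);
  firstUpsert.addParentNode(root);
  firstUpsert.addChildNode(secondUpsert);
  secondUpsert.addParentNode(firstUpsert);
  // Only the roots are handed to the WorkflowDag constructor
  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(root);
  WorkflowDag dag = new WorkflowDag(rootNodes);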
Use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.
In class HiveSyncDagGenerator, the method build:
@Override
public WorkflowDag build() {
  // Root node: insert 100 records of ~1KB each into a single partition
  DagNode root = new InsertNode(Config.newBuilder().withNumRecordsToInsert(100)
      .withNumInsertPartitions(1).withNumTimesToRepeat(1).withRecordSize(1000).build());
  // Sync the written table to the Hive metastore
  DagNode child1 = new HiveSyncNode(Config.newBuilder().build());
  root.addChildNode(child1);
  // Validate through Hive: the query is paired with its expected result
  DagNode child2 = new HiveQueryNode(Config.newBuilder()
      .withHiveQueryAndResults(Arrays.asList(Pair.of(
          "select count(*) from testdb1.table1 group by rider having count(*) < 1", 0)))
      .build());
  child1.addChildNode(child2);
  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(root);
  return new WorkflowDag(rootNodes);
}
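
Each entry handed to withHiveQueryAndResults pairs a validation query with the single value it is expected to return. A hedged sketch adding a second check in the same style; the total-row-count query mirrors the one HiveSyncDagGeneratorMOR uses below and is illustrative here:

  DagNode child2 = new HiveQueryNode(Config.newBuilder()
      .withHiveQueryAndResults(Arrays.asList(
          // no rider group may have fewer than one record
          Pair.of("select count(*) from testdb1.table1 group by rider having count(*) < 1", 0),
          // illustrative second check: all 100 inserted records are visible
          Pair.of("select count(*) from testdb1.table1", 100)))
      .build());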
Use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.
In class HiveSyncDagGeneratorMOR, the method build:
@Override
public WorkflowDag build() {
  // Root node: insert 100 records of ~1KB each into a single partition
  DagNode root = new InsertNode(Config.newBuilder().withNumRecordsToInsert(100)
      .withNumInsertPartitions(1).withNumTimesToRepeat(1).withRecordSize(1000).build());
  // Sync against a local Hive instance
  DagNode child1 = new HiveSyncNode(Config.newBuilder().withHiveLocal(true).build());
  root.addChildNode(child1);
  // Two validation queries: no rider group below one record, and exactly 100 rows in total
  DagNode child2 = new HiveQueryNode(Config.newBuilder().withHiveLocal(true)
      .withHiveQueryAndResults(Arrays.asList(
          Pair.of("select count(*) from testdb1.hive_trips group by rider having count(*) < 1", 0),
          Pair.of("select count(*) from testdb1.hive_trips", 100)))
      .withHiveProperties(Arrays.asList("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat"))
      .build());
  child1.addChildNode(child2);
  List<DagNode> rootNodes = new ArrayList<>();
  rootNodes.add(root);
  return new WorkflowDag(rootNodes);
}
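
withHiveProperties takes a list of "set" statements for the Hive session that runs the validation queries. A hedged sketch attaching a second property; hive.stats.autogather is a standard Hive setting used purely as an illustration, not something this generator actually sets:

  DagNode queryNode = new HiveQueryNode(Config.newBuilder()
      .withHiveLocal(true)
      .withHiveProperties(Arrays.asList(
          "set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat",
          "set hive.stats.autogather=false"))  // illustrative extra property
      .withHiveQueryAndResults(Arrays.asList(
          Pair.of("select count(*) from testdb1.hive_trips", 100)))
      .build());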
Use of org.apache.hudi.integ.testsuite.dag.nodes.DagNode in project hudi by apache.
In class HoodieTestSuiteJob, the method runTestSuite:
public void runTestSuite() {
  try {
    WorkflowDag workflowDag = createWorkflowDag();
    log.info("Workflow Dag => " + DagUtils.convertDagToYaml(workflowDag));
    long startTime = System.currentTimeMillis();
    WriterContext writerContext = new WriterContext(jsc, props, cfg, keyGenerator, sparkSession);
    writerContext.initContext(jsc);
    startOtherServicesIfNeeded(writerContext);
    if (this.cfg.saferSchemaEvolution) {
      // Roll back the most recent upsert/insert commits; two by default
      int numRollbacks = 2;
      // If the root node is a RollbackNode, honor its configured rollback count instead
      List<DagNode> root = workflowDag.getNodeList();
      if (!root.isEmpty() && root.get(0) instanceof RollbackNode) {
        numRollbacks = root.get(0).getConfig().getNumRollbacks();
      }
      int version = getSchemaVersionFromCommit(numRollbacks - 1);
      SaferSchemaDagScheduler dagScheduler = new SaferSchemaDagScheduler(workflowDag, writerContext, jsc, version);
      dagScheduler.schedule();
    } else {
      DagScheduler dagScheduler = new DagScheduler(workflowDag, writerContext, jsc);
      dagScheduler.schedule();
    }
    log.info("Finished scheduling all tasks, time taken {}", System.currentTimeMillis() - startTime);
  } catch (Exception e) {
    log.error("Failed to run Test Suite ", e);
    throw new HoodieException("Failed to run Test Suite ", e);
  } finally {
    stopQuietly();
  }
}
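
Stripped of the safer-schema branch and the error handling, the core of the method is a build/log/schedule sequence. A minimal sketch, assuming a WriterContext has already been built and initialized as above:

  WorkflowDag workflowDag = createWorkflowDag();
  // Log the DAG as YAML so the workload about to run is visible up front
  log.info("Workflow Dag => " + DagUtils.convertDagToYaml(workflowDag));
  // DagScheduler walks the DAG and executes each node against the writer context
  DagScheduler dagScheduler = new DagScheduler(workflowDag, writerContext, jsc);
  dagScheduler.schedule();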