Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by Apache.
The class MockedDagManager, method testFlowSlaWithConfig.
@Test
void testFlowSlaWithConfig() throws Exception {
  long flowExecutionId = System.currentTimeMillis();
  Dag<JobExecutionPlan> dag = DagManagerTest.buildDag("4", flowExecutionId, "FINISH_RUNNING", 1);
  String dagId = DagManagerUtils.generateDagId(dag);
  int queue = DagManagerUtils.getDagQueueId(dag, dagNumThreads);
  when(this.dagManager.getJobStatusRetriever().getLatestExecutionIdsForFlow(eq("flow4"), eq("group4"), anyInt()))
      .thenReturn(Collections.singletonList(flowExecutionId));
  // Change the config to set a small SLA.
  Config jobConfig = dag.getStartNodes().get(0).getValue().getJobSpec().getConfig();
  jobConfig = jobConfig
      .withValue(ConfigurationKeys.GOBBLIN_FLOW_SLA_TIME, ConfigValueFactory.fromAnyRef("7"))
      .withValue(ConfigurationKeys.GOBBLIN_FLOW_SLA_TIME_UNIT, ConfigValueFactory.fromAnyRef(TimeUnit.SECONDS.name()));
  dag.getStartNodes().get(0).getValue().getJobSpec().setConfig(jobConfig);
  // Mock adding the spec.
  dagManager.addDag(dag, true, true);
  // Check existence of the dag in the dagToSLA map.
  AssertWithBackoff.create().maxSleepMs(5000).backoffFactor(1)
      .assertTrue(input -> dagManager.dagManagerThreads[queue].dagToSLA.containsKey(dagId), ERROR_MESSAGE);
  // Check the SLA value.
  Assert.assertEquals(dagManager.dagManagerThreads[queue].dagToSLA.get(dagId).longValue(), TimeUnit.SECONDS.toMillis(7L));
  // Check existence of the dag in the dagToJobs map.
  AssertWithBackoff.create().maxSleepMs(5000).backoffFactor(1)
      .assertTrue(input -> dagManager.dagManagerThreads[queue].dagToJobs.containsKey(dagId), ERROR_MESSAGE);
  // Verify cancelJob() of the SpecProducer is called once, i.e. job cancellation was triggered.
  AssertWithBackoff.create().maxSleepMs(5000).backoffFactor(1).assertTrue(new CancelPredicate(dag), ERROR_MESSAGE);
  // Check removal of the dag from the dagToSLA map.
  AssertWithBackoff.create().maxSleepMs(5000).backoffFactor(1)
      .assertTrue(input -> !dagManager.dagManagerThreads[queue].dagToSLA.containsKey(dagId), ERROR_MESSAGE);
}
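The two SLA keys above are ordinary entries in the job config, so any flow can carry its own deadline the same way. Below is a minimal sketch of a helper that attaches an SLA to an existing config, using only the keys and Config API exercised in the test; the helper name withFlowSla is hypothetical, not part of Gobblin.

// Hypothetical helper: attach a flow SLA to a job config using the same keys as the test above.
public static Config withFlowSla(Config base, long duration, TimeUnit unit) {
  return base
      .withValue(ConfigurationKeys.GOBBLIN_FLOW_SLA_TIME, ConfigValueFactory.fromAnyRef(Long.toString(duration)))
      .withValue(ConfigurationKeys.GOBBLIN_FLOW_SLA_TIME_UNIT, ConfigValueFactory.fromAnyRef(unit.name()));
}

Usage mirroring the test would be withFlowSla(jobConfig, 7, TimeUnit.SECONDS).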
Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by Apache.
The class DagTestUtils, method buildDag.
/**
 * Create a {@link Dag}<{@link JobExecutionPlan}> with one parent and one child.
 * @return a Dag.
 */
public static Dag<JobExecutionPlan> buildDag(String id, Long flowExecutionId) throws URISyntaxException {
  List<JobExecutionPlan> jobExecutionPlans = new ArrayList<>();
  for (int i = 0; i < 2; i++) {
    String suffix = Integer.toString(i);
    Config jobConfig = ConfigBuilder.create()
        .addPrimitive(ConfigurationKeys.FLOW_GROUP_KEY, "group" + id)
        .addPrimitive(ConfigurationKeys.FLOW_NAME_KEY, "flow" + id)
        .addPrimitive(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, flowExecutionId)
        .addPrimitive(ConfigurationKeys.JOB_NAME_KEY, "job" + suffix)
        .build();
    if (i > 0) {
      // Each job after the first depends on its predecessor, forming a chain.
      jobConfig = jobConfig.withValue(ConfigurationKeys.JOB_DEPENDENCIES, ConfigValueFactory.fromAnyRef("job" + (i - 1)));
    }
    JobSpec js = JobSpec.builder("test_job" + suffix)
        .withVersion(suffix)
        .withConfig(jobConfig)
        .withTemplate(new URI("job" + suffix))
        .build();
    SpecExecutor specExecutor = buildNaiveTopologySpec("mySpecExecutor").getSpecExecutor();
    JobExecutionPlan jobExecutionPlan = new JobExecutionPlan(js, specExecutor);
    jobExecutionPlan.setExecutionStatus(ExecutionStatus.RUNNING);
    // A CompletedFuture is used because tests run with InMemorySpecProducer, which responds with a CompletedFuture.
    CompletedFuture<Boolean> future = new CompletedFuture<>(Boolean.TRUE, null);
    jobExecutionPlan.setJobFuture(Optional.of(future));
    jobExecutionPlans.add(jobExecutionPlan);
  }
  return new JobExecutionPlanDagFactory().createDag(jobExecutionPlans);
}
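A caller might exercise this helper as follows. This is a sketch, but the dag shape it checks (two nodes chained by JOB_DEPENDENCIES, both marked RUNNING) follows directly from the loop above.

Dag<JobExecutionPlan> dag = DagTestUtils.buildDag("1", System.currentTimeMillis());
// job1 depends on job0, so the chain has a single start node and a single end node.
Assert.assertEquals(dag.getNodes().size(), 2);
Assert.assertEquals(dag.getStartNodes().size(), 1);
Assert.assertEquals(dag.getEndNodes().size(), 1);
// Every plan was marked RUNNING before being handed to the factory.
Assert.assertEquals(dag.getStartNodes().get(0).getValue().getExecutionStatus(), ExecutionStatus.RUNNING);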
Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by Apache.
The class IdentityFlowToJobSpecCompilerTest, method testCompilerWithoutTemplateCatalog.
@Test
public void testCompilerWithoutTemplateCatalog() {
  FlowSpec flowSpec = initFlowSpec();
  // Run the compiler on the FlowSpec.
  Dag<JobExecutionPlan> jobExecutionPlanDag = this.compilerWithoutTemplateCalague.compileFlow(flowSpec);
  // Assert pre-requisites.
  Assert.assertNotNull(jobExecutionPlanDag, "Expected a non-null dag.");
  Assert.assertEquals(jobExecutionPlanDag.getNodes().size(), 1, "Expected 1 executor for FlowSpec.");
  // Assert FlowSpec compilation.
  Assert.assertEquals(jobExecutionPlanDag.getStartNodes().size(), 1);
  Dag.DagNode<JobExecutionPlan> dagNode = jobExecutionPlanDag.getStartNodes().get(0);
  Spec spec = dagNode.getValue().getJobSpec();
  Assert.assertTrue(spec instanceof JobSpec, "Expected a JobSpec compiled from the FlowSpec.");
  // Assert JobSpec properties.
  JobSpec jobSpec = (JobSpec) spec;
  Assert.assertFalse(jobSpec.getConfig().hasPath("testProperty1"));
  Assert.assertFalse(jobSpec.getConfig().hasPath("testProperty2"));
  Assert.assertFalse(jobSpec.getConfig().hasPath("testProperty3"));
  Assert.assertEquals(jobSpec.getConfig().getString(ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY), TEST_SOURCE_NAME);
  Assert.assertFalse(jobSpec.getConfig().hasPath(ConfigurationKeys.JOB_SCHEDULE_KEY));
  Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.JOB_NAME_KEY), TEST_FLOW_NAME);
  Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.JOB_GROUP_KEY), TEST_FLOW_GROUP);
  Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.FLOW_NAME_KEY), TEST_FLOW_NAME);
  Assert.assertEquals(jobSpec.getConfig().getString(ConfigurationKeys.FLOW_GROUP_KEY), TEST_FLOW_GROUP);
  Assert.assertTrue(jobSpec.getConfig().hasPath(ConfigurationKeys.FLOW_EXECUTION_ID_KEY));
  // Assert that the start node has no children.
  Assert.assertEquals(jobExecutionPlanDag.getChildren(dagNode).size(), 0);
}
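initFlowSpec() is a helper defined elsewhere in this test class. For orientation, a minimal FlowSpec carrying the keys asserted above could be assembled roughly as follows; the URI and the destination identifier value are illustrative assumptions, not the helper's actual contents.

Config flowConfig = ConfigBuilder.create()
    .addPrimitive(ConfigurationKeys.FLOW_NAME_KEY, TEST_FLOW_NAME)
    .addPrimitive(ConfigurationKeys.FLOW_GROUP_KEY, TEST_FLOW_GROUP)
    .addPrimitive(ServiceConfigKeys.FLOW_SOURCE_IDENTIFIER_KEY, TEST_SOURCE_NAME)
    .addPrimitive(ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY, "testSink") // illustrative value
    .build();
FlowSpec flowSpec = FlowSpec.builder(URI.create("flowspec/testFlowGroup/testFlowName")) // illustrative URI
    .withConfig(flowConfig)
    .withVersion("1")
    .build();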
Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by Apache.
The class MultiHopFlowCompilerTest, method testCompileFlow.
@Test
public void testCompileFlow() throws URISyntaxException, IOException {
  FlowSpec spec = createFlowSpec("flow/flow1.conf", "LocalFS-1", "ADLS-1", false, false);
  Dag<JobExecutionPlan> jobDag = this.specCompiler.compileFlow(spec);
  Assert.assertEquals(jobDag.getNodes().size(), 4);
  Assert.assertEquals(jobDag.getStartNodes().size(), 1);
  Assert.assertEquals(jobDag.getEndNodes().size(), 1);
  // Get the 1st hop: Distcp from "LocalFS-1" to "HDFS-1".
  DagNode<JobExecutionPlan> startNode = jobDag.getStartNodes().get(0);
  JobExecutionPlan jobSpecWithExecutor = startNode.getValue();
  JobSpec jobSpec = jobSpecWithExecutor.getJobSpec();
  // Ensure the resolved job config for the first hop has the correct substitutions.
  Config jobConfig = jobSpec.getConfig();
  String flowGroup = "testFlowGroup";
  String flowName = "testFlowName";
  String expectedJobName1 = Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
      .join(flowGroup, flowName, "Distcp", "LocalFS-1", "HDFS-1", "localToHdfs");
  String jobName1 = jobConfig.getString(ConfigurationKeys.JOB_NAME_KEY);
  Assert.assertTrue(jobName1.startsWith(expectedJobName1));
  String from = jobConfig.getString("from");
  String to = jobConfig.getString("to");
  Assert.assertEquals(from, "/data/out/testTeam/testDataset");
  Assert.assertEquals(to, "/data/out/testTeam/testDataset");
  String sourceFsUri = jobConfig.getString("fs.uri");
  Assert.assertEquals(sourceFsUri, "file:///");
  Assert.assertEquals(jobConfig.getString("source.filebased.fs.uri"), sourceFsUri);
  Assert.assertEquals(jobConfig.getString("state.store.fs.uri"), sourceFsUri);
  String targetFsUri = jobConfig.getString("target.filebased.fs.uri");
  Assert.assertEquals(targetFsUri, "hdfs://hadoopnn01.grid.linkedin.com:8888/");
  Assert.assertEquals(jobConfig.getString("writer.fs.uri"), targetFsUri);
  Assert.assertEquals(new Path(jobConfig.getString("gobblin.dataset.pattern")), new Path(from));
  Assert.assertEquals(jobConfig.getString("data.publisher.final.dir"), to);
  Assert.assertEquals(jobConfig.getString("type"), "java");
  Assert.assertEquals(jobConfig.getString("job.class"), "org.apache.gobblin.runtime.local.LocalJobLauncher");
  Assert.assertEquals(jobConfig.getString("launcher.type"), "LOCAL");
  // Ensure the spec executor has the correct configurations.
  SpecExecutor specExecutor = jobSpecWithExecutor.getSpecExecutor();
  Assert.assertEquals(specExecutor.getUri().toString(), "fs:///");
  Assert.assertEquals(specExecutor.getClass().getCanonicalName(), "org.apache.gobblin.runtime.spec_executorInstance.InMemorySpecExecutor");
  // Get the 2nd hop: "HDFS-1 to HDFS-1: convert avro to json and encrypt". Ensure the config has the correct substitutions.
  Assert.assertEquals(jobDag.getChildren(startNode).size(), 1);
  DagNode<JobExecutionPlan> secondHopNode = jobDag.getChildren(startNode).get(0);
  jobSpecWithExecutor = secondHopNode.getValue();
  jobConfig = jobSpecWithExecutor.getJobSpec().getConfig();
  String expectedJobName2 = Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
      .join(flowGroup, flowName, "ConvertToJsonAndEncrypt", "HDFS-1", "HDFS-1", "hdfsConvertToJsonAndEncrypt");
  String jobName2 = jobConfig.getString(ConfigurationKeys.JOB_NAME_KEY);
  Assert.assertTrue(jobName2.startsWith(expectedJobName2));
  Assert.assertEquals(jobConfig.getString(ConfigurationKeys.JOB_DEPENDENCIES), jobName1);
  from = jobConfig.getString("from");
  to = jobConfig.getString("to");
  Assert.assertEquals(from, "/data/out/testTeam/testDataset");
  Assert.assertEquals(to, "/data/encrypted/testTeam/testDataset");
  Assert.assertEquals(jobConfig.getString("source.filebased.data.directory"), from);
  Assert.assertEquals(jobConfig.getString("data.publisher.final.dir"), to);
  specExecutor = jobSpecWithExecutor.getSpecExecutor();
  Assert.assertEquals(specExecutor.getUri().toString(), "https://azkaban01.gobblin.net:8443");
  Assert.assertEquals(specExecutor.getClass().getCanonicalName(), "org.apache.gobblin.service.modules.flow.MultiHopFlowCompilerTest.TestAzkabanSpecExecutor");
  // Get the 3rd hop: Distcp from "HDFS-1" to "HDFS-3".
  Assert.assertEquals(jobDag.getChildren(secondHopNode).size(), 1);
  DagNode<JobExecutionPlan> thirdHopNode = jobDag.getChildren(secondHopNode).get(0);
  jobSpecWithExecutor = thirdHopNode.getValue();
  jobConfig = jobSpecWithExecutor.getJobSpec().getConfig();
  String expectedJobName3 = Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
      .join(flowGroup, flowName, "Distcp", "HDFS-1", "HDFS-3", "hdfsToHdfs");
  String jobName3 = jobConfig.getString(ConfigurationKeys.JOB_NAME_KEY);
  Assert.assertTrue(jobName3.startsWith(expectedJobName3));
  Assert.assertEquals(jobConfig.getString(ConfigurationKeys.JOB_DEPENDENCIES), jobName2);
  from = jobConfig.getString("from");
  to = jobConfig.getString("to");
  Assert.assertEquals(from, "/data/encrypted/testTeam/testDataset");
  Assert.assertEquals(to, "/data/encrypted/testTeam/testDataset");
  Assert.assertEquals(jobConfig.getString("source.filebased.fs.uri"), "hdfs://hadoopnn01.grid.linkedin.com:8888/");
  Assert.assertEquals(jobConfig.getString("target.filebased.fs.uri"), "hdfs://hadoopnn03.grid.linkedin.com:8888/");
  Assert.assertEquals(jobConfig.getString("type"), "hadoopJava");
  Assert.assertEquals(jobConfig.getString("job.class"), "org.apache.gobblin.azkaban.AzkabanJobLauncher");
  Assert.assertEquals(jobConfig.getString("launcher.type"), "MAPREDUCE");
  // Ensure the spec executor has the correct configurations.
  specExecutor = jobSpecWithExecutor.getSpecExecutor();
  Assert.assertEquals(specExecutor.getUri().toString(), "https://azkaban01.gobblin.net:8443");
  Assert.assertEquals(specExecutor.getClass().getCanonicalName(), "org.apache.gobblin.service.modules.flow.MultiHopFlowCompilerTest.TestAzkabanSpecExecutor");
  // Get the 4th hop: Distcp from "HDFS-3" to "ADLS-1".
  Assert.assertEquals(jobDag.getChildren(thirdHopNode).size(), 1);
  DagNode<JobExecutionPlan> fourthHopNode = jobDag.getChildren(thirdHopNode).get(0);
  jobSpecWithExecutor = fourthHopNode.getValue();
  jobConfig = jobSpecWithExecutor.getJobSpec().getConfig();
  String expectedJobName4 = Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
      .join(flowGroup, flowName, "DistcpToADL", "HDFS-3", "ADLS-1", "hdfsToAdl");
  String jobName4 = jobConfig.getString(ConfigurationKeys.JOB_NAME_KEY);
  Assert.assertTrue(jobName4.startsWith(expectedJobName4));
  Assert.assertEquals(jobConfig.getString(ConfigurationKeys.JOB_DEPENDENCIES), jobName3);
  from = jobConfig.getString("from");
  to = jobConfig.getString("to");
  Assert.assertEquals(from, "/data/encrypted/testTeam/testDataset");
  Assert.assertEquals(to, "/data/encrypted/testTeam/testDataset");
  Assert.assertEquals(jobConfig.getString("source.filebased.fs.uri"), "hdfs://hadoopnn03.grid.linkedin.com:8888/");
  Assert.assertEquals(jobConfig.getString("target.filebased.fs.uri"), "adl://azuredatalakestore.net/");
  Assert.assertEquals(jobConfig.getString("type"), "hadoopJava");
  Assert.assertEquals(jobConfig.getString("job.class"), "org.apache.gobblin.azkaban.AzkabanJobLauncher");
  Assert.assertEquals(jobConfig.getString("launcher.type"), "MAPREDUCE");
  Assert.assertEquals(jobConfig.getString("dfs.adls.oauth2.client.id"), "1234");
  Assert.assertEquals(jobConfig.getString("writer.encrypted.dfs.adls.oauth2.credential"), "credential");
  Assert.assertEquals(jobConfig.getString("encrypt.key.loc"), "/user/testUser/master.password");
  // Ensure the spec executor has the correct configurations.
  specExecutor = jobSpecWithExecutor.getSpecExecutor();
  Assert.assertEquals(specExecutor.getUri().toString(), "https://azkaban03.gobblin.net:8443");
  Assert.assertEquals(specExecutor.getClass().getCanonicalName(), "org.apache.gobblin.service.modules.flow.MultiHopFlowCompilerTest.TestAzkabanSpecExecutor");
  // Ensure the fourth hop is the last.
  Assert.assertEquals(jobDag.getEndNodes().get(0), fourthHopNode);
}
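Every hop above is reached with the same two accessors, getStartNodes() and getChildren(), so the traversal generalizes. Here is a compact sketch that walks any linear dag and collects the resolved job names in hop order, using only accessors already exercised above.

List<String> hopJobNames = new ArrayList<>();
DagNode<JobExecutionPlan> node = jobDag.getStartNodes().get(0);
while (node != null) {
  hopJobNames.add(node.getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY));
  List<DagNode<JobExecutionPlan>> children = jobDag.getChildren(node);
  // Each hop in this flow has exactly one child; the end node has none.
  node = children.isEmpty() ? null : children.get(0);
}
// For flow1.conf this yields jobName1 through jobName4, in order.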
Use of org.apache.gobblin.service.modules.spec.JobExecutionPlan in project incubator-gobblin by Apache.
The class MultiHopFlowCompilerTest, method testCompileFlowSingleHop.
@Test(dependsOnMethods = "testCompileFlowAfterSecondEdgeDeletion")
public void testCompileFlowSingleHop() throws IOException, URISyntaxException {
  FlowSpec spec = createFlowSpec("flow/flow2.conf", "HDFS-1", "HDFS-3", false, false);
  Dag<JobExecutionPlan> jobDag = this.specCompiler.compileFlow(spec);
  Assert.assertEquals(jobDag.getNodes().size(), 1);
  Assert.assertEquals(jobDag.getStartNodes().size(), 1);
  Assert.assertEquals(jobDag.getEndNodes().size(), 1);
  Assert.assertEquals(jobDag.getStartNodes().get(0), jobDag.getEndNodes().get(0));
  // Ensure the hop is from HDFS-1 to HDFS-3, i.e. jobName starts with "testFlowGroup_testFlowName_Distcp_HDFS-1_HDFS-3".
  DagNode<JobExecutionPlan> dagNode = jobDag.getStartNodes().get(0);
  Config jobConfig = dagNode.getValue().getJobSpec().getConfig();
  String expectedJobName = Joiner.on(JobExecutionPlan.Factory.JOB_NAME_COMPONENT_SEPARATION_CHAR)
      .join("testFlowGroup", "testFlowName", "Distcp", "HDFS-1", "HDFS-3", "hdfsToHdfs");
  String jobName = jobConfig.getString(ConfigurationKeys.JOB_NAME_KEY);
  Assert.assertTrue(jobName.startsWith(expectedJobName));
}
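Since the single compiled job is both the start and the end node, it should also carry no upstream dependency. A short hedged extension of the assertions above; the JOB_DEPENDENCIES check is an assumption consistent with how that key is set only for downstream hops in testCompileFlow.

// The lone node has no children and no upstream job (the dependency check is an assumption).
Assert.assertEquals(jobDag.getChildren(dagNode).size(), 0);
Assert.assertFalse(jobConfig.hasPath(ConfigurationKeys.JOB_DEPENDENCIES));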