use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.
the class JobLauncherTestHelper method runTestWithFork.
/**
 * Launches the job described by {@code jobProps} and verifies the output of both forks.
 *
 * <p>A new job ID is generated from the configured job name and written back into
 * {@code jobProps} before the launcher is created. Once the launcher has been closed, the
 * first dataset state stored for that job ID must be COMMITTED with 4 completed tasks and no
 * job failures. Every task must itself be COMMITTED and, for each of {@code fork_0} and
 * {@code fork_1}, must have an existing output directory containing exactly two entries under
 * the directory configured by {@code ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR}, plus a
 * per-fork written-records property equal to {@code TestExtractor.TOTAL_RECORDS}.
 *
 * @param jobProps job configuration; mutated to carry the generated job ID
 * @throws Exception if launching the job or reading back its state/output fails
 */
public void runTestWithFork(Properties jobProps) throws Exception {
  final String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  final String jobId = JobLauncherUtils.newJobId(jobName).toString();
  jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);

  // The launcher is closed before any state is read back.
  try (JobLauncher launcher = JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)) {
    launcher.launchJob(null);
  }

  final String stateTableName = sanitizeJobNameForDatasetStore(jobId) + ".jst";
  final List<JobState.DatasetState> storedStates = this.datasetStateStore.getAll(jobName, stateTableName);
  final JobState.DatasetState jobDatasetState = storedStates.get(0);

  Assert.assertEquals(jobDatasetState.getState(), JobState.RunningState.COMMITTED);
  Assert.assertEquals(jobDatasetState.getCompletedTasks(), 4);
  Assert.assertEquals(jobDatasetState.getJobFailures(), 0);

  final FileSystem localFs = FileSystem.getLocal(new Configuration());
  final String publisherFinalDir = this.launcherProps.getProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR);

  for (TaskState task : jobDatasetState.getTaskStates()) {
    Assert.assertEquals(task.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);

    // Both forks (fork_0 and fork_1) are checked the same way, in order.
    for (int forkIndex = 0; forkIndex < 2; forkIndex++) {
      Path forkOutputDir =
          new Path(publisherFinalDir, new Path(task.getExtract().getOutputFilePath(), "fork_" + forkIndex));
      Assert.assertTrue(localFs.exists(forkOutputDir));
      Assert.assertEquals(localFs.listStatus(forkOutputDir).length, 2);
      Assert.assertEquals(task.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN + "." + forkIndex),
          TestExtractor.TOTAL_RECORDS);
    }
  }
}
use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.
the class JobLauncherTestHelper method runTestWithMultipleDatasetsAndFaultyExtractor.
/**
 * Runs a multi-dataset job whose source is {@code MultiDatasetTestSourceWithFaultyExtractor}
 * and verifies the per-dataset state that ends up in the dataset state store.
 *
 * <p>Task retries are disabled for the run. A {@code JobException} raised while creating or
 * launching the job is tolerated. Afterwards:
 * <ul>
 *   <li>With the partial commit policy, the current state of {@code Dataset0} must be COMMITTED
 *       with a task count of 1, and its single task must be COMMITTED.</li>
 *   <li>Without it, no current state may exist for {@code Dataset0}.</li>
 *   <li>In both cases {@code Dataset1} through {@code Dataset3} must each be COMMITTED with one
 *       completed task, and every task must carry the matching dataset URN.</li>
 * </ul>
 *
 * @param jobProps job configuration; mutated with the job ID, source class, retry count and,
 *        optionally, the commit policy
 * @param usePartialCommitPolicy whether to set the job commit policy to {@code "partial"}
 * @throws Exception if closing the launcher or reading back state fails
 */
public void runTestWithMultipleDatasetsAndFaultyExtractor(Properties jobProps, boolean usePartialCommitPolicy) throws Exception {
  final String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
  final String jobId = JobLauncherUtils.newJobId(jobName).toString();

  jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);
  jobProps.setProperty(ConfigurationKeys.SOURCE_CLASS_KEY, MultiDatasetTestSourceWithFaultyExtractor.class.getName());
  jobProps.setProperty(ConfigurationKeys.MAX_TASK_RETRIES_KEY, "0");
  if (usePartialCommitPolicy) {
    jobProps.setProperty(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "partial");
  }

  final Closer resources = Closer.create();
  try {
    JobLauncher launcher = resources.register(JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps));
    launcher.launchJob(null);
  } catch (JobException expected) {
    // JobException is expected
  } finally {
    resources.close();
  }

  // Dataset0 is the one handled by the faulty extractor; both policies look up the same table.
  final List<JobState.DatasetState> faultyDatasetStates =
      this.datasetStateStore.getAll(jobName, "Dataset0-current.jst");
  if (!usePartialCommitPolicy) {
    // Task 0 should have failed
    Assert.assertTrue(faultyDatasetStates.isEmpty());
  } else {
    final JobState.DatasetState faultyDatasetState = faultyDatasetStates.get(0);
    Assert.assertEquals(faultyDatasetState.getState(), JobState.RunningState.COMMITTED);
    Assert.assertEquals(faultyDatasetState.getTaskCount(), 1);
    final TaskState onlyTask = faultyDatasetState.getTaskStates().get(0);
    // BaseDataPublisher will change the state to COMMITTED
    Assert.assertEquals(onlyTask.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
  }

  // The remaining datasets (1..3) must have committed normally.
  for (int datasetIndex = 1; datasetIndex <= 3; datasetIndex++) {
    final String expectedUrn = "Dataset" + datasetIndex;
    final List<JobState.DatasetState> healthyStates =
        this.datasetStateStore.getAll(jobName, expectedUrn + "-current.jst");
    final JobState.DatasetState healthyState = healthyStates.get(0);

    Assert.assertEquals(healthyState.getDatasetUrn(), expectedUrn);
    Assert.assertEquals(healthyState.getState(), JobState.RunningState.COMMITTED);
    Assert.assertEquals(healthyState.getCompletedTasks(), 1);

    for (TaskState task : healthyState.getTaskStates()) {
      Assert.assertEquals(task.getProp(ConfigurationKeys.DATASET_URN_KEY), expectedUrn);
      Assert.assertEquals(task.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
    }
  }
}
use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.
the class JobContext method commit.
/**
 * Commit the job based on whether the job is cancelled.
 *
 * <p>The dataset states are computed and stored in {@code datasetStatesByUrns}, the dataset URNs
 * are persisted through the dataset state store, and then one commit callable per dataset
 * (built by {@code createSafeDatasetCommit}) is executed via an {@code IteratorExecutor} using
 * {@code numCommitThreads()} threads. When every callable is reported successful the job state
 * is set to COMMITTED; when any is not, the job state is set to FAILED and an
 * {@link IOException} is thrown.
 *
 * @param isJobCancelled whether the job has been cancelled; forwarded to each dataset commit
 * @throws IOException if committing some dataset(s) fails, or if the calling thread is
 *         interrupted while the commits are executing (the thread's interrupt status is
 *         restored before the exception is thrown)
 */
void commit(final boolean isJobCancelled) throws IOException {
  this.datasetStatesByUrns = Optional.of(computeDatasetStatesByUrns());
  // Optional.of never yields an absent value, so the map can be read once without a presence check.
  final Map<String, DatasetState> datasetStates = this.datasetStatesByUrns.get();

  final boolean shouldCommitDataInJob = shouldCommitDataInJob(this.jobState);
  final DeliverySemantics deliverySemantics = DeliverySemantics.parse(this.jobState);
  final int numCommitThreads = numCommitThreads();

  if (!shouldCommitDataInJob) {
    this.logger.info("Job will not commit data since data are committed by tasks.");
  }

  try {
    this.logger.info("Persisting dataset urns.");
    this.datasetStateStore.persistDatasetURNs(this.jobName, datasetStates.keySet());

    List<Either<Void, ExecutionException>> result = new IteratorExecutor<>(
        Iterables.transform(datasetStates.entrySet(),
            new Function<Map.Entry<String, DatasetState>, Callable<Void>>() {
              @Nullable
              @Override
              public Callable<Void> apply(final Map.Entry<String, DatasetState> entry) {
                // The "numCommitThreads > 1" argument tells the commit whether it runs in parallel with others.
                return createSafeDatasetCommit(shouldCommitDataInJob, isJobCancelled, deliverySemantics,
                    entry.getKey(), entry.getValue(), numCommitThreads > 1, JobContext.this);
              }
            }).iterator(),
        numCommitThreads,
        ExecutorsUtils.newThreadFactory(Optional.of(this.logger), Optional.of("Commit-thread-%d")))
        .executeAndGetResults();

    IteratorExecutor.logFailures(result, LOG, 10);

    if (!IteratorExecutor.verifyAllSuccessful(result)) {
      this.jobState.setState(JobState.RunningState.FAILED);
      throw new IOException("Failed to commit dataset state for some dataset(s) of job " + this.jobId);
    }
  } catch (InterruptedException exc) {
    // Restore the interrupt status so code further up the stack can still observe the interruption;
    // wrapping the exception alone would silently clear it.
    Thread.currentThread().interrupt();
    throw new IOException(exc);
  }

  this.jobState.setState(JobState.RunningState.COMMITTED);
}
Aggregations