Search in sources :

Example 6 with DatasetState

use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.

the class JobLauncherTestHelper method runTestWithFork.

/**
 * Launches the job described by {@code jobProps} under a freshly generated job id and checks the
 * persisted dataset state and the published output of both forks.
 *
 * <p>The first dataset state stored for the job must be COMMITTED with 4 completed tasks and no
 * job failures. For every task state, the {@code fork_0} and {@code fork_1} directories under the
 * publisher's final directory must exist, contain 2 entries each, and the per-fork written-record
 * count must equal {@code TestExtractor.TOTAL_RECORDS}.
 *
 * @param jobProps job configuration; its job id property is overwritten by this method
 * @throws Exception if launching the job or accessing the local file system fails
 */
public void runTestWithFork(Properties jobProps) throws Exception {
    final String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
    final String jobId = JobLauncherUtils.newJobId(jobName).toString();
    jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);

    try (JobLauncher launcher = JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)) {
        launcher.launchJob(null);
    }

    // The dataset state is looked up under the sanitized job id with the ".jst" suffix.
    final String stateFileName = sanitizeJobNameForDatasetStore(jobId) + ".jst";
    List<JobState.DatasetState> storedStates = this.datasetStateStore.getAll(jobName, stateFileName);
    DatasetState firstState = storedStates.get(0);
    Assert.assertEquals(firstState.getState(), JobState.RunningState.COMMITTED);
    Assert.assertEquals(firstState.getCompletedTasks(), 4);
    Assert.assertEquals(firstState.getJobFailures(), 0);

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    for (TaskState task : firstState.getTaskStates()) {
        Assert.assertEquals(task.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
        // Both forks (index 0 and 1) are verified with the same set of checks.
        for (int fork = 0; fork < 2; fork++) {
            String publishDir = this.launcherProps.getProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR);
            Path forkDir = new Path(publishDir, new Path(task.getExtract().getOutputFilePath(), "fork_" + fork));
            Assert.assertTrue(localFs.exists(forkDir));
            Assert.assertEquals(localFs.listStatus(forkDir).length, 2);
            Assert.assertEquals(task.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN + "." + fork), TestExtractor.TOTAL_RECORDS);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) DatasetState(org.apache.gobblin.runtime.JobState.DatasetState)

Example 7 with DatasetState

use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.

the class JobLauncherTestHelper method runTestWithMultipleDatasetsAndFaultyExtractor.

/**
 * Runs a job over {@code MultiDatasetTestSourceWithFaultyExtractor} with task retries disabled and
 * verifies the dataset states that end up in the dataset state store.
 *
 * <p>A {@code JobException} from the launch is tolerated. Afterwards:
 * <ul>
 *   <li>with the partial commit policy, {@code Dataset0} must have a COMMITTED state holding one
 *       COMMITTED task; otherwise no state may be stored for {@code Dataset0};</li>
 *   <li>{@code Dataset1} through {@code Dataset3} must each be COMMITTED with one completed task,
 *       and every task must carry the matching dataset URN and be COMMITTED.</li>
 * </ul>
 *
 * @param jobProps job configuration; job id, source class, max task retries and (optionally) the
 *        commit policy are overwritten by this method
 * @param usePartialCommitPolicy whether to set the job commit policy to {@code "partial"}
 * @throws Exception if launching fails with anything other than a {@code JobException}, or if
 *         closing the launcher fails
 */
public void runTestWithMultipleDatasetsAndFaultyExtractor(Properties jobProps, boolean usePartialCommitPolicy) throws Exception {
    final String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
    final String jobId = JobLauncherUtils.newJobId(jobName).toString();
    jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);
    jobProps.setProperty(ConfigurationKeys.SOURCE_CLASS_KEY, MultiDatasetTestSourceWithFaultyExtractor.class.getName());
    jobProps.setProperty(ConfigurationKeys.MAX_TASK_RETRIES_KEY, "0");
    if (usePartialCommitPolicy) {
        jobProps.setProperty(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "partial");
    }

    Closer resources = Closer.create();
    try {
        resources.register(JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)).launchJob(null);
    } catch (JobException expected) {
        // JobException is expected
    } finally {
        resources.close();
    }

    List<JobState.DatasetState> dataset0States = this.datasetStateStore.getAll(jobName, "Dataset0-current.jst");
    if (!usePartialCommitPolicy) {
        // Task 0 should have failed
        Assert.assertTrue(dataset0States.isEmpty());
    } else {
        JobState.DatasetState dataset0State = dataset0States.get(0);
        Assert.assertEquals(dataset0State.getState(), JobState.RunningState.COMMITTED);
        Assert.assertEquals(dataset0State.getTaskCount(), 1);
        TaskState onlyTask = dataset0State.getTaskStates().get(0);
        // BaseDataPublisher will change the state to COMMITTED
        Assert.assertEquals(onlyTask.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
    }

    // Datasets 1 through 3 are checked for a fully committed state.
    for (int datasetIndex = 1; datasetIndex < 4; datasetIndex++) {
        final String expectedUrn = "Dataset" + datasetIndex;
        List<JobState.DatasetState> states = this.datasetStateStore.getAll(jobName, expectedUrn + "-current.jst");
        JobState.DatasetState state = states.get(0);
        Assert.assertEquals(state.getDatasetUrn(), expectedUrn);
        Assert.assertEquals(state.getState(), JobState.RunningState.COMMITTED);
        Assert.assertEquals(state.getCompletedTasks(), 1);
        for (TaskState task : state.getTaskStates()) {
            Assert.assertEquals(task.getProp(ConfigurationKeys.DATASET_URN_KEY), expectedUrn);
            Assert.assertEquals(task.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
        }
    }
}
Also used : Closer(com.google.common.io.Closer) DatasetState(org.apache.gobblin.runtime.JobState.DatasetState)

Example 8 with DatasetState

use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.

the class JobContext method commit.

/**
 * Commits every dataset of this job and records the resulting job state.
 *
 * <p>The dataset states are recomputed and grouped by URN, the URNs are persisted through the
 * dataset state store, and one commit callable per dataset (built by
 * {@code createSafeDatasetCommit}) is run through an {@link IteratorExecutor} using
 * {@code numCommitThreads()} threads. Up to 10 failures are logged. If any commit callable did not
 * succeed, the job state is set to FAILED and an {@link IOException} is thrown; otherwise the job
 * state is set to COMMITTED.
 *
 * @param isJobCancelled whether the job was cancelled; forwarded unchanged to each dataset commit
 * @throws IOException if at least one dataset commit failed, or if the calling thread was
 *         interrupted while waiting for the commits (the interrupt status is restored before the
 *         exception is thrown)
 */
void commit(final boolean isJobCancelled) throws IOException {
    this.datasetStatesByUrns = Optional.of(computeDatasetStatesByUrns());
    final boolean shouldCommitDataInJob = shouldCommitDataInJob(this.jobState);
    final DeliverySemantics deliverySemantics = DeliverySemantics.parse(this.jobState);
    final int numCommitThreads = numCommitThreads();
    if (!shouldCommitDataInJob) {
        this.logger.info("Job will not commit data since data are committed by tasks.");
    }
    try {
        if (this.datasetStatesByUrns.isPresent()) {
            this.logger.info("Persisting dataset urns.");
            this.datasetStateStore.persistDatasetURNs(this.jobName, this.datasetStatesByUrns.get().keySet());
        }
        // Lazily turn each (urn, dataset state) entry into a commit callable; the executor pulls
        // them from the iterator and runs them on numCommitThreads threads.
        List<Either<Void, ExecutionException>> result = new IteratorExecutor<>(Iterables.transform(this.datasetStatesByUrns.get().entrySet(), new Function<Map.Entry<String, DatasetState>, Callable<Void>>() {

            @Nullable
            @Override
            public Callable<Void> apply(final Map.Entry<String, DatasetState> entry) {
                // NOTE(review): the "numCommitThreads > 1" flag presumably tells the commit that it
                // may run concurrently with other dataset commits - confirm against createSafeDatasetCommit.
                return createSafeDatasetCommit(shouldCommitDataInJob, isJobCancelled, deliverySemantics, entry.getKey(), entry.getValue(), numCommitThreads > 1, JobContext.this);
            }
        }).iterator(), numCommitThreads, ExecutorsUtils.newThreadFactory(Optional.of(this.logger), Optional.of("Commit-thread-%d"))).executeAndGetResults();
        IteratorExecutor.logFailures(result, LOG, 10);
        if (!IteratorExecutor.verifyAllSuccessful(result)) {
            this.jobState.setState(JobState.RunningState.FAILED);
            throw new IOException("Failed to commit dataset state for some dataset(s) of job " + this.jobId);
        }
    } catch (InterruptedException exc) {
        // Restore the interrupt status so code further up the stack can still observe that this
        // thread was interrupted; wrapping alone would silently clear that signal.
        Thread.currentThread().interrupt();
        throw new IOException(exc);
    }
    this.jobState.setState(JobState.RunningState.COMMITTED);
}
Also used : IOException(java.io.IOException) DeliverySemantics(org.apache.gobblin.commit.DeliverySemantics) Function(com.google.common.base.Function) Either(org.apache.gobblin.util.Either) DatasetState(org.apache.gobblin.runtime.JobState.DatasetState) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Aggregations

DatasetState (org.apache.gobblin.runtime.JobState.DatasetState): 8, Closer (com.google.common.io.Closer): 4, Configuration (org.apache.hadoop.conf.Configuration): 3, Path (org.apache.hadoop.fs.Path): 3, State (org.apache.gobblin.configuration.State): 2, FileSystem (org.apache.hadoop.fs.FileSystem): 2, BeforeClass (org.testng.annotations.BeforeClass): 2, Function (com.google.common.base.Function): 1, ImmutableMap (com.google.common.collect.ImmutableMap): 1, IOException (java.io.IOException): 1, Map (java.util.Map): 1, CommitSequence (org.apache.gobblin.commit.CommitSequence): 1, DeliverySemantics (org.apache.gobblin.commit.DeliverySemantics): 1, FsRenameCommitStep (org.apache.gobblin.commit.FsRenameCommitStep): 1, DatasetStateCommitStep (org.apache.gobblin.runtime.commit.DatasetStateCommitStep): 1, FsCommitSequenceStore (org.apache.gobblin.runtime.commit.FsCommitSequenceStore): 1, Either (org.apache.gobblin.util.Either): 1