Search in sources :

Example 1 with WorkUnitStream

Use of org.apache.gobblin.source.workunit.WorkUnitStream in project incubator-gobblin by apache.

From the class AbstractJobLauncher, method launchJob.

@Override
public void launchJob(JobListener jobListener) throws JobException {
    String jobId = this.jobContext.getJobId();
    final JobState jobState = this.jobContext.getJobState();
    try {
        MDC.put(ConfigurationKeys.JOB_NAME_KEY, this.jobContext.getJobName());
        MDC.put(ConfigurationKeys.JOB_KEY_KEY, this.jobContext.getJobKey());
        TimingEvent launchJobTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.FULL_JOB_EXECUTION);
        try (Closer closer = Closer.create()) {
            closer.register(this.jobContext);
            notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_PREPARE, new JobListenerAction() {

                @Override
                public void apply(JobListener jobListener, JobContext jobContext) throws Exception {
                    jobListener.onJobPrepare(jobContext);
                }
            });
            if (this.jobContext.getSemantics() == DeliverySemantics.EXACTLY_ONCE) {
                // If exactly-once is used, commit sequences of the previous run must be successfully completed
                // before this run can make progress.
                executeUnfinishedCommitSequences(jobState.getJobName());
            }
            TimingEvent workUnitsCreationTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.WORK_UNITS_CREATION);
            Source<?, ?> source = this.jobContext.getSource();
            WorkUnitStream workUnitStream;
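            // A WorkUnitStreamSource can hand out its work units lazily as a stream;
            // any other Source returns a materialized list, which is wrapped into a
            // BasicWorkUnitStream so both cases flow through the same code path below.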
            if (source instanceof WorkUnitStreamSource) {
                workUnitStream = ((WorkUnitStreamSource) source).getWorkunitStream(jobState);
            } else {
                workUnitStream = new BasicWorkUnitStream.Builder(source.getWorkunits(jobState)).build();
            }
            workUnitsCreationTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.WORK_UNITS_CREATION));
            // A missing stream or work unit iterator means something went wrong while getting the work units
            if (workUnitStream == null || workUnitStream.getWorkUnits() == null) {
                this.eventSubmitter.submit(JobEvent.WORK_UNITS_MISSING);
                jobState.setState(JobState.RunningState.FAILED);
                throw new JobException("Failed to get work units for job " + jobId);
            }
            // No work units to run
            if (!workUnitStream.getWorkUnits().hasNext()) {
                this.eventSubmitter.submit(JobEvent.WORK_UNITS_EMPTY);
                LOG.warn("No work units have been created for job " + jobId);
                jobState.setState(JobState.RunningState.COMMITTED);
                notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_COMPLETE, new JobListenerAction() {

                    @Override
                    public void apply(JobListener jobListener, JobContext jobContext) throws Exception {
                        jobListener.onJobCompletion(jobContext);
                    }
                });
                return;
            }
            // Initialize writer and converter(s)
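            // Note: "newInstace" below appears to be the factory method's actual (misspelled) name in Gobblin, not a transcription error.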
            closer.register(WriterInitializerFactory.newInstace(jobState, workUnitStream)).initialize();
            closer.register(ConverterInitializerFactory.newInstance(jobState, workUnitStream)).initialize();
            TimingEvent stagingDataCleanTimer = this.eventSubmitter.getTimingEvent(TimingEvent.RunJobTimings.MR_STAGING_DATA_CLEAN);
            // Clean up leftover staging data possibly left by the previous run. This is particularly
            // important if the current batch of WorkUnits includes failed WorkUnits from the previous
            // run, which may still have leftover staging data that was never cleaned up.
            cleanLeftoverStagingData(workUnitStream, jobState);
            stagingDataCleanTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.MR_STAGING_DATA_CLEAN));
            long startTime = System.currentTimeMillis();
            jobState.setStartTime(startTime);
            jobState.setState(JobState.RunningState.RUNNING);
            try {
                LOG.info("Starting job " + jobId);
                notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_START, new JobListenerAction() {

                    @Override
                    public void apply(JobListener jobListener, JobContext jobContext) throws Exception {
                        jobListener.onJobStart(jobContext);
                    }
                });
                TimingEvent workUnitsPreparationTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.WORK_UNITS_PREPARATION);
                // Add task ids
                workUnitStream = prepareWorkUnits(workUnitStream, jobState);
                // Remove skipped workUnits from the list of work units to execute.
                workUnitStream = workUnitStream.filter(new SkippedWorkUnitsFilter(jobState));
                // Add surviving tasks to jobState
                workUnitStream = workUnitStream.transform(new MultiWorkUnitForEach() {

                    @Override
                    public void forWorkUnit(WorkUnit workUnit) {
                        jobState.incrementTaskCount();
                        jobState.addTaskState(new TaskState(new WorkUnitState(workUnit, jobState)));
                    }
                });
                workUnitsPreparationTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.WORK_UNITS_PREPARATION));
                // Write job execution info to the job history store before the job starts to run
                this.jobContext.storeJobExecutionInfo();
                TimingEvent jobRunTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.JOB_RUN);
                // Start the job and wait for it to finish
                runWorkUnitStream(workUnitStream);
                jobRunTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.JOB_RUN));
                this.eventSubmitter.submit(CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, "JOB_" + jobState.getState()));
                // Check and set the final job state upon job completion
                if (jobState.getState() == JobState.RunningState.CANCELLED) {
                    LOG.info(String.format("Job %s has been cancelled, aborting now", jobId));
                    return;
                }
                TimingEvent jobCommitTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.JOB_COMMIT);
                this.jobContext.finalizeJobStateBeforeCommit();
                this.jobContext.commit();
                postProcessJobState(jobState);
                jobCommitTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.JOB_COMMIT));
            } finally {
                long endTime = System.currentTimeMillis();
                jobState.setEndTime(endTime);
                jobState.setDuration(endTime - jobState.getStartTime());
            }
        } catch (Throwable t) {
            jobState.setState(JobState.RunningState.FAILED);
            String errMsg = "Failed to launch and run job " + jobId;
            LOG.error(errMsg + ": " + t, t);
        } finally {
            try {
                TimingEvent jobCleanupTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.JOB_CLEANUP);
                cleanupStagingData(jobState);
                jobCleanupTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.JOB_CLEANUP));
                // Write job execution info to the job history store upon job termination
                this.jobContext.storeJobExecutionInfo();
            } finally {
                launchJobTimer.stop(this.eventMetadataGenerator.getMetadata(this.jobContext, EventName.FULL_JOB_EXECUTION));
            }
        }
        for (JobState.DatasetState datasetState : this.jobContext.getDatasetStatesByUrns().values()) {
            // Set the overall job state to FAILED if the job failed to process any dataset
            if (datasetState.getState() == JobState.RunningState.FAILED) {
                jobState.setState(JobState.RunningState.FAILED);
                LOG.warn("At least one dataset state is FAILED. Setting job state to FAILED.");
                break;
            }
        }
        notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_COMPLETE, new JobListenerAction() {

            @Override
            public void apply(JobListener jobListener, JobContext jobContext) throws Exception {
                jobListener.onJobCompletion(jobContext);
            }
        });
        if (jobState.getState() == JobState.RunningState.FAILED) {
            notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_FAILED, new JobListenerAction() {

                @Override
                public void apply(JobListener jobListener, JobContext jobContext) throws Exception {
                    jobListener.onJobFailure(jobContext);
                }
            });
            throw new JobException(String.format("Job %s failed", jobId));
        }
    } finally {
        // Stop metrics reporting
        if (this.jobContext.getJobMetricsOptional().isPresent()) {
            JobMetrics.remove(jobState);
        }
        MDC.remove(ConfigurationKeys.JOB_NAME_KEY);
        MDC.remove(ConfigurationKeys.JOB_KEY_KEY);
    }
}
Also used : Closer(com.google.common.io.Closer) WorkUnitStream(org.apache.gobblin.source.workunit.WorkUnitStream) BasicWorkUnitStream(org.apache.gobblin.source.workunit.BasicWorkUnitStream) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) TimingEvent(org.apache.gobblin.metrics.event.TimingEvent) CloseableJobListener(org.apache.gobblin.runtime.listeners.CloseableJobListener) JobListener(org.apache.gobblin.runtime.listeners.JobListener) JobLockException(org.apache.gobblin.runtime.locks.JobLockException) IOException(java.io.IOException) WorkUnitStreamSource(org.apache.gobblin.source.WorkUnitStreamSource) MultiWorkUnit(org.apache.gobblin.source.workunit.MultiWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit)
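
A Source opts into the streaming path simply by implementing WorkUnitStreamSource; anything else is wrapped into a BasicWorkUnitStream, as the branch above shows. Below is a minimal sketch of such a source, not actual Gobblin code: the class name MyStreamingSource and the buildWorkUnits helper are invented for illustration, and the extractor is stubbed out.

import java.io.IOException;
import java.util.List;

import com.google.common.collect.Lists;

import org.apache.gobblin.configuration.SourceState;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.source.WorkUnitStreamSource;
import org.apache.gobblin.source.extractor.Extractor;
import org.apache.gobblin.source.workunit.BasicWorkUnitStream;
import org.apache.gobblin.source.workunit.WorkUnit;
import org.apache.gobblin.source.workunit.WorkUnitStream;

public class MyStreamingSource implements WorkUnitStreamSource<String, String> {

    @Override
    public WorkUnitStream getWorkunitStream(SourceState state) {
        // The list-backed builder is the same one AbstractJobLauncher uses to wrap
        // plain sources; a genuinely streaming source would hand the builder a lazy
        // iterator instead of a materialized list.
        return new BasicWorkUnitStream.Builder(buildWorkUnits(state)).build();
    }

    @Override
    public List<WorkUnit> getWorkunits(SourceState state) {
        // Eager fallback for callers that do not understand streams.
        return buildWorkUnits(state);
    }

    @Override
    public Extractor<String, String> getExtractor(WorkUnitState state) throws IOException {
        throw new UnsupportedOperationException("Extractor wiring is omitted from this sketch");
    }

    @Override
    public void shutdown(SourceState state) {
        // Nothing to release in this sketch.
    }

    // Hypothetical helper; a real source would derive work units from its own config.
    private List<WorkUnit> buildWorkUnits(SourceState state) {
        return Lists.newArrayList(WorkUnit.createEmpty());
    }
}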

Example 2 with WorkUnitStream

Use of org.apache.gobblin.source.workunit.WorkUnitStream in project incubator-gobblin by apache.

From the class DatasetFinderSourceTest, method testDrilledDown.

@Test
public void testDrilledDown() {
    Dataset dataset1 = new SimpleDatasetForTesting("dataset1");
    Dataset dataset2 = new SimplePartitionableDatasetForTesting("dataset2", Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2")));
    Dataset dataset3 = new SimpleDatasetForTesting("dataset3");
    IterableDatasetFinder finder = new StaticDatasetsFinderForTesting(Lists.newArrayList(dataset1, dataset2, dataset3));
    MySource mySource = new MySource(true, finder);
    List<WorkUnit> workUnits = mySource.getWorkunits(new SourceState());
    Assert.assertEquals(workUnits.size(), 4);
    Assert.assertEquals(workUnits.get(0).getProp(DATASET_URN), "dataset1");
    Assert.assertNull(workUnits.get(0).getProp(PARTITION_URN));
    Assert.assertEquals(workUnits.get(1).getProp(DATASET_URN), "dataset2");
    Assert.assertEquals(workUnits.get(1).getProp(PARTITION_URN), "p1");
    Assert.assertEquals(workUnits.get(2).getProp(DATASET_URN), "dataset2");
    Assert.assertEquals(workUnits.get(2).getProp(PARTITION_URN), "p2");
    Assert.assertEquals(workUnits.get(3).getProp(DATASET_URN), "dataset3");
    Assert.assertNull(workUnits.get(3).getProp(PARTITION_URN));
    WorkUnitStream workUnitStream = mySource.getWorkunitStream(new SourceState());
    Assert.assertEquals(Lists.newArrayList(workUnitStream.getWorkUnits()), workUnits);
}
Also used : SimpleDatasetPartitionForTesting(org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting) WorkUnitStream(org.apache.gobblin.source.workunit.WorkUnitStream) SimpleDatasetForTesting(org.apache.gobblin.dataset.test.SimpleDatasetForTesting) SourceState(org.apache.gobblin.configuration.SourceState) IterableDatasetFinder(org.apache.gobblin.dataset.IterableDatasetFinder) PartitionableDataset(org.apache.gobblin.dataset.PartitionableDataset) Dataset(org.apache.gobblin.dataset.Dataset) SimplePartitionableDatasetForTesting(org.apache.gobblin.dataset.test.SimplePartitionableDatasetForTesting) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) StaticDatasetsFinderForTesting(org.apache.gobblin.dataset.test.StaticDatasetsFinderForTesting) Test(org.testng.annotations.Test)
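
The MySource above is the test's own helper: a DatasetFinderSource subclass that emits one work unit per dataset, or one per partition when the drilldown flag is set. The sketch below is modeled on the test rather than copied from it; the constructor shape, the hook names (workUnitForDataset, workUnitForDatasetPartition, getDatasetsFinder), the import package, and the URN property values are assumptions.

import java.io.IOException;

import org.apache.gobblin.configuration.SourceState;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.dataset.Dataset;
import org.apache.gobblin.dataset.IterableDatasetFinder;
import org.apache.gobblin.dataset.PartitionableDataset;
// Package assumed for this sketch.
import org.apache.gobblin.source.DatasetFinderSource;
import org.apache.gobblin.source.extractor.Extractor;
import org.apache.gobblin.source.workunit.WorkUnit;

public class MySource extends DatasetFinderSource<String, String> {

    // Illustrative property names; the real test defines its own constants.
    public static final String DATASET_URN = "dataset.urn";
    public static final String PARTITION_URN = "partition.urn";

    private final IterableDatasetFinder datasetsFinder;

    public MySource(boolean drilldownIntoPartitions, IterableDatasetFinder datasetsFinder) {
        super(drilldownIntoPartitions);
        this.datasetsFinder = datasetsFinder;
    }

    @Override
    protected WorkUnit workUnitForDataset(Dataset dataset) {
        // One work unit per dataset, tagged with the dataset URN.
        WorkUnit workUnit = WorkUnit.createEmpty();
        workUnit.setProp(DATASET_URN, dataset.getUrn());
        return workUnit;
    }

    @Override
    protected WorkUnit workUnitForDatasetPartition(PartitionableDataset.DatasetPartition partition) {
        // Drilldown case: one work unit per partition, tagged with both URNs.
        WorkUnit workUnit = workUnitForDataset(partition.getDataset());
        workUnit.setProp(PARTITION_URN, partition.getUrn());
        return workUnit;
    }

    @Override
    protected IterableDatasetFinder getDatasetsFinder(SourceState state) {
        // Assumed hook: return the injected finder instead of building one from state.
        return this.datasetsFinder;
    }

    @Override
    public Extractor<String, String> getExtractor(WorkUnitState state) throws IOException {
        throw new UnsupportedOperationException("Not needed for work-unit generation tests");
    }

    @Override
    public void shutdown(SourceState state) {
        // No-op.
    }
}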

Example 3 with WorkUnitStream

Use of org.apache.gobblin.source.workunit.WorkUnitStream in project incubator-gobblin by apache.

From the class LoopingDatasetFinderSourceTest, method testNonDrilldown.

@Test
public void testNonDrilldown() {
    Dataset dataset1 = new SimpleDatasetForTesting("dataset1");
    Dataset dataset2 = new SimplePartitionableDatasetForTesting("dataset2", Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2")));
    Dataset dataset3 = new SimpleDatasetForTesting("dataset3");
    Dataset dataset4 = new SimpleDatasetForTesting("dataset4");
    Dataset dataset5 = new SimpleDatasetForTesting("dataset5");
    IterableDatasetFinder finder = new StaticDatasetsFinderForTesting(Lists.newArrayList(dataset5, dataset4, dataset3, dataset2, dataset1));
    MySource mySource = new MySource(false, finder);
    SourceState sourceState = new SourceState();
    sourceState.setProp(LoopingDatasetFinderSource.MAX_WORK_UNITS_PER_RUN_KEY, 3);
    WorkUnitStream workUnitStream = mySource.getWorkunitStream(sourceState);
    List<WorkUnit> workUnits = Lists.newArrayList(workUnitStream.getWorkUnits());
    Assert.assertEquals(workUnits.size(), 3);
    Assert.assertEquals(workUnits.get(0).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset1");
    Assert.assertNull(workUnits.get(0).getProp(DatasetFinderSourceTest.PARTITION_URN));
    Assert.assertEquals(workUnits.get(1).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset2");
    Assert.assertNull(workUnits.get(1).getProp(DatasetFinderSourceTest.PARTITION_URN));
    Assert.assertEquals(workUnits.get(2).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset3");
    Assert.assertNull(workUnits.get(2).getProp(DatasetFinderSourceTest.PARTITION_URN));
    // Second run should continue where it left off
    List<WorkUnitState> workUnitStates = workUnits.stream().map(WorkUnitState::new).collect(Collectors.toList());
    SourceState sourceStateSpy = Mockito.spy(sourceState);
    Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates();
    workUnitStream = mySource.getWorkunitStream(sourceStateSpy);
    workUnits = Lists.newArrayList(workUnitStream.getWorkUnits());
    Assert.assertEquals(workUnits.size(), 3);
    Assert.assertEquals(workUnits.get(0).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset4");
    Assert.assertNull(workUnits.get(0).getProp(DatasetFinderSourceTest.PARTITION_URN));
    Assert.assertEquals(workUnits.get(1).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset5");
    Assert.assertNull(workUnits.get(1).getProp(DatasetFinderSourceTest.PARTITION_URN));
    Assert.assertTrue(workUnits.get(2).getPropAsBoolean(LoopingDatasetFinderSource.END_OF_DATASETS_KEY));
    // Loop around
    workUnitStates = workUnits.stream().map(WorkUnitState::new).collect(Collectors.toList());
    Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates();
    workUnitStream = mySource.getWorkunitStream(sourceStateSpy);
    workUnits = Lists.newArrayList(workUnitStream.getWorkUnits());
    Assert.assertEquals(workUnits.size(), 3);
    Assert.assertEquals(workUnits.get(0).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset1");
    Assert.assertNull(workUnits.get(0).getProp(DatasetFinderSourceTest.PARTITION_URN));
    Assert.assertEquals(workUnits.get(1).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset2");
    Assert.assertNull(workUnits.get(1).getProp(DatasetFinderSourceTest.PARTITION_URN));
    Assert.assertEquals(workUnits.get(2).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset3");
    Assert.assertNull(workUnits.get(2).getProp(DatasetFinderSourceTest.PARTITION_URN));
}
Also used : SimpleDatasetPartitionForTesting(org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting) WorkUnitStream(org.apache.gobblin.source.workunit.WorkUnitStream) SimpleDatasetForTesting(org.apache.gobblin.dataset.test.SimpleDatasetForTesting) SourceState(org.apache.gobblin.configuration.SourceState) IterableDatasetFinder(org.apache.gobblin.dataset.IterableDatasetFinder) PartitionableDataset(org.apache.gobblin.dataset.PartitionableDataset) Dataset(org.apache.gobblin.dataset.Dataset) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) SimplePartitionableDatasetForTesting(org.apache.gobblin.dataset.test.SimplePartitionableDatasetForTesting) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) StaticDatasetsFinderForTesting(org.apache.gobblin.dataset.test.StaticDatasetsFinderForTesting) Test(org.testng.annotations.Test)
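
The loop boundary is signaled in-band: once a sweep over all datasets completes, the source emits a marker work unit carrying END_OF_DATASETS_KEY, which the test checks with getPropAsBoolean. A small self-contained sketch of the same check, with the LoopingDatasetFinderSource package assumed for the import:

import java.util.List;

// Package assumed for this sketch.
import org.apache.gobblin.source.LoopingDatasetFinderSource;
import org.apache.gobblin.source.workunit.WorkUnit;

/** Returns true if this run's work units include the end-of-datasets marker. */
static boolean sweepCompleted(List<WorkUnit> workUnits) {
    for (WorkUnit workUnit : workUnits) {
        // The marker work unit signals that a full pass over all datasets has
        // finished; the next run will start over from the beginning.
        if (workUnit.getPropAsBoolean(LoopingDatasetFinderSource.END_OF_DATASETS_KEY, false)) {
            return true;
        }
    }
    return false;
}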

Example 4 with WorkUnitStream

Use of org.apache.gobblin.source.workunit.WorkUnitStream in project incubator-gobblin by apache.

From the class LoopingDatasetFinderSourceTest, method testDrilldown.

@Test
public void testDrilldown() {
    // Create three datasets, two of them partitioned
    Dataset dataset1 = new SimpleDatasetForTesting("dataset1");
    Dataset dataset2 = new SimplePartitionableDatasetForTesting("dataset2", Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2"), new SimpleDatasetPartitionForTesting("p3")));
    Dataset dataset3 = new SimplePartitionableDatasetForTesting("dataset3", Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2"), new SimpleDatasetPartitionForTesting("p3")));
    IterableDatasetFinder finder = new StaticDatasetsFinderForTesting(Lists.newArrayList(dataset3, dataset2, dataset1));
    MySource mySource = new MySource(true, finder);
    // Limit to 3 work units per run
    SourceState sourceState = new SourceState();
    sourceState.setProp(LoopingDatasetFinderSource.MAX_WORK_UNITS_PER_RUN_KEY, 3);
    // First run: get the first three work units
    WorkUnitStream workUnitStream = mySource.getWorkunitStream(sourceState);
    List<WorkUnit> workUnits = Lists.newArrayList(workUnitStream.getWorkUnits());
    Assert.assertEquals(workUnits.size(), 3);
    Assert.assertEquals(workUnits.get(0).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset1");
    Assert.assertNull(workUnits.get(0).getProp(DatasetFinderSourceTest.PARTITION_URN));
    Assert.assertEquals(workUnits.get(1).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset2");
    Assert.assertEquals(workUnits.get(1).getProp(DatasetFinderSourceTest.PARTITION_URN), "p1");
    Assert.assertEquals(workUnits.get(2).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset2");
    Assert.assertEquals(workUnits.get(2).getProp(DatasetFinderSourceTest.PARTITION_URN), "p2");
    // Second run should continue where it left off
    List<WorkUnitState> workUnitStates = workUnits.stream().map(WorkUnitState::new).collect(Collectors.toList());
    SourceState sourceStateSpy = Mockito.spy(sourceState);
    Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates();
    workUnitStream = mySource.getWorkunitStream(sourceStateSpy);
    workUnits = Lists.newArrayList(workUnitStream.getWorkUnits());
    Assert.assertEquals(workUnits.size(), 3);
    Assert.assertEquals(workUnits.get(0).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset2");
    Assert.assertEquals(workUnits.get(0).getProp(DatasetFinderSourceTest.PARTITION_URN), "p3");
    Assert.assertEquals(workUnits.get(1).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset3");
    Assert.assertEquals(workUnits.get(1).getProp(DatasetFinderSourceTest.PARTITION_URN), "p1");
    Assert.assertEquals(workUnits.get(2).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset3");
    Assert.assertEquals(workUnits.get(2).getProp(DatasetFinderSourceTest.PARTITION_URN), "p2");
    // Third run: continue from where the second run left off
    workUnitStates = workUnits.stream().map(WorkUnitState::new).collect(Collectors.toList());
    Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates();
    workUnitStream = mySource.getWorkunitStream(sourceStateSpy);
    workUnits = Lists.newArrayList(workUnitStream.getWorkUnits());
    Assert.assertEquals(workUnits.size(), 2);
    Assert.assertEquals(workUnits.get(0).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset3");
    Assert.assertEquals(workUnits.get(0).getProp(DatasetFinderSourceTest.PARTITION_URN), "p3");
    Assert.assertTrue(workUnits.get(1).getPropAsBoolean(LoopingDatasetFinderSource.END_OF_DATASETS_KEY));
    // Fourth run: all work units finished, so loop around
    workUnitStates = workUnits.stream().map(WorkUnitState::new).collect(Collectors.toList());
    Mockito.doReturn(workUnitStates).when(sourceStateSpy).getPreviousWorkUnitStates();
    workUnitStream = mySource.getWorkunitStream(sourceStateSpy);
    workUnits = Lists.newArrayList(workUnitStream.getWorkUnits());
    Assert.assertEquals(workUnits.size(), 3);
    Assert.assertEquals(workUnits.get(0).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset1");
    Assert.assertNull(workUnits.get(0).getProp(DatasetFinderSourceTest.PARTITION_URN));
    Assert.assertEquals(workUnits.get(1).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset2");
    Assert.assertEquals(workUnits.get(1).getProp(DatasetFinderSourceTest.PARTITION_URN), "p1");
    Assert.assertEquals(workUnits.get(2).getProp(DatasetFinderSourceTest.DATASET_URN), "dataset2");
    Assert.assertEquals(workUnits.get(2).getProp(DatasetFinderSourceTest.PARTITION_URN), "p2");
}
Also used : SimpleDatasetPartitionForTesting(org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting) WorkUnitStream(org.apache.gobblin.source.workunit.WorkUnitStream) SimpleDatasetForTesting(org.apache.gobblin.dataset.test.SimpleDatasetForTesting) SourceState(org.apache.gobblin.configuration.SourceState) IterableDatasetFinder(org.apache.gobblin.dataset.IterableDatasetFinder) PartitionableDataset(org.apache.gobblin.dataset.PartitionableDataset) Dataset(org.apache.gobblin.dataset.Dataset) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) SimplePartitionableDatasetForTesting(org.apache.gobblin.dataset.test.SimplePartitionableDatasetForTesting) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) StaticDatasetsFinderForTesting(org.apache.gobblin.dataset.test.StaticDatasetsFinderForTesting) Test(org.testng.annotations.Test)
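
Each run resumes from a watermark derived from the previous run's work unit states: conceptually, the lexicographically largest (dataset URN, partition URN) pair seen so far, with the next run starting strictly after it. The fragment below illustrates that selection only; it is not LoopingDatasetFinderSource's actual implementation, and it glosses over the end-of-datasets marker and unpartitioned datasets:

import java.util.Comparator;
import java.util.List;
import java.util.Optional;

import org.apache.gobblin.configuration.WorkUnitState;

/** Illustration: the (dataset, partition) pair the previous run ended on, if any. */
static Optional<WorkUnitState> lastProcessed(List<WorkUnitState> previousStates) {
    return previousStates.stream()
            .max(Comparator
                    .comparing((WorkUnitState s) -> s.getProp(DatasetFinderSourceTest.DATASET_URN, ""))
                    .thenComparing(s -> s.getProp(DatasetFinderSourceTest.PARTITION_URN, "")));
}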

Example 5 with WorkUnitStream

Use of org.apache.gobblin.source.workunit.WorkUnitStream in project incubator-gobblin by apache.

From the class DatasetFinderSourceTest, method testNonDrilledDown.

@Test
public void testNonDrilledDown() {
    Dataset dataset1 = new SimpleDatasetForTesting("dataset1");
    Dataset dataset2 = new SimplePartitionableDatasetForTesting("dataset2", Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2")));
    Dataset dataset3 = new SimpleDatasetForTesting("dataset3");
    IterableDatasetFinder finder = new StaticDatasetsFinderForTesting(Lists.newArrayList(dataset1, dataset2, dataset3));
    MySource mySource = new MySource(false, finder);
    List<WorkUnit> workUnits = mySource.getWorkunits(new SourceState());
    Assert.assertEquals(workUnits.size(), 3);
    Assert.assertEquals(workUnits.get(0).getProp(DATASET_URN), "dataset1");
    Assert.assertNull(workUnits.get(0).getProp(PARTITION_URN));
    Assert.assertEquals(workUnits.get(1).getProp(DATASET_URN), "dataset2");
    Assert.assertNull(workUnits.get(1).getProp(PARTITION_URN));
    Assert.assertEquals(workUnits.get(2).getProp(DATASET_URN), "dataset3");
    Assert.assertNull(workUnits.get(2).getProp(PARTITION_URN));
    WorkUnitStream workUnitStream = mySource.getWorkunitStream(new SourceState());
    Assert.assertEquals(Lists.newArrayList(workUnitStream.getWorkUnits()), workUnits);
}
Also used : SimpleDatasetPartitionForTesting(org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting) WorkUnitStream(org.apache.gobblin.source.workunit.WorkUnitStream) SimpleDatasetForTesting(org.apache.gobblin.dataset.test.SimpleDatasetForTesting) SourceState(org.apache.gobblin.configuration.SourceState) IterableDatasetFinder(org.apache.gobblin.dataset.IterableDatasetFinder) PartitionableDataset(org.apache.gobblin.dataset.PartitionableDataset) Dataset(org.apache.gobblin.dataset.Dataset) SimplePartitionableDatasetForTesting(org.apache.gobblin.dataset.test.SimplePartitionableDatasetForTesting) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) StaticDatasetsFinderForTesting(org.apache.gobblin.dataset.test.StaticDatasetsFinderForTesting) Test(org.testng.annotations.Test)

Aggregations

WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 6 uses
WorkUnitStream (org.apache.gobblin.source.workunit.WorkUnitStream): 6 uses
SourceState (org.apache.gobblin.configuration.SourceState): 5 uses
Dataset (org.apache.gobblin.dataset.Dataset): 5 uses
IterableDatasetFinder (org.apache.gobblin.dataset.IterableDatasetFinder): 5 uses
PartitionableDataset (org.apache.gobblin.dataset.PartitionableDataset): 5 uses
WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 4 uses
SimpleDatasetForTesting (org.apache.gobblin.dataset.test.SimpleDatasetForTesting): 4 uses
SimpleDatasetPartitionForTesting (org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting): 4 uses
SimplePartitionableDatasetForTesting (org.apache.gobblin.dataset.test.SimplePartitionableDatasetForTesting): 4 uses
StaticDatasetsFinderForTesting (org.apache.gobblin.dataset.test.StaticDatasetsFinderForTesting): 4 uses
Test (org.testng.annotations.Test): 4 uses
IOException (java.io.IOException): 2 uses
BasicWorkUnitStream (org.apache.gobblin.source.workunit.BasicWorkUnitStream): 2 uses
AbstractIterator (com.google.common.collect.AbstractIterator): 1 use
Iterators (com.google.common.collect.Iterators): 1 use
Lists (com.google.common.collect.Lists): 1 use
PeekingIterator (com.google.common.collect.PeekingIterator): 1 use
Closer (com.google.common.io.Closer): 1 use
Iterator (java.util.Iterator): 1 use