Use of org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting in the project incubator-gobblin by Apache.
From the class DatasetFinderSourceTest, method testDrilledDown.
/**
 * Exercises {@code MySource} constructed with {@code true} as its first argument
 * (presumably the drill-down flag) over two plain datasets and one partitionable dataset.
 *
 * <p>Expects four work units in finder order: one per plain dataset with no partition URN,
 * and one per partition ("p1", "p2") of "dataset2". Finally checks that the stream API
 * yields the same work units as the list API.
 */
@Test
public void testDrilledDown() {
  Dataset plainFirst = new SimpleDatasetForTesting("dataset1");
  Dataset partitioned = new SimplePartitionableDatasetForTesting("dataset2", Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2")));
  Dataset plainLast = new SimpleDatasetForTesting("dataset3");
  IterableDatasetFinder finder = new StaticDatasetsFinderForTesting(Lists.newArrayList(plainFirst, partitioned, plainLast));
  MySource source = new MySource(true, finder);

  // Expected dataset / partition URN per work unit, in order; a null partition means "must be absent".
  String[] expectedDatasets = {"dataset1", "dataset2", "dataset2", "dataset3"};
  String[] expectedPartitions = {null, "p1", "p2", null};

  List<WorkUnit> listed = source.getWorkunits(new SourceState());
  Assert.assertEquals(listed.size(), 4);
  for (int i = 0; i < expectedDatasets.length; i++) {
    WorkUnit unit = listed.get(i);
    Assert.assertEquals(unit.getProp(DATASET_URN), expectedDatasets[i]);
    if (expectedPartitions[i] == null) {
      Assert.assertNull(unit.getProp(PARTITION_URN));
    } else {
      Assert.assertEquals(unit.getProp(PARTITION_URN), expectedPartitions[i]);
    }
  }

  // The streaming entry point must produce the same work units as the list entry point.
  WorkUnitStream streamed = source.getWorkunitStream(new SourceState());
  Assert.assertEquals(Lists.newArrayList(streamed.getWorkUnits()), listed);
}
Use of org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting in the project incubator-gobblin by Apache.
From the class LoopingDatasetFinderSourceTest, method testNonDrilldown.
/**
 * Exercises {@code MySource} constructed with {@code false} as its first argument
 * (presumably "no drill-down") and a cap of three work units per run.
 *
 * <p>The finder is handed the five datasets in reverse order, yet the test expects work
 * units in ascending URN order, none carrying a partition URN. Three consecutive runs are
 * checked: the first three datasets, then the remaining two followed by an end-of-datasets
 * marker, then a wrap-around back to the first three datasets. Each later run is fed the
 * previous run's work units through a spied {@code SourceState}.
 */
@Test
public void testNonDrilldown() {
  Dataset ds1 = new SimpleDatasetForTesting("dataset1");
  Dataset ds2 = new SimplePartitionableDatasetForTesting("dataset2", Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2")));
  Dataset ds3 = new SimpleDatasetForTesting("dataset3");
  Dataset ds4 = new SimpleDatasetForTesting("dataset4");
  Dataset ds5 = new SimpleDatasetForTesting("dataset5");
  IterableDatasetFinder finder = new StaticDatasetsFinderForTesting(Lists.newArrayList(ds5, ds4, ds3, ds2, ds1));
  MySource source = new MySource(false, finder);

  SourceState state = new SourceState();
  state.setProp(LoopingDatasetFinderSource.MAX_WORK_UNITS_PER_RUN_KEY, 3);

  // First run: no previous state, expect the first three datasets.
  String[] firstThree = {"dataset1", "dataset2", "dataset3"};
  List<WorkUnit> batch = Lists.newArrayList(source.getWorkunitStream(state).getWorkUnits());
  Assert.assertEquals(batch.size(), 3);
  for (int i = 0; i < firstThree.length; i++) {
    Assert.assertEquals(batch.get(i).getProp(DatasetFinderSourceTest.DATASET_URN), firstThree[i]);
    Assert.assertNull(batch.get(i).getProp(DatasetFinderSourceTest.PARTITION_URN));
  }

  // Second run should continue where it left off
  List<WorkUnitState> previous = batch.stream().map(WorkUnitState::new).collect(Collectors.toList());
  SourceState spiedState = Mockito.spy(state);
  Mockito.doReturn(previous).when(spiedState).getPreviousWorkUnitStates();
  batch = Lists.newArrayList(source.getWorkunitStream(spiedState).getWorkUnits());
  Assert.assertEquals(batch.size(), 3);
  String[] lastTwo = {"dataset4", "dataset5"};
  for (int i = 0; i < lastTwo.length; i++) {
    Assert.assertEquals(batch.get(i).getProp(DatasetFinderSourceTest.DATASET_URN), lastTwo[i]);
    Assert.assertNull(batch.get(i).getProp(DatasetFinderSourceTest.PARTITION_URN));
  }
  // The third slot of this run is the end-of-datasets marker rather than a dataset.
  Assert.assertTrue(batch.get(2).getPropAsBoolean(LoopingDatasetFinderSource.END_OF_DATASETS_KEY));

  // Loop around
  previous = batch.stream().map(WorkUnitState::new).collect(Collectors.toList());
  Mockito.doReturn(previous).when(spiedState).getPreviousWorkUnitStates();
  batch = Lists.newArrayList(source.getWorkunitStream(spiedState).getWorkUnits());
  Assert.assertEquals(batch.size(), 3);
  for (int i = 0; i < firstThree.length; i++) {
    Assert.assertEquals(batch.get(i).getProp(DatasetFinderSourceTest.DATASET_URN), firstThree[i]);
    Assert.assertNull(batch.get(i).getProp(DatasetFinderSourceTest.PARTITION_URN));
  }
}
Use of org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting in the project incubator-gobblin by Apache.
From the class LoopingDatasetFinderSourceTest, method testDrilldown.
/**
 * Exercises {@code MySource} constructed with {@code true} as its first argument
 * (presumably the drill-down flag) and a cap of three work units per run.
 *
 * <p>One plain dataset and two partitionable datasets (three partitions each) are handed to
 * the finder in reverse order. Four consecutive runs are checked, each later run being fed
 * the previous run's work units through a spied {@code SourceState}:
 * <ol>
 *   <li>dataset1, dataset2/p1, dataset2/p2</li>
 *   <li>dataset2/p3, dataset3/p1, dataset3/p2</li>
 *   <li>dataset3/p3 followed by an end-of-datasets marker (two work units only)</li>
 *   <li>wrap-around: the same three work units as the first run</li>
 * </ol>
 */
@Test
public void testDrilldown() {
  // Create three datasets, two of them partitioned
  Dataset plain = new SimpleDatasetForTesting("dataset1");
  Dataset partitionedA = new SimplePartitionableDatasetForTesting("dataset2", Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2"), new SimpleDatasetPartitionForTesting("p3")));
  Dataset partitionedB = new SimplePartitionableDatasetForTesting("dataset3", Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2"), new SimpleDatasetPartitionForTesting("p3")));
  IterableDatasetFinder finder = new StaticDatasetsFinderForTesting(Lists.newArrayList(partitionedB, partitionedA, plain));
  MySource source = new MySource(true, finder);

  // Limit to 3 wunits per run
  SourceState state = new SourceState();
  state.setProp(LoopingDatasetFinderSource.MAX_WORK_UNITS_PER_RUN_KEY, 3);

  // Each row is {dataset URN, partition URN}; a null partition means "must be absent".
  String[][] openingBatch = {{"dataset1", null}, {"dataset2", "p1"}, {"dataset2", "p2"}};
  String[][] middleBatch = {{"dataset2", "p3"}, {"dataset3", "p1"}, {"dataset3", "p2"}};

  // first run, get three first work units
  List<WorkUnit> batch = Lists.newArrayList(source.getWorkunitStream(state).getWorkUnits());
  Assert.assertEquals(batch.size(), 3);
  for (int i = 0; i < openingBatch.length; i++) {
    Assert.assertEquals(batch.get(i).getProp(DatasetFinderSourceTest.DATASET_URN), openingBatch[i][0]);
    if (openingBatch[i][1] == null) {
      Assert.assertNull(batch.get(i).getProp(DatasetFinderSourceTest.PARTITION_URN));
    } else {
      Assert.assertEquals(batch.get(i).getProp(DatasetFinderSourceTest.PARTITION_URN), openingBatch[i][1]);
    }
  }

  // Second run should continue where it left off
  List<WorkUnitState> previous = batch.stream().map(WorkUnitState::new).collect(Collectors.toList());
  SourceState spiedState = Mockito.spy(state);
  Mockito.doReturn(previous).when(spiedState).getPreviousWorkUnitStates();
  batch = Lists.newArrayList(source.getWorkunitStream(spiedState).getWorkUnits());
  Assert.assertEquals(batch.size(), 3);
  for (int i = 0; i < middleBatch.length; i++) {
    Assert.assertEquals(batch.get(i).getProp(DatasetFinderSourceTest.DATASET_URN), middleBatch[i][0]);
    Assert.assertEquals(batch.get(i).getProp(DatasetFinderSourceTest.PARTITION_URN), middleBatch[i][1]);
  }

  // third run, continue from where it left off
  previous = batch.stream().map(WorkUnitState::new).collect(Collectors.toList());
  Mockito.doReturn(previous).when(spiedState).getPreviousWorkUnitStates();
  batch = Lists.newArrayList(source.getWorkunitStream(spiedState).getWorkUnits());
  Assert.assertEquals(batch.size(), 2);
  WorkUnit lastPartition = batch.get(0);
  Assert.assertEquals(lastPartition.getProp(DatasetFinderSourceTest.DATASET_URN), "dataset3");
  Assert.assertEquals(lastPartition.getProp(DatasetFinderSourceTest.PARTITION_URN), "p3");
  // The second work unit of this run is the end-of-datasets marker.
  Assert.assertTrue(batch.get(1).getPropAsBoolean(LoopingDatasetFinderSource.END_OF_DATASETS_KEY));

  // fourth run, finished all work units, loop around
  previous = batch.stream().map(WorkUnitState::new).collect(Collectors.toList());
  Mockito.doReturn(previous).when(spiedState).getPreviousWorkUnitStates();
  batch = Lists.newArrayList(source.getWorkunitStream(spiedState).getWorkUnits());
  Assert.assertEquals(batch.size(), 3);
  for (int i = 0; i < openingBatch.length; i++) {
    Assert.assertEquals(batch.get(i).getProp(DatasetFinderSourceTest.DATASET_URN), openingBatch[i][0]);
    if (openingBatch[i][1] == null) {
      Assert.assertNull(batch.get(i).getProp(DatasetFinderSourceTest.PARTITION_URN));
    } else {
      Assert.assertEquals(batch.get(i).getProp(DatasetFinderSourceTest.PARTITION_URN), openingBatch[i][1]);
    }
  }
}
Use of org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting in the project incubator-gobblin by Apache.
From the class DatasetFinderSourceTest, method testNonDrilledDown.
/**
 * Exercises {@code MySource} constructed with {@code false} as its first argument
 * (presumably "no drill-down") over two plain datasets and one partitionable dataset.
 *
 * <p>Expects exactly one work unit per dataset, in finder order, none of them carrying a
 * partition URN, including the partitionable "dataset2". Finally checks that the stream
 * API yields the same work units as the list API.
 */
@Test
public void testNonDrilledDown() {
  Dataset plainFirst = new SimpleDatasetForTesting("dataset1");
  Dataset partitioned = new SimplePartitionableDatasetForTesting("dataset2", Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2")));
  Dataset plainLast = new SimpleDatasetForTesting("dataset3");
  IterableDatasetFinder finder = new StaticDatasetsFinderForTesting(Lists.newArrayList(plainFirst, partitioned, plainLast));
  MySource source = new MySource(false, finder);

  String[] expectedDatasets = {"dataset1", "dataset2", "dataset3"};

  List<WorkUnit> listed = source.getWorkunits(new SourceState());
  Assert.assertEquals(listed.size(), 3);
  for (int i = 0; i < expectedDatasets.length; i++) {
    WorkUnit unit = listed.get(i);
    Assert.assertEquals(unit.getProp(DATASET_URN), expectedDatasets[i]);
    // Without drill-down no work unit should be tagged with a partition.
    Assert.assertNull(unit.getProp(PARTITION_URN));
  }

  // The streaming entry point must produce the same work units as the list entry point.
  WorkUnitStream streamed = source.getWorkunitStream(new SourceState());
  Assert.assertEquals(Lists.newArrayList(streamed.getWorkUnits()), listed);
}
Aggregations