Example usage of org.apache.gobblin.source.workunit.WorkUnitStream in the Apache incubator-gobblin project: the getWorkunitStream method of the LoopingDatasetFinderSource class.
/**
 * Builds the finite {@link WorkUnitStream} for this run, resuming from where the previous
 * run stopped.
 *
 * <p>The previous run's position is recovered from the prior {@link WorkUnitState}s: the
 * state with the highest {@code WORK_UNIT_ORDINAL} carries the dataset/partition URN
 * watermarks. If that state is flagged with {@code END_OF_DATASETS_KEY}, the loop restarts
 * from the beginning (null watermarks).
 *
 * @param state the {@link SourceState} for this run
 * @return a finite stream of work units starting after the recovered watermark
 * @throws RuntimeException if previous work-unit ordinals are missing/malformed, or if the
 *         dataset finder fails with an {@link IOException} (wrapped)
 */
@Override
public WorkUnitStream getWorkunitStream(SourceState state) {
  try {
    int maxWorkUnits = state.getPropAsInt(MAX_WORK_UNITS_PER_RUN_KEY, MAX_WORK_UNITS_PER_RUN);
    List<WorkUnitState> previousWorkUnitStates = state.getPreviousWorkUnitStates();

    // Locate the previous run's last work unit (highest ordinal); it holds the watermarks.
    Optional<WorkUnitState> maxWorkUnit;
    try {
      maxWorkUnit = previousWorkUnitStates.stream().reduce((wu1, wu2) -> {
        int wu1Ordinal = wu1.getPropAsInt(WORK_UNIT_ORDINAL);
        int wu2Ordinal = wu2.getPropAsInt(WORK_UNIT_ORDINAL);
        return wu1Ordinal > wu2Ordinal ? wu1 : wu2;
      });
    } catch (NumberFormatException nfe) {
      // Chain nfe so the malformed value and its origin remain visible in the stack trace.
      throw new RuntimeException(
          "Work units in state store are corrupted! Missing or malformed " + WORK_UNIT_ORDINAL,
          nfe);
    }

    // Null watermarks mean "start from the first dataset". Only resume mid-sequence if the
    // previous run did not already reach the end of all datasets.
    String previousDatasetUrnWatermark = null;
    String previousPartitionUrnWatermark = null;
    if (maxWorkUnit.isPresent() && !maxWorkUnit.get().getPropAsBoolean(END_OF_DATASETS_KEY, false)) {
      previousDatasetUrnWatermark = maxWorkUnit.get().getProp(DATASET_URN);
      previousPartitionUrnWatermark = maxWorkUnit.get().getProp(PARTITION_URN);
    }

    IterableDatasetFinder datasetsFinder = createDatasetsFinder(state);
    // Datasets must be lexicographically ordered so the URN watermarks identify a resume point.
    Stream<Dataset> datasetStream =
        datasetsFinder.getDatasetsStream(Spliterator.SORTED, this.lexicographicalComparator);
    datasetStream = sortStreamLexicographically(datasetStream);

    return new BasicWorkUnitStream.Builder(
        new DeepIterator(datasetStream.iterator(), previousDatasetUrnWatermark,
            previousPartitionUrnWatermark, maxWorkUnits))
        .setFiniteStream(true)
        .build();
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
Aggregations