Example usage of org.apache.gobblin.source.workunit.MultiWorkUnit in the Apache incubator-gobblin project:
the getWorkUnits method of the SingleTask class.
/**
 * Loads the {@link WorkUnit}(s) backing this task from the state store and returns them
 * as a flat list.
 *
 * <p>The task file may hold either a single {@code WorkUnit} or a {@code MultiWorkUnit},
 * distinguished by the file-name extension; a {@code MultiWorkUnit} is flattened so that
 * job configuration properties can later be applied to each individual work unit.
 *
 * @return the flattened list of work units to run (never null, at least one element)
 * @throws IOException if the state store cannot be read
 */
private List<WorkUnit> getWorkUnits() throws IOException {
  String fileName = _workUnitFilePath.getName();
  String storeName = _workUnitFilePath.getParent().getName();

  // Pick the state store by extension: .mwu files hold MultiWorkUnits.
  // (Reuse fileName instead of re-calling _workUnitFilePath.getName().)
  WorkUnit workUnit;
  if (fileName.endsWith(AbstractJobLauncher.MULTI_WORK_UNIT_FILE_EXTENSION)) {
    workUnit = _stateStores.getMwuStateStore().getAll(storeName, fileName).get(0);
  } else {
    workUnit = _stateStores.getWuStateStore().getAll(storeName, fileName).get(0);
  }

  // The list of individual WorkUnits (flattened) to run.
  List<WorkUnit> workUnits = Lists.newArrayList();
  if (workUnit instanceof MultiWorkUnit) {
    // Flatten the MultiWorkUnit so the job configuration properties can be
    // added to each individual WorkUnit.
    workUnits.addAll(JobLauncherUtils.flattenWorkUnits(((MultiWorkUnit) workUnit).getWorkUnits()));
  } else {
    workUnits.add(workUnit);
  }
  return workUnits;
}
Example usage of org.apache.gobblin.source.workunit.MultiWorkUnit in the Apache incubator-gobblin project:
the pack method of the KafkaBiLevelWorkUnitPacker class.
@Override
public List<WorkUnit> pack(Map<String, List<WorkUnit>> workUnitsByTopic, int numContainers) {
double totalEstDataSize = setWorkUnitEstSizes(workUnitsByTopic);
double avgGroupSize = totalEstDataSize / numContainers / getPreGroupingSizeFactor(this.state);
List<MultiWorkUnit> mwuGroups = Lists.newArrayList();
for (List<WorkUnit> workUnitsForTopic : workUnitsByTopic.values()) {
double estimatedDataSizeForTopic = calcTotalEstSizeForTopic(workUnitsForTopic);
if (estimatedDataSizeForTopic < avgGroupSize) {
// If the total estimated size of a topic is smaller than group size, put all partitions of this
// topic in a single group.
MultiWorkUnit mwuGroup = MultiWorkUnit.createEmpty();
addWorkUnitsToMultiWorkUnit(workUnitsForTopic, mwuGroup);
mwuGroups.add(mwuGroup);
} else {
// Use best-fit-decreasing to group workunits for a topic into multiple groups.
mwuGroups.addAll(bestFitDecreasingBinPacking(workUnitsForTopic, avgGroupSize));
}
}
List<WorkUnit> groups = squeezeMultiWorkUnits(mwuGroups);
return worstFitDecreasingBinPacking(groups, numContainers);
}
Example usage of org.apache.gobblin.source.workunit.MultiWorkUnit in the Apache incubator-gobblin project:
the pack method of the KafkaSingleLevelWorkUnitPacker class.
@Override
public List<WorkUnit> pack(Map<String, List<WorkUnit>> workUnitsByTopic, int numContainers) {
setWorkUnitEstSizes(workUnitsByTopic);
List<WorkUnit> workUnits = Lists.newArrayList();
for (List<WorkUnit> workUnitsForTopic : workUnitsByTopic.values()) {
// For each topic, merge all empty workunits into a single workunit, so that a single
// empty task will be created instead of many.
MultiWorkUnit zeroSizeWorkUnit = MultiWorkUnit.createEmpty();
for (WorkUnit workUnit : workUnitsForTopic) {
if (DoubleMath.fuzzyEquals(getWorkUnitEstSize(workUnit), 0.0, EPS)) {
addWorkUnitToMultiWorkUnit(workUnit, zeroSizeWorkUnit);
} else {
workUnit.setWatermarkInterval(getWatermarkIntervalFromWorkUnit(workUnit));
workUnits.add(workUnit);
}
}
if (!zeroSizeWorkUnit.getWorkUnits().isEmpty()) {
workUnits.add(squeezeMultiWorkUnit(zeroSizeWorkUnit));
}
}
return worstFitDecreasingBinPacking(workUnits, numContainers);
}
Example usage of org.apache.gobblin.source.workunit.MultiWorkUnit in the Apache incubator-gobblin project:
the createWorkUnit helper of the MultiWorkUnitUnpackingIteratorTest class.
/**
 * Test helper: builds a {@link WorkUnit} tree from the given names.
 *
 * <p>A single name yields a plain {@code WorkUnit} whose {@code WORK_UNIT_NAME}
 * property is that name; multiple names yield a {@link MultiWorkUnit} whose
 * children are created recursively, one per name.
 *
 * @param names one or more work unit names
 * @return a single {@code WorkUnit} or a {@code MultiWorkUnit} wrapping one per name
 */
private WorkUnit createWorkUnit(String... names) {
  if (names.length == 1) {
    WorkUnit single = new WorkUnit();
    single.setProp(WORK_UNIT_NAME, names[0]);
    return single;
  }
  MultiWorkUnit multi = new MultiWorkUnit();
  for (String name : names) {
    multi.addWorkUnit(createWorkUnit(name));
  }
  return multi;
}
Example usage of org.apache.gobblin.source.workunit.MultiWorkUnit in the Apache incubator-gobblin project:
the getWorkunits method of the TestSource class.
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
String nameSpace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY);
Extract extract1 = createExtract(TableType.SNAPSHOT_ONLY, nameSpace, "TestTable1");
Extract extract2 = createExtract(TableType.SNAPSHOT_ONLY, nameSpace, "TestTable2");
String sourceFileList = state.getProp(SOURCE_FILE_LIST_KEY);
List<String> list = SPLITTER.splitToList(sourceFileList);
List<WorkUnit> workUnits = Lists.newArrayList();
for (int i = 0; i < list.size(); i++) {
WorkUnit workUnit = WorkUnit.create(i % 2 == 0 ? extract1 : extract2);
workUnit.setProp(SOURCE_FILE_KEY, list.get(i));
workUnits.add(workUnit);
}
if (state.getPropAsBoolean("use.multiworkunit", false)) {
MultiWorkUnit multiWorkUnit = MultiWorkUnit.createEmpty();
multiWorkUnit.addWorkUnits(workUnits);
workUnits.clear();
workUnits.add(multiWorkUnit);
}
return workUnits;
}
Aggregations