use of org.apache.gobblin.source.workunit.MultiWorkUnit in project incubator-gobblin by apache.
the class MultiWorkUnitUnpackingIterator method next.
/**
 * Returns the next work unit of the iteration.
 *
 * <p>{@code seekNext()} is invoked first so that the method also works when the caller did not
 * call {@code hasNext()} beforehand. If the pending element is a {@link MultiWorkUnit}, the
 * value handed out is taken from {@code currentIterator} rather than the pending element itself
 * (presumably the iterator over that multi work unit's members; confirm against seekNext()).
 *
 * @return the pending work unit, or the next element of {@code currentIterator} when the
 *         pending work unit is a {@link MultiWorkUnit}
 */
@Override
public WorkUnit next() {
  // Position on the upcoming element even if hasNext() was never called.
  seekNext();
  final WorkUnit result =
      (nextWu instanceof MultiWorkUnit) ? this.currentIterator.next() : nextWu;
  // The element has been consumed, so the following call must seek again.
  needSeek = true;
  return result;
}
use of org.apache.gobblin.source.workunit.MultiWorkUnit in project incubator-gobblin by apache.
the class MultiWorkUnitTest method testSerDe.
/**
 * Serializes the fixture {@link MultiWorkUnit} with {@code write}, reads it back into a fresh
 * instance with {@code readFields}, and checks that both contained work units come back with
 * their watermarks and properties.
 *
 * @throws IOException if serialization, deserialization or closing a stream fails
 */
@Test
public void testSerDe() throws IOException {
  Closer closer = Closer.create();
  try {
    // Write the fixture into an in-memory buffer.
    ByteArrayOutputStream buffer = closer.register(new ByteArrayOutputStream());
    DataOutputStream out = closer.register(new DataOutputStream(buffer));
    this.multiWorkUnit.write(out);

    // Read the serialized bytes back into a brand-new MultiWorkUnit.
    byte[] serialized = buffer.toByteArray();
    DataInputStream in =
        closer.register(new DataInputStream(closer.register(new ByteArrayInputStream(serialized))));
    MultiWorkUnit deserialized = new MultiWorkUnit();
    deserialized.readFields(in);

    List<WorkUnit> restored = deserialized.getWorkUnits();
    Assert.assertEquals(restored.size(), 2);

    WorkUnit first = restored.get(0);
    Assert.assertEquals(first.getHighWaterMark(), 1000);
    Assert.assertEquals(first.getLowWaterMark(), 0);
    Assert.assertEquals(first.getProp("k1"), "v1");

    WorkUnit second = restored.get(1);
    Assert.assertEquals(second.getHighWaterMark(), 2000);
    Assert.assertEquals(second.getLowWaterMark(), 1001);
    Assert.assertEquals(second.getProp("k2"), "v2");
  } catch (Throwable failure) {
    // Let the Closer record the failure so that close() can account for it.
    throw closer.rethrow(failure);
  } finally {
    closer.close();
  }
}
use of org.apache.gobblin.source.workunit.MultiWorkUnit in project incubator-gobblin by apache.
the class MultiWorkUnitTest method setUp.
/**
 * Builds the shared fixture: a {@link MultiWorkUnit} holding two work units, each with its own
 * low/high watermark pair and a single property.
 */
@BeforeClass
public void setUp() {
  // First member: watermarks 0..1000, property k1=v1.
  WorkUnit first = WorkUnit.createEmpty();
  first.setHighWaterMark(1000);
  first.setLowWaterMark(0);
  first.setProp("k1", "v1");

  // Second member: watermarks 1001..2000, property k2=v2.
  WorkUnit second = WorkUnit.createEmpty();
  second.setHighWaterMark(2000);
  second.setLowWaterMark(1001);
  second.setProp("k2", "v2");

  // Insertion order matters: the serialization test reads the members back by index.
  this.multiWorkUnit = new MultiWorkUnit();
  this.multiWorkUnit.addWorkUnit(first);
  this.multiWorkUnit.addWorkUnit(second);
}
use of org.apache.gobblin.source.workunit.MultiWorkUnit in project incubator-gobblin by apache.
the class JobLauncherUtilsTest method testFlattenWorkUnits.
/**
 * Checks {@code JobLauncherUtils.flattenWorkUnits} on two inputs: a list of plain work units
 * only, and a list mixing plain work units with two populated {@link MultiWorkUnit}s.
 */
@Test
public void testFlattenWorkUnits() {
  // Plain work units only: the flattened result has the same number of elements.
  List<WorkUnit> workUnitsOnly =
      Arrays.asList(WorkUnit.createEmpty(), WorkUnit.createEmpty(), WorkUnit.createEmpty());
  Assert.assertEquals(JobLauncherUtils.flattenWorkUnits(workUnitsOnly).size(), 3);

  MultiWorkUnit multiWorkUnit1 = MultiWorkUnit.createEmpty();
  multiWorkUnit1.addWorkUnits(
      Arrays.asList(WorkUnit.createEmpty(), WorkUnit.createEmpty(), WorkUnit.createEmpty()));

  // Each multi work unit gets its own three members, so that flattening is exercised on two
  // populated containers rather than on one populated and one empty container.
  MultiWorkUnit multiWorkUnit2 = MultiWorkUnit.createEmpty();
  multiWorkUnit2.addWorkUnits(
      Arrays.asList(WorkUnit.createEmpty(), WorkUnit.createEmpty(), WorkUnit.createEmpty()));

  // 3 plain work units + 3 from multiWorkUnit1 + 3 from multiWorkUnit2 = 9.
  List<WorkUnit> workUnitsAndMultiWorkUnits = Arrays.asList(
      WorkUnit.createEmpty(), WorkUnit.createEmpty(), WorkUnit.createEmpty(),
      multiWorkUnit1, multiWorkUnit2);
  Assert.assertEquals(JobLauncherUtils.flattenWorkUnits(workUnitsAndMultiWorkUnits).size(), 9);
}
use of org.apache.gobblin.source.workunit.MultiWorkUnit in project incubator-gobblin by apache.
the class GobblinHelixJobLauncher method createJob.
/**
 * Builds a {@link JobConfig.Builder} for the given batch of {@link WorkUnit}s.
 *
 * <p>Every {@link MultiWorkUnit} in the batch is assigned a freshly generated multi-task id, each
 * work unit is passed to {@code addWorkUnit} together with the serialization runner and the task
 * config map, and the job state is persisted either through the job state store (when one is
 * configured) or by serializing it directly to the file system. The returned builder carries the
 * collected task configs, the retry limit, a failure threshold equal to the number of work units,
 * the task factory command and the per-instance task concurrency.
 *
 * @param workUnits the work units to turn into tasks of the job
 * @return the configured job config builder
 * @throws IOException declared for the work unit / job state persistence performed here
 */
private JobConfig.Builder createJob(List<WorkUnit> workUnits) throws IOException {
  Map<String, TaskConfig> taskConfigMap = Maps.newHashMap();

  try (ParallelRunner serDeRunner = new ParallelRunner(this.stateSerDeRunnerThreads, this.fs)) {
    // Running counter used to derive ids for MultiWorkUnits only.
    int nextMultiTaskSequence = 0;
    for (WorkUnit unit : workUnits) {
      if (unit instanceof MultiWorkUnit) {
        unit.setId(JobLauncherUtils.newMultiTaskId(this.jobContext.getJobId(), nextMultiTaskSequence++));
      }
      addWorkUnit(unit, serDeRunner, taskConfigMap);
    }

    // Write job.state through the state store if present, otherwise serialize straight to the file.
    final boolean useJobStateStore = this.stateStores.haveJobStateStore();
    final Path jobStateFilePath =
        GobblinClusterUtils.getJobStateFilePath(useJobStateStore, this.appWorkDir, this.jobContext.getJobId());
    if (useJobStateStore) {
      this.stateStores.getJobStateStore().put(
          jobStateFilePath.getParent().getName(),
          jobStateFilePath.getName(),
          this.jobContext.getJobState());
    } else {
      SerializationUtils.serializeState(this.fs, jobStateFilePath, this.jobContext.getJobState());
    }

    LOGGER.debug("GobblinHelixJobLauncher.createJob: jobStateFilePath {}, jobState {} jobProperties {}",
        jobStateFilePath,
        this.jobContext.getJobState().toString(),
        this.jobContext.getJobState().getProperties());
  }

  JobConfig.Builder jobConfigBuilder = new JobConfig.Builder();

  jobConfigBuilder.setMaxAttemptsPerTask(
      this.jobContext.getJobState().getPropAsInt(
          ConfigurationKeys.MAX_TASK_RETRIES_KEY,
          ConfigurationKeys.DEFAULT_MAX_TASK_RETRIES));
  // Failure threshold is set to the number of work units in the batch.
  jobConfigBuilder.setFailureThreshold(workUnits.size());
  jobConfigBuilder.addTaskConfigMap(taskConfigMap).setCommand(GobblinTaskRunner.GOBBLIN_TASK_FACTORY_NAME);
  jobConfigBuilder.setNumConcurrentTasksPerInstance(
      ConfigUtils.getInt(
          jobConfig,
          GobblinClusterConfigurationKeys.HELIX_CLUSTER_TASK_CONCURRENCY,
          GobblinClusterConfigurationKeys.HELIX_CLUSTER_TASK_CONCURRENCY_DEFAULT));

  // Streaming jobs additionally enable rebalancing of running tasks.
  final boolean streaming =
      Task.getExecutionModel(ConfigUtils.configToState(jobConfig)).equals(ExecutionModel.STREAMING);
  if (streaming) {
    jobConfigBuilder.setRebalanceRunningTask(true);
  }

  return jobConfigBuilder;
}
Aggregations