Search in sources :

Example 11 with WatermarkInterval

Use of org.apache.gobblin.source.extractor.WatermarkInterval in the project incubator-gobblin by Apache.

The method getWorkunits of the class SequentialTestSource.

/**
 * Builds the work units for this run from the outcome of the previous run.
 *
 * <p>The source is first configured from the properties carried by {@code state}. If there are
 * no previous work unit states, the result of {@code initialWorkUnits()} is returned. Otherwise
 * exactly one new work unit is produced per previous work unit state:
 * <ul>
 *   <li>previous state {@code COMMITTED}: the new interval starts at the actual high watermark
 *       recorded on the previous state;</li>
 *   <li>any other working state: the work unit is retried, i.e. the new interval starts again
 *       at the low watermark of the previous work unit.</li>
 * </ul>
 * In both cases the expected high watermark is the starting value plus
 * {@code numRecordsPerExtract}, and the {@code WORK_UNIT_INDEX} property is copied over from
 * the previous work unit.
 *
 * @param state source state providing the job properties and the previous work unit states
 * @return the work units to execute in this run
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
    configureIfNeeded(ConfigFactory.parseProperties(state.getProperties()));
    final List<WorkUnitState> previousWorkUnitStates = state.getPreviousWorkUnitStates();
    if (previousWorkUnitStates.isEmpty()) {
        return initialWorkUnits();
    }

    List<WorkUnit> newWorkUnits = Lists.newArrayListWithCapacity(previousWorkUnitStates.size());
    for (WorkUnitState workUnitState : previousWorkUnitStates) {
        boolean committed = workUnitState.getWorkingState().equals(WorkUnitState.WorkingState.COMMITTED);
        // Committed: continue from where the previous run actually stopped.
        // Anything else: retry the same range, starting from the previous low watermark.
        LongWatermark watermark = committed
                ? workUnitState.getActualHighWatermark(LongWatermark.class)
                : workUnitState.getWorkunit().getLowWatermark(LongWatermark.class);
        LongWatermark expectedWatermark = new LongWatermark(watermark.getValue() + numRecordsPerExtract);
        WatermarkInterval watermarkInterval = new WatermarkInterval(watermark, expectedWatermark);

        WorkUnit workUnit = WorkUnit.create(newExtract(tableType, namespace, table), watermarkInterval);
        log.debug("Will be setting watermark interval to " + watermarkInterval.toJson());
        // Keep the index stable so a work unit can be traced across runs.
        workUnit.setProp(WORK_UNIT_INDEX, workUnitState.getWorkunit().getProp(WORK_UNIT_INDEX));
        newWorkUnits.add(workUnit);
    }
    return newWorkUnits;
}
Also used : WatermarkInterval(org.apache.gobblin.source.extractor.WatermarkInterval) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark)

Aggregations

WatermarkInterval (org.apache.gobblin.source.extractor.WatermarkInterval)11 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)8 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)6 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)4 MultiWorkUnit (org.apache.gobblin.source.workunit.MultiWorkUnit)3 Map (java.util.Map)2 State (org.apache.gobblin.configuration.State)2 SchemaNotFoundException (org.apache.gobblin.data.management.conversion.hive.avro.SchemaNotFoundException)2 ConvertibleHiveDataset (org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset)2 UpdateNotFoundException (org.apache.gobblin.data.management.conversion.hive.provider.UpdateNotFoundException)2 MultiLongWatermark (org.apache.gobblin.source.extractor.extract.kafka.MultiLongWatermark)2 Extract (org.apache.gobblin.source.workunit.Extract)2 DateTime (org.joda.time.DateTime)2 Predicate (com.google.common.base.Predicate)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 UncheckedExecutionException (com.google.common.util.concurrent.UncheckedExecutionException)1 IOException (java.io.IOException)1 LinkedList (java.util.LinkedList)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 Nonnull (javax.annotation.Nonnull)1