Use of org.apache.gobblin.source.extractor.extract.kafka.KafkaPartition in project incubator-gobblin by Apache.
Class KafkaWorkUnitPacker, method squeezeMultiWorkUnit.
/**
 * Collapse a {@link MultiWorkUnit} into one {@link WorkUnit} that carries all of its partitions.
 *
 * <p>The first {@link WorkUnit} held by the {@link MultiWorkUnit} is reused as the result and is
 * modified in place; no new {@link WorkUnit} is created here.
 *
 * @param multiWorkUnit the {@link MultiWorkUnit} whose work units are to be combined
 * @return the first work unit of {@code multiWorkUnit}, updated with the watermark interval and the
 *         partition list obtained from the whole {@code multiWorkUnit}
 * @throws IllegalArgumentException if no partitions are found in {@code multiWorkUnit}
 */
protected WorkUnit squeezeMultiWorkUnit(MultiWorkUnit multiWorkUnit) {
  WatermarkInterval combinedInterval = getWatermarkIntervalFromMultiWorkUnit(multiWorkUnit);
  List<KafkaPartition> allPartitions = getPartitionsFromMultiWorkUnit(multiWorkUnit);
  Preconditions.checkArgument(!allPartitions.isEmpty(), "There must be at least one partition in the multiWorkUnit");

  // Any member could serve as the container for the squeezed result; the first one is used.
  List<WorkUnit> members = multiWorkUnit.getWorkUnits();
  WorkUnit squeezed = members.get(0);

  // Drop the container's own low/high watermark keys before installing the interval computed above.
  squeezed.removeProp(ConfigurationKeys.WORK_UNIT_LOW_WATER_MARK_KEY);
  squeezed.removeProp(ConfigurationKeys.WORK_UNIT_HIGH_WATER_MARK_KEY);
  squeezed.setWatermarkInterval(combinedInterval);

  // Carry over each member's offset fetch epoch times and previous latest offset, keyed by the member's
  // position. They feed the load factor: the gobblin consumption rate relative to the kafka production
  // rate, where the kafka rate is
  // (current latest offset - previous latest offset)/(current epoch time - previous epoch time).
  for (int position = 0; position < members.size(); position++) {
    WorkUnit member = members.get(position);
    squeezed.setProp(
        KafkaUtils.getPartitionPropName(KafkaSource.PREVIOUS_OFFSET_FETCH_EPOCH_TIME, position),
        member.getProp(KafkaSource.PREVIOUS_OFFSET_FETCH_EPOCH_TIME));
    squeezed.setProp(
        KafkaUtils.getPartitionPropName(KafkaSource.OFFSET_FETCH_EPOCH_TIME, position),
        member.getProp(KafkaSource.OFFSET_FETCH_EPOCH_TIME));
    squeezed.setProp(
        KafkaUtils.getPartitionPropName(KafkaSource.PREVIOUS_LATEST_OFFSET, position),
        member.getProp(KafkaSource.PREVIOUS_LATEST_OFFSET));
  }

  // The container is itself one of the members, so its un-indexed values were copied in the loop above
  // and can only be removed now.
  squeezed.removeProp(KafkaSource.PREVIOUS_OFFSET_FETCH_EPOCH_TIME);
  squeezed.removeProp(KafkaSource.OFFSET_FETCH_EPOCH_TIME);
  squeezed.removeProp(KafkaSource.PREVIOUS_LATEST_OFFSET);

  // Clear the container's single-partition description ...
  squeezed.removeProp(KafkaSource.PARTITION_ID);
  squeezed.removeProp(KafkaSource.LEADER_ID);
  squeezed.removeProp(KafkaSource.LEADER_HOSTANDPORT);

  // ... and replace it with the description of all partitions.
  populateMultiPartitionWorkUnit(allPartitions, squeezed);

  LOG.info(String.format("Created MultiWorkUnit for partitions %s", allPartitions));
  return squeezed;
}
Aggregations