Use of org.apache.spark.rdd.PartitionGroup in the gatk project by broadinstitute.
From the class RangePartitionCoalescer, the method coalesce.
/**
 * Builds one {@link PartitionGroup} for every partition of the parent RDD.
 *
 * <p>The group stored at index {@code i} contains the parent partitions from index {@code i}
 * up to and including index {@code maxEndPartitionIndexes.get(i)}. Its preferred location is
 * the first preferred location the parent reports for partition {@code i}; when the parent
 * reports none, {@code null} is handed to {@code scala.Option.apply}.
 *
 * @param maxPartitions the requested number of groups; must equal the parent's partition count
 * @param parent the RDD whose partitions are being grouped
 * @return an array holding one group per parent partition, in the order of
 *     {@code parent.getPartitions()}
 * @throws IllegalArgumentException if {@code maxPartitions} differs from the number of
 *     partitions of {@code parent}
 */
@Override
public PartitionGroup[] coalesce(int maxPartitions, RDD<?> parent) {
    final int parentPartitionCount = parent.getNumPartitions();
    if (parentPartitionCount != maxPartitions) {
        throw new IllegalArgumentException("Cannot use " + getClass().getSimpleName() + " with a different number of partitions to the parent RDD.");
    }

    final Partition[] parentPartitions = parent.getPartitions();
    // Fixed-size list view over the array, used only to slice out contiguous ranges.
    final List<Partition> partitionView = Arrays.asList(parentPartitions);
    final PartitionGroup[] result = new PartitionGroup[parentPartitions.length];

    for (int start = 0; start < parentPartitions.length; start++) {
        // Only the first preferred location (if any) of the range's starting partition is used.
        final Seq<String> locations = parent.getPreferredLocations(parentPartitions[start]);
        String firstLocation = null;
        if (!locations.isEmpty()) {
            firstLocation = locations.apply(0);
        }
        final scala.Option<String> locationOption = scala.Option.apply(firstLocation);
        final PartitionGroup rangeGroup = new PartitionGroup(locationOption);

        // The stored end index is inclusive, while subList takes an exclusive upper bound.
        final int endExclusive = maxEndPartitionIndexes.get(start) + 1;
        final List<Partition> members = partitionView.subList(start, endExclusive);
        rangeGroup.partitions().append(JavaConversions.asScalaBuffer(members));

        result[start] = rangeGroup;
    }
    return result;
}
Aggregations