Use of org.apache.crunch.GroupingOptions in project crunch by cloudera.
The class Sort, method sort.
/**
 * Returns the elements of a {@link PCollection} sorted by their natural
 * ordering, in the requested direction.
 *
 * <p>Each element is first wrapped as the key of a key-only table (the value
 * side is always {@code null}), the table is grouped by key using
 * {@link GroupingOptions} built for the requested order, and the keys are
 * then unwrapped back into a plain collection.
 *
 * @param collection the collection whose elements are to be sorted
 * @param order the order in which the elements should be sorted
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <T> PCollection<T> sort(PCollection<T> collection, Order order) {
  final PTypeFamily family = collection.getTypeFamily();
  final PTableType<T, Void> keyOnlyType = family.tableOf(collection.getPType(), family.nulls());
  final GroupingOptions sortOptions = buildGroupingOptions(
      collection.getPipeline().getConfiguration(), family, collection.getPType(), order);

  // Step 1: turn every element into a (element, null) pair so it can act as a grouping key.
  final DoFn<T, Pair<T, Void>> wrapAsKey = new DoFn<T, Pair<T, Void>>() {
    @Override
    public void process(T element, Emitter<Pair<T, Void>> out) {
      out.emit(Pair.of(element, (Void) null));
    }
  };
  final PTable<T, Void> keyed = collection.parallelDo("sort-pre", wrapAsKey, keyOnlyType);

  // Step 2: group by key with the sort-specific options, flatten the groups again,
  // and strip the placeholder value so only the original elements remain.
  final DoFn<Pair<T, Void>, T> unwrapKey = new DoFn<Pair<T, Void>, T>() {
    @Override
    public void process(Pair<T, Void> entry, Emitter<T> out) {
      out.emit(entry.first());
    }
  };
  return keyed
      .groupByKey(sortOptions)
      .ungroup()
      .parallelDo("sort-post", unwrapKey, collection.getPType());
}
Aggregations