use of org.apache.crunch.DoFn in project crunch by cloudera.
the class Aggregate method min.
/**
 * Returns the smallest element of the input collection, as determined by the
 * elements' own {@link Comparable#compareTo(Object)} ordering.
 *
 * <p>The work is done in two stages. First, every {@code DoFn} instance keeps
 * track of the smallest element it has seen and emits it once, from
 * {@code cleanup}, under the constant key {@code false}; an instance that saw
 * no input emits nothing. Second, the emitted pairs are grouped by that key
 * and the grouped values are reduced to a single minimum.
 *
 * @param collect the collection to search
 * @return the values of the combined table, i.e. the minimum element
 * @throws IllegalArgumentException if the element class reported by the
 *         collection's PType is neither primitive nor {@code Comparable}
 */
public static <S> PCollection<S> min(PCollection<S> collect) {
    Class<S> clazz = collect.getPType().getTypeClass();
    if (!clazz.isPrimitive() && !Comparable.class.isAssignableFrom(clazz)) {
        throw new IllegalArgumentException("Can only get min for Comparable elements, not for: " + collect.getPType().getTypeClass());
    }
    PTypeFamily tf = collect.getTypeFamily();
    return PTables.values(collect.parallelDo("min", new DoFn<S, Pair<Boolean, S>>() {
        // Smallest element seen so far by this instance; null until the first input arrives.
        private transient S min = null;

        public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
            // Nothing is emitted per element; only the running minimum is updated.
            if (min == null || ((Comparable<S>) min).compareTo(input) > 0) {
                min = input;
            }
        }

        public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
            // Emit the local minimum exactly once, and only if any input was seen.
            if (min != null) {
                emitter.emit(Pair.of(false, min));
            }
        }
    // Every emitted pair shares the same key, so a single partition is all the
    // grouping needs; this mirrors the groupByKey(1) call used by max().
    }, tf.tableOf(tf.booleans(), collect.getPType())).groupByKey(1).combineValues(new CombineFn<Boolean, S>() {
        public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
            // Reduce the per-instance minima for this key to the overall minimum.
            S min = null;
            for (S v : input.second()) {
                if (min == null || ((Comparable<S>) min).compareTo(v) > 0) {
                    min = v;
                }
            }
            emitter.emit(Pair.of(input.first(), min));
        }
    }));
}
use of org.apache.crunch.DoFn in project crunch by cloudera.
the class Aggregate method max.
/**
 * Returns the largest element of the input collection, as determined by the
 * elements' own {@link Comparable#compareTo(Object)} ordering.
 *
 * <p>Each {@code DoFn} instance remembers the largest element it has processed
 * and emits it once, from {@code cleanup}, under the constant key {@code true}
 * (nothing is emitted if no input was processed). The pairs are then grouped
 * with {@code groupByKey(1)} and the grouped values are reduced to one maximum.
 *
 * @param collect the collection to search
 * @return the values of the combined table, i.e. the maximum element
 * @throws IllegalArgumentException if the element class reported by the
 *         collection's PType is neither primitive nor {@code Comparable}
 */
public static <S> PCollection<S> max(PCollection<S> collect) {
    Class<S> elementClass = collect.getPType().getTypeClass();
    boolean orderable = elementClass.isPrimitive() || Comparable.class.isAssignableFrom(elementClass);
    if (!orderable) {
        throw new IllegalArgumentException("Can only get max for Comparable elements, not for: " + collect.getPType().getTypeClass());
    }
    PTypeFamily family = collect.getTypeFamily();
    return PTables.values(collect.parallelDo("max", new DoFn<S, Pair<Boolean, S>>() {
        // Largest element processed so far by this instance; null before the first input.
        private transient S best = null;

        public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
            // Keep the current candidate unless the new input is strictly larger.
            if (best != null && ((Comparable<S>) best).compareTo(input) >= 0) {
                return;
            }
            best = input;
        }

        public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
            // No input processed means there is no local maximum to report.
            if (best == null) {
                return;
            }
            emitter.emit(Pair.of(true, best));
        }
    }, family.tableOf(family.booleans(), collect.getPType())).groupByKey(1).combineValues(new CombineFn<Boolean, S>() {
        public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
            // Fold the grouped local maxima into the overall maximum for this key.
            S best = null;
            for (S candidate : input.second()) {
                boolean replace = best == null || ((Comparable<S>) best).compareTo(candidate) < 0;
                if (replace) {
                    best = candidate;
                }
            }
            emitter.emit(Pair.of(input.first(), best));
        }
    }));
}
Aggregations