Search in sources :

Example 1 with DoFn

use of org.apache.crunch.DoFn in project crunch by cloudera.

the class Aggregate method min.

/**
 * Returns the smallest element of the input collection, as decided by the
 * elements' own {@link Comparable#compareTo(Object)} (not only numeric types).
 *
 * <p>The "min" stage keeps a running minimum per function instance and emits it
 * once from {@code cleanup}, always under the constant key {@code false}. Those
 * candidates are then grouped by that single key and reduced to one value by
 * the combine function.
 *
 * @param collect the collection to scan; its element class must be a primitive
 *        or implement {@link Comparable}
 * @return the values of the combined single-key table, i.e. the minimum element;
 *         a function instance that saw no input emits no candidate
 * @throws IllegalArgumentException if the element class is neither primitive
 *         nor {@link Comparable}
 */
@SuppressWarnings("unchecked") // casts to Comparable<S> are guarded by the class check below
public static <S> PCollection<S> min(PCollection<S> collect) {
    Class<S> clazz = collect.getPType().getTypeClass();
    if (!clazz.isPrimitive() && !Comparable.class.isAssignableFrom(clazz)) {
        throw new IllegalArgumentException("Can only get min for Comparable elements, not for: " + collect.getPType().getTypeClass());
    }
    PTypeFamily tf = collect.getTypeFamily();
    return PTables.values(collect.parallelDo("min", new DoFn<S, Pair<Boolean, S>>() {

        // Running minimum for this function instance; null until the first input arrives.
        private transient S min = null;

        @Override
        public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
            // Nothing is emitted per element; only the local minimum is tracked.
            if (min == null || ((Comparable<S>) min).compareTo(input) > 0) {
                min = input;
            }
        }

        @Override
        public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
            // Emit the single local candidate, if any input was seen.
            if (min != null) {
                emitter.emit(Pair.of(false, min));
            }
        }
    // Every candidate carries the same key, so request a single partition,
    // exactly as max(...) does with groupByKey(1).
    }, tf.tableOf(tf.booleans(), collect.getPType())).groupByKey(1).combineValues(new CombineFn<Boolean, S>() {

        @Override
        public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
            S min = null;
            for (S v : input.second()) {
                if (min == null || ((Comparable<S>) min).compareTo(v) > 0) {
                    min = v;
                }
            }
            emitter.emit(Pair.of(input.first(), min));
        }
    }));
}
Also used : PTypeFamily(org.apache.crunch.types.PTypeFamily) Emitter(org.apache.crunch.Emitter) DoFn(org.apache.crunch.DoFn) CombineFn(org.apache.crunch.CombineFn) Pair(org.apache.crunch.Pair)

Example 2 with DoFn

use of org.apache.crunch.DoFn in project crunch by cloudera.

the class Aggregate method max.

/**
 * Finds the greatest element of the input collection, comparing elements with
 * their own {@code compareTo}.
 *
 * <p>The "max" stage remembers the largest element each function instance has
 * seen and emits it once from {@code cleanup} under the constant key
 * {@code true}; the candidates are grouped with {@code groupByKey(1)} and the
 * combine function picks the largest of them.
 *
 * @param collect the collection to scan; its element class must be a primitive
 *        or implement {@link Comparable}
 * @return the values of the combined single-key table, i.e. the maximum element
 * @throws IllegalArgumentException if the element class is neither primitive
 *         nor {@link Comparable}
 */
public static <S> PCollection<S> max(PCollection<S> collect) {
    Class<S> elementClass = collect.getPType().getTypeClass();
    boolean orderable = elementClass.isPrimitive() || Comparable.class.isAssignableFrom(elementClass);
    if (!orderable) {
        throw new IllegalArgumentException("Can only get max for Comparable elements, not for: " + collect.getPType().getTypeClass());
    }
    PTypeFamily family = collect.getTypeFamily();
    return PTables.values(collect.parallelDo("max", new DoFn<S, Pair<Boolean, S>>() {

        // Largest element seen so far by this function instance; null before any input.
        private transient S largestSeen = null;

        @Override
        public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
            // Keep the current holder unless the new input is strictly greater.
            if (largestSeen != null && ((Comparable<S>) largestSeen).compareTo(input) >= 0) {
                return;
            }
            largestSeen = input;
        }

        @Override
        public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
            if (largestSeen == null) {
                return;
            }
            emitter.emit(Pair.of(true, largestSeen));
        }
    }, family.tableOf(family.booleans(), collect.getPType())).groupByKey(1).combineValues(new CombineFn<Boolean, S>() {

        @Override
        public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
            S winner = null;
            for (S candidate : input.second()) {
                boolean keepWinner = winner != null && ((Comparable<S>) winner).compareTo(candidate) >= 0;
                if (!keepWinner) {
                    winner = candidate;
                }
            }
            emitter.emit(Pair.of(input.first(), winner));
        }
    }));
}
Also used : PTypeFamily(org.apache.crunch.types.PTypeFamily) Emitter(org.apache.crunch.Emitter) DoFn(org.apache.crunch.DoFn) CombineFn(org.apache.crunch.CombineFn) Pair(org.apache.crunch.Pair)

Aggregations

CombineFn (org.apache.crunch.CombineFn): 2, DoFn (org.apache.crunch.DoFn): 2, Emitter (org.apache.crunch.Emitter): 2, Pair (org.apache.crunch.Pair): 2, PTypeFamily (org.apache.crunch.types.PTypeFamily): 2