use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
the class Aggregate method collectValues.
/**
 * Groups the table by key and gathers all values of each key into a single
 * collection.
 *
 * @param collect the table whose values are gathered per key
 * @return a table that maps each key to the collection of its values
 */
public static <K, V> PTable<K, Collection<V>> collectValues(PTable<K, V> collect) {
  final PTypeFamily family = collect.getTypeFamily();
  final PType<V> elementType = collect.getValueType();

  // Turns the grouped values of one key into a freshly built list.
  final MapValuesFn<K, Iterable<V>, Collection<V>> gatherFn =
      new MapValuesFn<K, Iterable<V>, Collection<V>>() {
        public Collection<V> map(Iterable<V> values) {
          final List<V> copies = Lists.newArrayList();
          for (V each : values) {
            // Store what the PType hands back as the detached value, not the iterated object itself.
            copies.add(elementType.getDetachedValue(each));
          }
          return copies;
        }
      };

  return collect.groupByKey().parallelDo(
      "collect",
      gatherFn,
      family.tableOf(collect.getKeyType(), family.collections(collect.getValueType())));
}
use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
the class PageRankTest method testAvroJSON.
/**
 * Invokes {@code run} on a new {@code MRPipeline} with the Avro type family
 * and a JSON-string {@code PType} for {@code PageRankData}.
 */
@Test
public void testAvroJSON() throws Exception {
  final PTypeFamily avroFamily = AvroTypeFamily.getInstance();
  final PType<PageRankData> jsonType = PTypes.jsonString(PageRankData.class, avroFamily);
  final MRPipeline pipeline = new MRPipeline(PageRankTest.class);
  run(pipeline, jsonType, avroFamily);
}
use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
the class PageRankTest method testAvroReflect.
/**
 * Invokes {@code run} on a new {@code MRPipeline} with the Avro type family
 * and the {@code PType} that {@code Avros.reflects} builds for
 * {@code PageRankData}.
 */
@Test
public void testAvroReflect() throws Exception {
  final PTypeFamily avroFamily = AvroTypeFamily.getInstance();
  final PType<PageRankData> reflectType = Avros.reflects(PageRankData.class);
  final MRPipeline pipeline = new MRPipeline(PageRankTest.class);
  run(pipeline, reflectType, avroFamily);
}
use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
the class PageRankTest method testAvroBSON.
/**
 * Invokes {@code run} on a new {@code MRPipeline} with the Avro type family
 * and the {@code PType} that {@code PTypes.smile} builds for
 * {@code PageRankData}.
 *
 * NOTE(review): the method name says BSON but the type comes from
 * {@code PTypes.smile}; presumably that is the Smile binary encoding rather
 * than BSON -- confirm and consider renaming.
 */
@Test
public void testAvroBSON() throws Exception {
  final PTypeFamily avroFamily = AvroTypeFamily.getInstance();
  final PType<PageRankData> smileType = PTypes.smile(PageRankData.class, avroFamily);
  final MRPipeline pipeline = new MRPipeline(PageRankTest.class);
  run(pipeline, smileType, avroFamily);
}
use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
the class Aggregate method min.
/**
 * Finds the smallest element of the input collection, ordered by the
 * elements' own {@code compareTo}.
 *
 * <p>The work is done in two steps: each {@code DoFn} instance tracks the
 * smallest element it has seen and emits it once from {@code cleanup} under
 * the key {@code false}; the emitted candidates are then grouped under that
 * single key and reduced to one value by a {@code CombineFn}. A {@code DoFn}
 * instance that saw no input emits nothing.
 *
 * @param collect the collection to search
 * @return a collection holding the values of the combined table
 * @throws IllegalArgumentException if the element class is neither primitive
 *     nor assignable to {@code Comparable}
 */
public static <S> PCollection<S> min(PCollection<S> collect) {
  final Class<S> clazz = collect.getPType().getTypeClass();
  final boolean orderable = clazz.isPrimitive() || Comparable.class.isAssignableFrom(clazz);
  if (!orderable) {
    throw new IllegalArgumentException("Can only get min for Comparable elements, not for: " + clazz);
  }
  final PTypeFamily family = collect.getTypeFamily();

  // Step 1: remember the smallest element seen by this function instance.
  // NOTE(review): the running minimum is kept by reference; if the runtime
  // reuses input objects between calls a detached copy would be needed -- confirm.
  final DoFn<S, Pair<Boolean, S>> localMinFn = new DoFn<S, Pair<Boolean, S>>() {
    private transient S smallest = null;

    public void process(S input, Emitter<Pair<Boolean, S>> emitter) {
      if (smallest != null && ((Comparable<S>) smallest).compareTo(input) <= 0) {
        return; // the current minimum still wins
      }
      smallest = input;
    }

    public void cleanup(Emitter<Pair<Boolean, S>> emitter) {
      if (smallest == null) {
        return; // nothing was processed, so there is nothing to report
      }
      // Constant key so that every candidate lands in the same group.
      emitter.emit(Pair.of(false, smallest));
    }
  };

  // Step 2: reduce the candidates of one key to a single minimum.
  final CombineFn<Boolean, S> overallMinFn = new CombineFn<Boolean, S>() {
    public void process(Pair<Boolean, Iterable<S>> input, Emitter<Pair<Boolean, S>> emitter) {
      S best = null;
      for (S candidate : input.second()) {
        if (best != null && ((Comparable<S>) best).compareTo(candidate) <= 0) {
          continue;
        }
        best = candidate;
      }
      emitter.emit(Pair.of(input.first(), best));
    }
  };

  return PTables.values(
      collect
          .parallelDo("min", localMinFn, family.tableOf(family.booleans(), collect.getPType()))
          .groupByKey()
          .combineValues(overallMinFn));
}
Aggregations