use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
the class Aggregate method top.
/**
 * Builds a three-stage pipeline over {@code ptable} and returns a table of the
 * same type as the input.
 *
 * <p>Stages, in order: a {@code TopKFn} pass that re-keys every entry by an
 * {@code Integer}, a {@code groupByKey(1)} followed by a {@code TopKCombineFn}
 * combine, and a final pass that drops the {@code Integer} key and emits the
 * wrapped entry.
 *
 * @param ptable   the table to select entries from
 * @param limit    forwarded to {@code TopKFn} and {@code TopKCombineFn}; also
 *                 embedded in the stage names (presumably the number of entries
 *                 to keep; confirm against those classes)
 * @param maximize forwarded to {@code TopKFn} and {@code TopKCombineFn}
 *                 (presumably selects largest vs. smallest; confirm)
 * @return the table produced by the final stage, typed with the input's table type
 */
public static <K, V> PTable<K, V> top(PTable<K, V> ptable, int limit, boolean maximize) {
  PTypeFamily family = ptable.getTypeFamily();
  PTableType<K, V> resultType = ptable.getPTableType();
  PType<Pair<K, V>> entryType = family.pairs(resultType.getKeyType(), resultType.getValueType());
  PTableType<Integer, Pair<K, V>> keyedEntryType = family.tableOf(family.ints(), entryType);

  // Stage 1: wrap each entry as the value of an Integer-keyed table.
  PTable<Integer, Pair<K, V>> candidates =
      ptable.parallelDo("top" + limit + "map", new TopKFn<K, V>(limit, maximize), keyedEntryType);

  // Stage 2: group with argument 1, then combine the grouped values.
  PTable<Integer, Pair<K, V>> combined =
      candidates.groupByKey(1).combineValues(new TopKCombineFn<K, V>(limit, maximize));

  // Stage 3: discard the Integer key and emit the original (key, value) entry.
  return combined.parallelDo("top" + limit + "reduce",
      new DoFn<Pair<Integer, Pair<K, V>>, Pair<K, V>>() {
        public void process(Pair<Integer, Pair<K, V>> keyed, Emitter<Pair<K, V>> out) {
          out.emit(keyed.second());
        }
      }, resultType);
}
use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
the class Join method join.
/**
 * Joins {@code left} and {@code right} by applying {@code joinFn} to the grouped
 * table produced by {@code preJoin}.
 *
 * @param left   the left-hand table; its type family, key type and value type
 *               are used to build the result type
 * @param right  the right-hand table; its value type is used for the second
 *               element of the result pair
 * @param joinFn the function run over the grouped table; its join type is
 *               prefixed to the grouped table's name to form the stage name
 * @return the table produced by running {@code joinFn} over the grouped input
 */
public static <K, U, V> PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right, JoinFn<K, U, V> joinFn) {
  PTypeFamily family = left.getTypeFamily();
  PGroupedTable<Pair<K, Integer>, Pair<U, V>> taggedGroups = preJoin(left, right);

  PTableType<K, Pair<U, V>> joinedType =
      family.tableOf(left.getKeyType(), family.pairs(left.getValueType(), right.getValueType()));

  String stageName = joinFn.getJoinType() + taggedGroups.getName();
  return taggedGroups.parallelDo(stageName, joinFn, joinedType);
}
use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
the class Join method preJoin.
/**
 * Tags both inputs, unions them, and groups the union by the tagged key.
 *
 * <p>Each left entry becomes {@code ((key, 0), (value, null))} and each right
 * entry becomes {@code ((key, 1), (null, value))}. The union is grouped using
 * grouping options whose partitioner class comes from
 * {@code JoinUtils.getPartitionerClass} for the left table's type family.
 *
 * @param left  the table whose entries are tagged with {@code 0}
 * @param right the table whose entries are tagged with {@code 1}
 * @return the grouped union of the two tagged tables
 */
private static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left, PTable<K, V> right) {
  PTypeFamily family = left.getTypeFamily();
  PType<Pair<K, Integer>> taggedKeyType = family.pairs(left.getKeyType(), family.ints());
  PType<Pair<U, V>> sideBySideType = family.pairs(left.getValueType(), right.getValueType());
  PTableType<Pair<K, Integer>, Pair<U, V>> taggedType = family.tableOf(taggedKeyType, sideBySideType);

  // Left entries: tag 0, value in the first slot, second slot empty.
  PTable<Pair<K, Integer>, Pair<U, V>> taggedLeft = left.parallelDo("joinTagLeft",
      new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> entry) {
          Pair<K, Integer> taggedKey = Pair.of(entry.first(), 0);
          Pair<U, V> leftOnly = Pair.of(entry.second(), (V) null);
          return Pair.of(taggedKey, leftOnly);
        }
      }, taggedType);

  // Right entries: tag 1, first slot empty, value in the second slot.
  PTable<Pair<K, Integer>, Pair<U, V>> taggedRight = right.parallelDo("joinTagRight",
      new MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, V> entry) {
          Pair<K, Integer> taggedKey = Pair.of(entry.first(), 1);
          Pair<U, V> rightOnly = Pair.of((U) null, entry.second());
          return Pair.of(taggedKey, rightOnly);
        }
      }, taggedType);

  GroupingOptions.Builder grouping = GroupingOptions.builder();
  grouping.partitionerClass(JoinUtils.getPartitionerClass(family));

  PTable<Pair<K, Integer>, Pair<U, V>> allTagged = taggedLeft.union(taggedRight);
  return allTagged.groupByKey(grouping.build());
}
use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
the class PageRankTest method testWritablesBSON.
/**
 * Runs the shared {@code run} scenario on an {@code MRPipeline} using the
 * Writable type family and a {@code PageRankData} type from {@code PTypes.smile}.
 *
 * <p>NOTE(review): the method name says BSON but the type comes from
 * {@code PTypes.smile}; confirm whether the name is intentional.
 */
@Test
public void testWritablesBSON() throws Exception {
  PTypeFamily writables = WritableTypeFamily.getInstance();
  PType<PageRankData> smileType = PTypes.smile(PageRankData.class, writables);
  run(new MRPipeline(PageRankTest.class), smileType, writables);
}
use of org.apache.crunch.types.PTypeFamily in project crunch by cloudera.
the class PageRankTest method testWritablesJSON.
/**
 * Runs the shared {@code run} scenario on an {@code MRPipeline} using the
 * Writable type family and a {@code PageRankData} type from
 * {@code PTypes.jsonString}.
 */
@Test
public void testWritablesJSON() throws Exception {
  PTypeFamily writables = WritableTypeFamily.getInstance();
  PType<PageRankData> jsonType = PTypes.jsonString(PageRankData.class, writables);
  run(new MRPipeline(PageRankTest.class), jsonType, writables);
}
Aggregations