Use of org.apache.crunch.types.PType in project crunch by cloudera.
The class SortTest, method runTupleN.
/**
 * Runs a tuple sort over the "docs.txt" fixture and checks the first tuple of the result.
 *
 * <p>Each input line is split on runs of tab characters and emitted as a {@code TupleN};
 * the resulting collection is passed to {@code Sort.sortTuples} together with
 * {@code orders}. The first materialized tuple is then compared, position by position,
 * against {@code fields}.
 *
 * @param pipeline   the pipeline used to read the input and finished via {@code done()}
 * @param typeFamily supplies the string type and the tuple type for the parsed lines
 * @param orders     column orders handed to {@code Sort.sortTuples}; its length also
 *                   determines how many string column types are declared
 * @param fields     expected values of the leading columns of the first sorted tuple
 * @throws IOException declared by the original signature; presumably raised while
 *                     staging the fixture file (confirm against FileHelper)
 */
private void runTupleN(Pipeline pipeline, PTypeFamily typeFamily, ColumnOrder[] orders, String[] fields) throws IOException {
    PCollection<String> docs = pipeline.readTextFile(FileHelper.createTempCopyOf("docs.txt"));

    // One string PType per sort column.
    PType[] columnTypes = new PType[orders.length];
    Arrays.fill(columnTypes, typeFamily.strings());

    // Turns a tab-separated line into a TupleN holding the individual columns.
    DoFn<String, TupleN> lineToTuple = new DoFn<String, TupleN>() {
        @Override
        public void process(String line, Emitter<TupleN> emitter) {
            String[] columns = line.split("[\t]+");
            emitter.emit(new TupleN(columns));
        }
    };
    PCollection<TupleN> tuples = docs.parallelDo(lineToTuple, typeFamily.tuples(columnTypes));

    Iterable<TupleN> sortedTuples = Sort.sortTuples(tuples, orders).materialize();
    TupleN first = sortedTuples.iterator().next();

    // Only the first tuple is verified, column by column, against the expectation.
    for (int column = 0; column < fields.length; column++) {
        assertEquals(fields[column], first.get(column));
    }

    pipeline.done();
}
Use of org.apache.crunch.types.PType in project crunch by cloudera.
The class Writables, method derived.
/**
 * Builds a {@code PType<T>} on top of an existing base type by wrapping the base type's
 * map functions with the supplied conversion functions.
 *
 * <p>The base type is cast to {@code WritableType}; a base of any other kind fails with
 * a {@code ClassCastException}. The new type reuses the base's serialization class and
 * sub-types.
 *
 * @param clazz    the class object for the derived type {@code T}
 * @param inputFn  converts a base value {@code S} into a {@code T}
 * @param outputFn converts a {@code T} back into a base value {@code S}
 * @param base     the type being extended; must be a {@code WritableType}
 * @return a new {@code WritableType} for {@code T}
 */
public static <S, T> PType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn, PType<S> base) {
    WritableType<S, ?> writableBase = (WritableType<S, ?>) base;

    // NOTE(review): CompositeMapFn presumably applies its first argument, then its second
    // (base input fn -> inputFn on the way in, outputFn -> base output fn on the way out);
    // confirm against CompositeMapFn.
    MapFn toDerived = new CompositeMapFn(writableBase.getInputMapFn(), inputFn);
    MapFn fromDerived = new CompositeMapFn(outputFn, writableBase.getOutputMapFn());

    Class<?> serializationClass = writableBase.getSerializationClass();
    PType[] subTypes = base.getSubTypes().toArray(new PType[0]);
    return new WritableType(clazz, serializationClass, toDerived, fromDerived, subTypes);
}
Aggregations