Use of com.google.cloud.dataflow.sdk.util.WindowedValue in project spark-dataflow by cloudera.
The class TransformTranslator, method writeAvro:
private static <T> TransformEvaluator<AvroIO.Write.Bound<T>> writeAvro() {
  return new TransformEvaluator<AvroIO.Write.Bound<T>>() {
    @Override
    public void evaluate(AvroIO.Write.Bound<T> transform, EvaluationContext context) {
      Job job;
      try {
        job = Job.getInstance();
      } catch (IOException e) {
        throw new IllegalStateException(e);
      }
      // Register the Avro schema on a throwaway Hadoop Job so the output format can find it.
      AvroJob.setOutputKeySchema(job, transform.getSchema());
      // Strip window metadata, then pair each element with NullWritable to match
      // the (key, value) shape the Hadoop Avro output format expects.
      @SuppressWarnings("unchecked")
      JavaPairRDD<AvroKey<T>, NullWritable> last =
          ((JavaRDDLike<WindowedValue<T>, ?>) context.getInputRDD(transform))
              .map(WindowingHelpers.<T>unwindowFunction())
              .mapToPair(new PairFunction<T, AvroKey<T>, NullWritable>() {
                @Override
                public Tuple2<AvroKey<T>, NullWritable> call(T t) throws Exception {
                  return new Tuple2<>(new AvroKey<>(t), NullWritable.get());
                }
              });
      ShardTemplateInformation shardTemplateInfo = new ShardTemplateInformation(
          transform.getNumShards(), transform.getShardTemplate(),
          transform.getFilenamePrefix(), transform.getFilenameSuffix());
      writeHadoopFile(last, job.getConfiguration(), shardTemplateInfo,
          AvroKey.class, NullWritable.class, TemplatedAvroKeyOutputFormat.class);
    }
  };
}
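For context, a minimal pipeline sketch of the kind of write this evaluator translates. This is an illustrative assumption, not code from the project: the paths, the MyRecord Avro type, and the shard count are placeholders; only the AvroIO and Pipeline calls are standard Dataflow 1.x SDK API.

// Hypothetical Dataflow 1.x pipeline; the AvroIO.Write step is what writeAvro()
// turns into a Hadoop-backed Spark write.
PipelineOptions options = PipelineOptionsFactory.create();
Pipeline p = Pipeline.create(options);
p.apply(AvroIO.Read.named("ReadRecords")
        .from("hdfs:///tmp/input/*.avro")   // illustrative path
        .withSchema(MyRecord.class))        // MyRecord is a placeholder Avro type
 .apply(AvroIO.Write.named("WriteRecords")
        .to("hdfs:///tmp/output/part")      // illustrative path
        .withSchema(MyRecord.class)
        .withNumShards(3));                 // feeds ShardTemplateInformation above
p.run();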
Use of com.google.cloud.dataflow.sdk.util.WindowedValue in project spark-dataflow by cloudera.
The class TransformTranslator, method readAvro:
private static <T> TransformEvaluator<AvroIO.Read.Bound<T>> readAvro() {
  return new TransformEvaluator<AvroIO.Read.Bound<T>>() {
    @Override
    public void evaluate(AvroIO.Read.Bound<T> transform, EvaluationContext context) {
      String pattern = transform.getFilepattern();
      JavaSparkContext jsc = context.getSparkContext();
      // Read AvroKey records through the new Hadoop API, keeping only the keys.
      @SuppressWarnings("unchecked")
      JavaRDD<AvroKey<T>> avroFile = (JavaRDD<AvroKey<T>>) (JavaRDD<?>)
          jsc.newAPIHadoopFile(pattern, AvroKeyInputFormat.class, AvroKey.class,
              NullWritable.class, new Configuration()).keys();
      // Unwrap each AvroKey to its datum, then attach window metadata.
      JavaRDD<WindowedValue<T>> rdd = avroFile.map(new Function<AvroKey<T>, T>() {
        @Override
        public T call(AvroKey<T> key) {
          return key.datum();
        }
      }).map(WindowingHelpers.<T>windowFunction());
      context.setOutputRDD(transform, rdd);
    }
  };
}
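The windowFunction() helper is what re-attaches window metadata after the raw Hadoop read. A minimal sketch of its assumed behavior follows; the helper body here is an assumption about the project's WindowingHelpers, with WindowedValue.valueInGlobalWindow being the real SDK call:

// Sketch: batch reads start life in the global window, so wrapping each datum
// with valueInGlobalWindow mirrors what WindowingHelpers.windowFunction()
// is expected to do.
Function<T, WindowedValue<T>> windowFn = new Function<T, WindowedValue<T>>() {
  @Override
  public WindowedValue<T> call(T t) {
    return WindowedValue.valueInGlobalWindow(t);
  }
};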
Use of com.google.cloud.dataflow.sdk.util.WindowedValue in project spark-dataflow by cloudera.
The class StreamingTransformTranslator, method window:
private static <T, W extends BoundedWindow> TransformEvaluator<Window.Bound<T>> window() {
  return new TransformEvaluator<Window.Bound<T>>() {
    @Override
    public void evaluate(Window.Bound<T> transform, EvaluationContext context) {
      StreamingEvaluationContext sec = (StreamingEvaluationContext) context;
      // --- first we apply windowing to the stream
      WindowFn<? super T, W> windowFn = WINDOW_FG.get("windowFn", transform);
      @SuppressWarnings("unchecked")
      JavaDStream<WindowedValue<T>> dStream =
          (JavaDStream<WindowedValue<T>>) sec.getStream(transform);
      if (windowFn instanceof FixedWindows) {
        Duration windowDuration =
            Durations.milliseconds(((FixedWindows) windowFn).getSize().getMillis());
        sec.setStream(transform, dStream.window(windowDuration));
      } else if (windowFn instanceof SlidingWindows) {
        Duration windowDuration =
            Durations.milliseconds(((SlidingWindows) windowFn).getSize().getMillis());
        Duration slideDuration =
            Durations.milliseconds(((SlidingWindows) windowFn).getPeriod().getMillis());
        sec.setStream(transform, dStream.window(windowDuration, slideDuration));
      }
      // --- then we apply windowing to the elements
      DoFn<T, T> addWindowsDoFn = new AssignWindowsDoFn<>(windowFn);
      DoFnFunction<T, T> dofn =
          new DoFnFunction<>(addWindowsDoFn, sec.getRuntimeContext(), null);
      // Re-fetch the stream: it may have been replaced by the windowed one above.
      @SuppressWarnings("unchecked")
      JavaDStreamLike<WindowedValue<T>, ?, JavaRDD<WindowedValue<T>>> dstream =
          (JavaDStreamLike<WindowedValue<T>, ?, JavaRDD<WindowedValue<T>>>)
              sec.getStream(transform);
      sec.setStream(transform, dstream.mapPartitions(dofn));
    }
  };
}
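As a usage sketch, here is the kind of pipeline step this evaluator handles. The element type and window size are illustrative assumptions; Window.into and FixedWindows.of are standard Dataflow 1.x SDK API:

// Hypothetical step: one-minute fixed windows, which the FixedWindows branch
// above maps to dStream.window(Durations.milliseconds(60000)).
PCollection<String> windowed = input.apply(
    Window.<String>into(FixedWindows.of(org.joda.time.Duration.standardMinutes(1))));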
Use of com.google.cloud.dataflow.sdk.util.WindowedValue in project spark-dataflow by cloudera.
The class TransformTranslator, method combineGlobally:
private static <I, A, O> TransformEvaluator<Combine.Globally<I, O>> combineGlobally() {
  return new TransformEvaluator<Combine.Globally<I, O>>() {
    @Override
    public void evaluate(Combine.Globally<I, O> transform, EvaluationContext context) {
      final Combine.CombineFn<I, A, O> globally = COMBINE_GLOBALLY_FG.get("fn", transform);
      @SuppressWarnings("unchecked")
      JavaRDDLike<WindowedValue<I>, ?> inRdd =
          (JavaRDDLike<WindowedValue<I>, ?>) context.getInputRDD(transform);
      final Coder<I> iCoder = context.getInput(transform).getCoder();
      final Coder<A> aCoder;
      try {
        aCoder = globally.getAccumulatorCoder(context.getPipeline().getCoderRegistry(), iCoder);
      } catch (CannotProvideCoderException e) {
        throw new IllegalStateException("Could not determine coder for accumulator", e);
      }
      // Use coders to convert objects in the PCollection to byte arrays, so they
      // can be transferred over the network for the shuffle.
      JavaRDD<byte[]> inRddBytes = inRdd
          .map(WindowingHelpers.<I>unwindowFunction())
          .map(CoderHelpers.toByteFunction(iCoder));
      /*A*/ byte[] acc = inRddBytes.aggregate(
          CoderHelpers.toByteArray(globally.createAccumulator(), aCoder),
          new Function2<byte[], byte[], byte[]>() {
            @Override
            public byte[] call(/*A*/ byte[] ab, /*I*/ byte[] ib) throws Exception {
              A a = CoderHelpers.fromByteArray(ab, aCoder);
              I i = CoderHelpers.fromByteArray(ib, iCoder);
              return CoderHelpers.toByteArray(globally.addInput(a, i), aCoder);
            }
          },
          new Function2<byte[], byte[], byte[]>() {
            @Override
            public byte[] call(/*A*/ byte[] a1b, /*A*/ byte[] a2b) throws Exception {
              A a1 = CoderHelpers.fromByteArray(a1b, aCoder);
              A a2 = CoderHelpers.fromByteArray(a2b, aCoder);
              // don't use Guava's ImmutableList.of as values may be null
              List<A> accumulators = Collections.unmodifiableList(Arrays.asList(a1, a2));
              A merged = globally.mergeAccumulators(accumulators);
              return CoderHelpers.toByteArray(merged, aCoder);
            }
          });
      O output = globally.extractOutput(CoderHelpers.fromByteArray(acc, aCoder));
      Coder<O> coder = context.getOutput(transform).getCoder();
      // don't use Guava's ImmutableList.of as output may be null
      JavaRDD<byte[]> outRdd = context.getSparkContext().parallelize(
          CoderHelpers.toByteArrays(Collections.singleton(output), coder));
      context.setOutputRDD(transform,
          outRdd.map(CoderHelpers.fromByteFunction(coder)).map(WindowingHelpers.<O>windowFunction()));
    }
  };
}
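For reference, a minimal pipeline step this evaluator would translate. The numbers collection is an illustrative assumption; Combine.globally and Sum.SumIntegerFn are standard Dataflow 1.x SDK API:

// Hypothetical global combine over a PCollection<Integer>. At runtime the
// byte-array aggregate above drives the CombineFn's createAccumulator,
// addInput, mergeAccumulators and extractOutput calls.
PCollection<Integer> total =
    numbers.apply(Combine.globally(new Sum.SumIntegerFn()));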