Use of org.apache.spark.api.java.JavaRDDLike in project camel by apache.
The class SparkProducerTest, method createRegistry.
// Routes fixtures
@Override
protected JndiRegistry createRegistry() throws Exception {
    JndiRegistry registry = super.createRegistry();
    registry.bind("testFileRdd", sparkContext.textFile("src/test/resources/testrdd.txt"));
    if (shouldRunHive) {
        registry.bind("hiveContext", hiveContext);
        DataFrame jsonCars = hiveContext.read().json("src/test/resources/cars.json");
        jsonCars.registerTempTable("cars");
        registry.bind("jsonCars", jsonCars);
    }
    registry.bind("countLinesTransformation", new org.apache.camel.component.spark.RddCallback() {

        @Override
        public Object onRdd(JavaRDDLike rdd, Object... payloads) {
            return rdd.count();
        }
    });
    return registry;
}
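For context, a minimal sketch of how these bindings are typically consumed: a producer call against the camel-spark RDD endpoint, looking up the bound RDD and callback by their registry names with the `#` prefix. The exact URI and the Long result type are assumptions based on the component's documented option names, not part of the snippet above.

// Hypothetical test method; assumes the registry bindings created above.
@Test
public void shouldCountLines() {
    long linesCount = template.requestBody(
            "spark:rdd?rdd=#testFileRdd&rddCallback=#countLinesTransformation",
            null, Long.class);
    Truth.assertThat(linesCount).isGreaterThan(0L);
}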
Use of org.apache.spark.api.java.JavaRDDLike in project camel by apache.
The class RddSparkProducer, method process.
@Override
public void process(Exchange exchange) throws Exception {
    JavaRDDLike rdd = resolveRdd(exchange);
    RddCallback rddCallback = resolveRddCallback(exchange);
    Object body = exchange.getIn().getBody();
    Object result = body instanceof List
            ? rddCallback.onRdd(rdd, ((List) body).toArray(new Object[0]))
            : rddCallback.onRdd(rdd, body);
    collectResults(exchange, result);
}
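Note that process() spreads a List body into the callback's varargs, so a caller can pass several payloads in one exchange. A minimal sketch of that path, reusing the sparkUri and SPARK_RDD_CALLBACK_HEADER names from the test class shown on this page; the multiplier logic itself is illustrative:

// Illustrative callback: scales the RDD's line count by a factor taken
// from the exchange body.
RddCallback multiplyLineCount = new org.apache.camel.component.spark.RddCallback() {
    @Override
    public Object onRdd(JavaRDDLike rdd, Object... payloads) {
        int factor = (int) payloads[0];
        return rdd.count() * factor;
    }
};
// The List body is unpacked into the payloads array by process():
long result = template.requestBodyAndHeader(sparkUri, Arrays.asList(10),
        SPARK_RDD_CALLBACK_HEADER, multiplyLineCount, Long.class);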
Use of org.apache.spark.api.java.JavaRDDLike in project spark-dataflow by cloudera.
The class TransformTranslator, method multiDo.
private static <I, O> TransformEvaluator<ParDo.BoundMulti<I, O>> multiDo() {
    return new TransformEvaluator<ParDo.BoundMulti<I, O>>() {

        @Override
        public void evaluate(ParDo.BoundMulti<I, O> transform, EvaluationContext context) {
            TupleTag<O> mainOutputTag = MULTIDO_FG.get("mainOutputTag", transform);
            MultiDoFnFunction<I, O> multifn = new MultiDoFnFunction<>(transform.getFn(),
                    context.getRuntimeContext(), mainOutputTag,
                    getSideInputs(transform.getSideInputs(), context));
            @SuppressWarnings("unchecked")
            JavaRDDLike<WindowedValue<I>, ?> inRDD = (JavaRDDLike<WindowedValue<I>, ?>) context.getInputRDD(transform);
            JavaPairRDD<TupleTag<?>, WindowedValue<?>> all = inRDD.mapPartitionsToPair(multifn).cache();
            PCollectionTuple pct = context.getOutput(transform);
            for (Map.Entry<TupleTag<?>, PCollection<?>> e : pct.getAll().entrySet()) {
                @SuppressWarnings("unchecked")
                JavaPairRDD<TupleTag<?>, WindowedValue<?>> filtered = all.filter(new TupleTagFilter(e.getKey()));
                // Object is the best we can do since different outputs can have different tags
                @SuppressWarnings("unchecked")
                JavaRDD<WindowedValue<Object>> values = (JavaRDD<WindowedValue<Object>>) (JavaRDD<?>) filtered.values();
                context.setRDD(e.getValue(), values);
            }
        }
    };
}
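TupleTagFilter is not shown in this snippet. A plausible reading, consistent with how it is passed to JavaPairRDD.filter above, is a predicate over (tag, value) pairs that keeps only entries carrying the requested output tag. A sketch under that assumption; the class and field names are guesses, as the real TupleTagFilter is defined elsewhere in spark-dataflow:

// Sketch: keeps only pair-RDD entries whose key matches the wanted tag.
private static class TupleTagFilter
        implements Function<Tuple2<TupleTag<?>, WindowedValue<?>>, Boolean> {

    private final TupleTag<?> tag;

    TupleTagFilter(TupleTag<?> tag) {
        this.tag = tag;
    }

    @Override
    public Boolean call(Tuple2<TupleTag<?>, WindowedValue<?>> input) {
        return tag.equals(input._1());
    }
}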
Use of org.apache.spark.api.java.JavaRDDLike in project camel by apache.
The class SparkProducerTest, method shouldExecuteVoidCallback.
@Test
public void shouldExecuteVoidCallback() throws IOException {
// Given
final File output = File.createTempFile("camel", "spark");
output.delete();
// When
template.sendBodyAndHeader(sparkUri, null, SPARK_RDD_CALLBACK_HEADER, new VoidRddCallback() {
@Override
public void doOnRdd(JavaRDDLike rdd, Object... payloads) {
rdd.saveAsTextFile(output.getAbsolutePath());
}
});
// Then
Truth.assertThat(output.length()).isGreaterThan(0L);
}
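The test overrides doOnRdd rather than onRdd, which suggests VoidRddCallback is an adapter over the general RddCallback: it delegates to the side-effecting doOnRdd and returns null, so no result body is set on the exchange. A sketch of that adapter, assuming RddCallback is generic in its return type (the raw use in createRegistry above leaves this open):

// Sketch of the adapter the test implies; shape is an assumption.
public abstract class VoidRddCallback implements RddCallback<Void> {

    @Override
    public Void onRdd(JavaRDDLike rdd, Object... payloads) {
        doOnRdd(rdd, payloads);
        return null;
    }

    public abstract void doOnRdd(JavaRDDLike rdd, Object... payloads);
}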
Use of org.apache.spark.api.java.JavaRDDLike in project spark-dataflow by cloudera.
The class TransformTranslator, method combineGlobally.
private static <I, A, O> TransformEvaluator<Combine.Globally<I, O>> combineGlobally() {
    return new TransformEvaluator<Combine.Globally<I, O>>() {

        @Override
        public void evaluate(Combine.Globally<I, O> transform, EvaluationContext context) {
            final Combine.CombineFn<I, A, O> globally = COMBINE_GLOBALLY_FG.get("fn", transform);
            @SuppressWarnings("unchecked")
            JavaRDDLike<WindowedValue<I>, ?> inRdd = (JavaRDDLike<WindowedValue<I>, ?>) context.getInputRDD(transform);
            final Coder<I> iCoder = context.getInput(transform).getCoder();
            final Coder<A> aCoder;
            try {
                aCoder = globally.getAccumulatorCoder(context.getPipeline().getCoderRegistry(), iCoder);
            } catch (CannotProvideCoderException e) {
                throw new IllegalStateException("Could not determine coder for accumulator", e);
            }
            // Use coders to convert objects in the PCollection to byte arrays, so they
            // can be transferred over the network for the shuffle.
            JavaRDD<byte[]> inRddBytes = inRdd.map(WindowingHelpers.<I>unwindowFunction())
                    .map(CoderHelpers.toByteFunction(iCoder));
            /*A*/ byte[] acc = inRddBytes.aggregate(
                    CoderHelpers.toByteArray(globally.createAccumulator(), aCoder),
                    new Function2<byte[], byte[], byte[]>() {

                        @Override
                        public byte[] call(/*A*/ byte[] ab, /*I*/ byte[] ib) throws Exception {
                            A a = CoderHelpers.fromByteArray(ab, aCoder);
                            I i = CoderHelpers.fromByteArray(ib, iCoder);
                            return CoderHelpers.toByteArray(globally.addInput(a, i), aCoder);
                        }
                    },
                    new Function2<byte[], byte[], byte[]>() {

                        @Override
                        public byte[] call(/*A*/ byte[] a1b, /*A*/ byte[] a2b) throws Exception {
                            A a1 = CoderHelpers.fromByteArray(a1b, aCoder);
                            A a2 = CoderHelpers.fromByteArray(a2b, aCoder);
                            // don't use Guava's ImmutableList.of as values may be null
                            List<A> accumulators = Collections.unmodifiableList(Arrays.asList(a1, a2));
                            A merged = globally.mergeAccumulators(accumulators);
                            return CoderHelpers.toByteArray(merged, aCoder);
                        }
                    });
            O output = globally.extractOutput(CoderHelpers.fromByteArray(acc, aCoder));
            Coder<O> coder = context.getOutput(transform).getCoder();
            // don't use Guava's ImmutableList.of as output may be null
            JavaRDD<byte[]> outRdd = context.getSparkContext().parallelize(
                    CoderHelpers.toByteArrays(Collections.singleton(output), coder));
            context.setOutputRDD(transform,
                    outRdd.map(CoderHelpers.fromByteFunction(coder)).map(WindowingHelpers.<O>windowFunction()));
        }
    };
}
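The core of this translation is JavaRDDLike.aggregate(zeroValue, seqOp, combOp): the zero value plays the role of createAccumulator(), the first function folds one element into a partition-local accumulator (addInput), and the second merges accumulators across partitions (mergeAccumulators). A stripped-down, self-contained sketch of the same contract without the coder machinery; the local SparkContext and the summing logic are illustrative, not from the source:

import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;

// Accumulator type (Long) deliberately differs from the element type (Integer),
// mirroring the A-vs-I distinction in combineGlobally above.
JavaSparkContext jsc = new JavaSparkContext("local[2]", "aggregate-demo");
JavaRDD<Integer> nums = jsc.parallelize(Arrays.asList(1, 2, 3, 4));
long sum = nums.aggregate(0L, new Function2<Long, Integer, Long>() {
    @Override
    public Long call(Long acc, Integer value) {
        return acc + value; // addInput: fold one element into the accumulator
    }
}, new Function2<Long, Long, Long>() {
    @Override
    public Long call(Long a1, Long a2) {
        return a1 + a2;     // mergeAccumulators: combine partition results
    }
});
// sum == 10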