use of org.apache.flink.api.common.io.ReplicatingInputFormat in project flink by apache.
the class ReplicatingDataSourceITCase method testReplicatedSourceToJoin.
/**
 * Joins a replicated data source with a regular sequence source and checks the summed result.
 *
 * <p>The first input is a {@code ParallelIteratorInputFormat} over a {@code NumberSequenceIterator}
 * from 0 to 1000, wrapped in a {@code ReplicatingInputFormat}. The second input is produced by
 * {@code generateSequence(0, 1000)}. Both inputs are mapped with {@code ToTuple}, joined on
 * field 0, projected to the first input's field 0 and summed. The collected result is expected
 * to be the single tuple {@code (500500)}.
 *
 * @throws Exception if building or running the program fails
 */
@Test
public void testReplicatedSourceToJoin() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Wrap the parallel sequence format so that it is read as a replicated input.
    ReplicatingInputFormat<Long, GenericInputSplit> replicatedFormat =
            new ReplicatingInputFormat<Long, GenericInputSplit>(
                    new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L)));

    DataSet<Tuple1<Long>> replicatedSide =
            env.createInput(replicatedFormat, BasicTypeInfo.LONG_TYPE_INFO).map(new ToTuple());
    DataSet<Tuple1<Long>> regularSide = env.generateSequence(0L, 1000L).map(new ToTuple());

    // Equi-join on the only field, keep the first input's field and sum it up.
    DataSet<Tuple> summed =
            replicatedSide.join(regularSide).where(0).equalTo(0).projectFirst(0).sum(0);

    List<Tuple> collected = summed.collect();
    String expected = "(500500)";
    compareResultAsText(collected, expected);
}
use of org.apache.flink.api.common.io.ReplicatingInputFormat in project flink by apache.
the class DataSourceNode method getAlternativePlans.
/**
 * Returns the plan candidates for this data source, creating and caching them on first call.
 *
 * <p>Exactly one {@code SourcePlanNode} candidate is produced. Its costs start out as an empty
 * {@code Costs} object; file input cost is added through the estimator only when the relevant
 * input format is a {@code FileInputFormat} and {@code estimatedOutputSize} is non-negative.
 * For a replicated input the format wrapped by the {@code ReplicatingInputFormat} is inspected
 * and the estimated size is multiplied by this node's parallelism. Unique-field properties are
 * applied to the candidate only for non-replicated inputs.
 *
 * @param estimator the cost estimator used to add file input costs
 * @return the cached list holding the single source plan candidate
 */
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    // Reuse the previously computed candidate list, if any.
    if (this.cachedPlans != null) {
        return this.cachedPlans;
    }

    final String nodeName = "DataSource (" + this.getOperator().getName() + ")";
    final SourcePlanNode sourcePlan = new SourcePlanNode(this, nodeName, this.gprops, this.lprops);
    final Costs sourceCosts = new Costs();

    if (replicatedInput) {
        // replicated input: look at the format wrapped by the replicating format
        final InputFormat<?, ?> wrappedFormat =
                ((ReplicatingInputFormat<?, ?>) getOperator().getFormatWrapper().getUserCodeObject())
                        .getReplicatedInputFormat();
        final boolean wrapsFileFormat =
                FileInputFormat.class.isAssignableFrom(wrappedFormat.getClass());
        if (wrapsFileFormat && this.estimatedOutputSize >= 0) {
            // the estimated size is scaled by the parallelism of this node
            estimator.addFileInputCost(
                    this.estimatedOutputSize * this.getParallelism(), sourceCosts);
        }
    } else {
        sourcePlan.updatePropertiesWithUniqueSets(getUniqueFields());
        final boolean isFileFormat =
                FileInputFormat.class.isAssignableFrom(
                        getOperator().getFormatWrapper().getUserCodeClass());
        if (isFileFormat && this.estimatedOutputSize >= 0) {
            estimator.addFileInputCost(this.estimatedOutputSize, sourceCosts);
        }
    }
    sourcePlan.setCosts(sourceCosts);

    // a data source has only one plan, so the result is a one-element list
    final List<PlanNode> singlePlan = new ArrayList<PlanNode>(1);
    singlePlan.add(sourcePlan);
    this.cachedPlans = singlePlan;
    return singlePlan;
}
use of org.apache.flink.api.common.io.ReplicatingInputFormat in project flink by apache.
the class ReplicatingDataSourceTest method checkCrossWithReplicatedSourceInputBehindMap.
/**
 * Checks the optimized plan of a cross whose first input is a replicated data source followed
 * by a map and a filter: both cross inputs must use the {@code FORWARD} ship strategy.
 */
@Test
public void checkCrossWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // First input: CSV format wrapped in a replicating input format.
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatedFormat =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
                    new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> replicatedSource =
            env.createInput(
                    replicatedFormat,
                    new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));

    // Second input: plain CSV source.
    DataSet<Tuple1<String>> regularSource = env.readCsvFile("/some/otherpath").types(String.class);

    // Program: replicated source -> map -> filter -> cross with regular source -> text sink.
    replicatedSource
            .map(new IdMap())
            .filter(new NoFilter())
            .cross(regularSource)
            .writeAsText("/some/newpath");

    // Hand the program plan to the compiler.
    Plan programPlan = env.createProgramPlan();
    OptimizedPlan optimized = compileNoStats(programPlan);

    // The node feeding the sink is the cross; read the ship strategy of each of its inputs.
    SinkPlanNode sink = optimized.getDataSinks().iterator().next();
    DualInputPlanNode cross = (DualInputPlanNode) sink.getPredecessor();
    ShipStrategyType firstInputStrategy = cross.getInput1().getShipStrategy();
    ShipStrategyType secondInputStrategy = cross.getInput2().getShipStrategy();

    // The cross should use the forward strategy on both sides.
    Assert.assertEquals(
            "Invalid ship strategy for an operator.",
            ShipStrategyType.FORWARD,
            firstInputStrategy);
    Assert.assertEquals(
            "Invalid ship strategy for an operator.",
            ShipStrategyType.FORWARD,
            secondInputStrategy);
}
use of org.apache.flink.api.common.io.ReplicatingInputFormat in project flink by apache.
the class ReplicatingDataSourceTest method checkCrossWithReplicatedSourceInput.
/**
 * Checks the optimized plan of a cross whose first input is a replicated data source: both
 * cross inputs must use the {@code FORWARD} ship strategy.
 */
@Test
public void checkCrossWithReplicatedSourceInput() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // First input: CSV format wrapped in a replicating input format.
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatedFormat =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
                    new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> replicatedSource =
            env.createInput(
                    replicatedFormat,
                    new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));

    // Second input: plain CSV source.
    DataSet<Tuple1<String>> regularSource = env.readCsvFile("/some/otherpath").types(String.class);

    // Program: replicated source crossed with the regular source, written to a text sink.
    replicatedSource.cross(regularSource).writeAsText("/some/newpath");

    // Hand the program plan to the compiler.
    Plan programPlan = env.createProgramPlan();
    OptimizedPlan optimized = compileNoStats(programPlan);

    // The node feeding the sink is the cross; read the ship strategy of each of its inputs.
    SinkPlanNode sink = optimized.getDataSinks().iterator().next();
    DualInputPlanNode cross = (DualInputPlanNode) sink.getPredecessor();
    ShipStrategyType firstInputStrategy = cross.getInput1().getShipStrategy();
    ShipStrategyType secondInputStrategy = cross.getInput2().getShipStrategy();

    // The cross should use the forward strategy on both sides.
    Assert.assertEquals(
            "Invalid ship strategy for an operator.",
            ShipStrategyType.FORWARD,
            firstInputStrategy);
    Assert.assertEquals(
            "Invalid ship strategy for an operator.",
            ShipStrategyType.FORWARD,
            secondInputStrategy);
}
use of org.apache.flink.api.common.io.ReplicatingInputFormat in project flink by apache.
the class ReplicatingDataSourceTest method checkJoinWithReplicatedSourceInputBehindMapPartition.
/**
 * Checks the optimized plan of a join whose first input is a replicated data source followed
 * by a map-partition: both join inputs must use the {@code FORWARD} ship strategy.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // First input: CSV format wrapped in a replicating input format.
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> replicatedFormat =
            new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(
                    new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> replicatedSource =
            env.createInput(
                    replicatedFormat,
                    new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));

    // Second input: plain CSV source.
    DataSet<Tuple1<String>> regularSource = env.readCsvFile("/some/otherpath").types(String.class);

    // Program: replicated source -> mapPartition -> join on all fields -> text sink.
    replicatedSource
            .mapPartition(new IdPMap())
            .join(regularSource)
            .where("*")
            .equalTo("*")
            .writeAsText("/some/newpath");

    // Hand the program plan to the compiler.
    Plan programPlan = env.createProgramPlan();
    OptimizedPlan optimized = compileNoStats(programPlan);

    // The node feeding the sink is the join; read the ship strategy of each of its inputs.
    SinkPlanNode sink = optimized.getDataSinks().iterator().next();
    DualInputPlanNode join = (DualInputPlanNode) sink.getPredecessor();
    ShipStrategyType firstInputStrategy = join.getInput1().getShipStrategy();
    ShipStrategyType secondInputStrategy = join.getInput2().getShipStrategy();

    // The join should use the forward strategy on both sides.
    Assert.assertEquals(
            "Invalid ship strategy for an operator.",
            ShipStrategyType.FORWARD,
            firstInputStrategy);
    Assert.assertEquals(
            "Invalid ship strategy for an operator.",
            ShipStrategyType.FORWARD,
            secondInputStrategy);
}
Aggregations