use of org.apache.flink.optimizer.dataproperties.LocalProperties in project flink by apache.
the class PropertyDataSourceTest method checkCoPartitionedSources2.
@Test
public void checkCoPartitionedSources2() {
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class);
data1.getSplitDataProperties().splitsPartitionedBy("byCountry", 0);
DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class);
data2.getSplitDataProperties().splitsPartitionedBy("byDate", 0);
data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SourcePlanNode sourceNode1 = (SourcePlanNode) ((NAryUnionPlanNode) sinkNode.getPredecessor()).getListOfInputs().get(0).getSource();
SourcePlanNode sourceNode2 = (SourcePlanNode) ((NAryUnionPlanNode) sinkNode.getPredecessor()).getListOfInputs().get(1).getSource();
GlobalProperties gprops1 = sourceNode1.getGlobalProperties();
LocalProperties lprops1 = sourceNode1.getLocalProperties();
GlobalProperties gprops2 = sourceNode2.getGlobalProperties();
LocalProperties lprops2 = sourceNode2.getLocalProperties();
Assert.assertTrue((new FieldSet(gprops1.getPartitioningFields().toArray())).equals(new FieldSet(0)));
Assert.assertTrue(gprops1.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING);
Assert.assertTrue(lprops1.getGroupedFields() == null);
Assert.assertTrue(lprops1.getOrdering() == null);
Assert.assertTrue((new FieldSet(gprops2.getPartitioningFields().toArray())).equals(new FieldSet(0)));
Assert.assertTrue(gprops2.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING);
Assert.assertTrue(lprops2.getGroupedFields() == null);
Assert.assertTrue(lprops2.getOrdering() == null);
Assert.assertTrue(!gprops1.getCustomPartitioner().equals(gprops2.getCustomPartitioner()));
}
use of org.apache.flink.optimizer.dataproperties.LocalProperties in project flink by apache.
the class SemanticPropertiesAPIToPlanTest method forwardFieldsTestMapReduce.
@Test
public void forwardFieldsTestMapReduce() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<Integer, Integer, Integer>> set = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
set = set.map(new MockMapper()).withForwardedFields("*").groupBy(0).reduce(new MockReducer()).withForwardedFields("f0->f1").map(new MockMapper()).withForwardedFields("*").groupBy(1).reduce(new MockReducer()).withForwardedFields("*");
set.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
Plan plan = env.createProgramPlan();
OptimizedPlan oPlan = compileWithStats(plan);
oPlan.accept(new Visitor<PlanNode>() {
@Override
public boolean preVisit(PlanNode visitable) {
if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof ReduceOperatorBase) {
for (Channel input : visitable.getInputs()) {
GlobalProperties gprops = visitable.getGlobalProperties();
LocalProperties lprops = visitable.getLocalProperties();
Assert.assertTrue("Reduce should just forward the input if it is already partitioned", input.getShipStrategy() == ShipStrategyType.FORWARD);
Assert.assertTrue("Wrong GlobalProperties on Reducer", gprops.isPartitionedOnFields(new FieldSet(1)));
Assert.assertTrue("Wrong GlobalProperties on Reducer", gprops.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
Assert.assertTrue("Wrong LocalProperties on Reducer", lprops.getGroupedFields().contains(1));
}
}
if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof MapOperatorBase) {
for (Channel input : visitable.getInputs()) {
GlobalProperties gprops = visitable.getGlobalProperties();
LocalProperties lprops = visitable.getLocalProperties();
Assert.assertTrue("Map should just forward the input if it is already partitioned", input.getShipStrategy() == ShipStrategyType.FORWARD);
Assert.assertTrue("Wrong GlobalProperties on Mapper", gprops.isPartitionedOnFields(new FieldSet(1)));
Assert.assertTrue("Wrong GlobalProperties on Mapper", gprops.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
Assert.assertTrue("Wrong LocalProperties on Mapper", lprops.getGroupedFields().contains(1));
}
return false;
}
return true;
}
@Override
public void postVisit(PlanNode visitable) {
}
});
}
use of org.apache.flink.optimizer.dataproperties.LocalProperties in project flink by apache.
the class TwoInputNode method instantiate.
protected void instantiate(OperatorDescriptorDual operator, Channel in1, Channel in2, List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator, RequestedGlobalProperties globPropsReq1, RequestedGlobalProperties globPropsReq2, RequestedLocalProperties locPropsReq1, RequestedLocalProperties locPropsReq2) {
final PlanNode inputSource1 = in1.getSource();
final PlanNode inputSource2 = in2.getSource();
for (List<NamedChannel> broadcastChannelsCombination : Sets.cartesianProduct(broadcastPlanChannels)) {
boolean validCombination = true;
// check whether the broadcast inputs use the same plan candidate at the branching point
for (int i = 0; i < broadcastChannelsCombination.size(); i++) {
NamedChannel nc = broadcastChannelsCombination.get(i);
PlanNode bcSource = nc.getSource();
if (!(areBranchCompatible(bcSource, inputSource1) || areBranchCompatible(bcSource, inputSource2))) {
validCombination = false;
break;
}
// check branch compatibility against all other broadcast variables
for (int k = 0; k < i; k++) {
PlanNode otherBcSource = broadcastChannelsCombination.get(k).getSource();
if (!areBranchCompatible(bcSource, otherBcSource)) {
validCombination = false;
break;
}
}
}
if (!validCombination) {
continue;
}
placePipelineBreakersIfNecessary(operator.getStrategy(), in1, in2);
DualInputPlanNode node = operator.instantiate(in1, in2, this);
node.setBroadcastInputs(broadcastChannelsCombination);
SemanticProperties semPropsGlobalPropFiltering = getSemanticPropertiesForGlobalPropertyFiltering();
GlobalProperties gp1 = in1.getGlobalProperties().clone().filterBySemanticProperties(semPropsGlobalPropFiltering, 0);
GlobalProperties gp2 = in2.getGlobalProperties().clone().filterBySemanticProperties(semPropsGlobalPropFiltering, 1);
GlobalProperties combined = operator.computeGlobalProperties(gp1, gp2);
SemanticProperties semPropsLocalPropFiltering = getSemanticPropertiesForLocalPropertyFiltering();
LocalProperties lp1 = in1.getLocalProperties().clone().filterBySemanticProperties(semPropsLocalPropFiltering, 0);
LocalProperties lp2 = in2.getLocalProperties().clone().filterBySemanticProperties(semPropsLocalPropFiltering, 1);
LocalProperties locals = operator.computeLocalProperties(lp1, lp2);
node.initProperties(combined, locals);
node.updatePropertiesWithUniqueSets(getUniqueFields());
target.add(node);
}
}
use of org.apache.flink.optimizer.dataproperties.LocalProperties in project flink by apache.
the class DataSourceNode method setDataPropertiesFromSplitProperties.
private void setDataPropertiesFromSplitProperties(SplitDataProperties splitProps) {
// set global properties
int[] partitionKeys = splitProps.getSplitPartitionKeys();
Partitioner<?> partitioner = splitProps.getSplitPartitioner();
if (partitionKeys != null && partitioner != null) {
this.gprops.setCustomPartitioned(new FieldList(partitionKeys), partitioner);
} else if (partitionKeys != null) {
this.gprops.setAnyPartitioning(new FieldList(partitionKeys));
}
// set local properties
int[] groupingKeys = splitProps.getSplitGroupKeys();
Ordering ordering = splitProps.getSplitOrder();
// adapt split grouping and sorting
if (ordering != null) {
// sorting falls back to grouping because a source can read multiple,
// randomly assigned splits
groupingKeys = ordering.getFieldPositions();
}
if (groupingKeys != null && partitionKeys != null) {
// check if grouping is also valid across splits, i.e., whether grouping keys are
// valid superset of partition keys
boolean allFieldsIncluded = true;
for (int i : partitionKeys) {
boolean fieldIncluded = false;
for (int j : groupingKeys) {
if (i == j) {
fieldIncluded = true;
break;
}
}
if (!fieldIncluded) {
allFieldsIncluded = false;
break;
}
}
if (allFieldsIncluded) {
this.lprops = LocalProperties.forGrouping(new FieldList(groupingKeys));
} else {
this.lprops = new LocalProperties();
}
} else {
this.lprops = new LocalProperties();
}
}
use of org.apache.flink.optimizer.dataproperties.LocalProperties in project flink by apache.
the class PropertyDataSourceTest method checkSinglePartitionedSource2.
@Test
public void checkSinglePartitionedSource2() {
ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);
data.getSplitDataProperties().splitsPartitionedBy(1, 0);
data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();
GlobalProperties gprops = sourceNode.getGlobalProperties();
LocalProperties lprops = sourceNode.getLocalProperties();
Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
Assert.assertTrue(lprops.getGroupedFields() == null);
Assert.assertTrue(lprops.getOrdering() == null);
}
Aggregations