Use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache: the class PregelCompilerTest, method testPregelCompiler.
@SuppressWarnings("serial")
@Test
public void testPregelCompiler() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    // compose test program
    {
        DataSet<Vertex<Long, Long>> initialVertices = env
            .fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
            .map(new Tuple2ToVertexMap<>());

        DataSet<Edge<Long, NullValue>> edges = env
            .fromElements(new Tuple2<>(1L, 2L))
            .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
                @Override
                public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                    return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                }
            });

        Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

        DataSet<Vertex<Long, Long>> result =
            graph.runVertexCentricIteration(new CCCompute(), null, 100).getVertices();

        result.output(new DiscardingOutputFormat<>());
    }

    Plan p = env.createProgramPlan("Pregel Connected Components");
    OptimizedPlan op = compileNoStats(p);

    // check the sink
    SinkPlanNode sink = op.getDataSinks().iterator().next();
    assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

    // check the iteration
    WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

    // check the solution set delta
    PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
    assertTrue(ssDelta instanceof SingleInputPlanNode);

    SingleInputPlanNode ssFlatMap =
        (SingleInputPlanNode) ((SingleInputPlanNode) ssDelta).getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
    assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());

    // check the computation coGroup
    DualInputPlanNode computationCoGroup = (DualInputPlanNode) ssFlatMap.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
    assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
    assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
    assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());

    // check that the initial partitioning is pushed out of the loop
    assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
    assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
}
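The CCCompute function referenced above is defined elsewhere in PregelCompilerTest and is not shown on this page. As a sketch only, a vertex-centric connected-components compute function consistent with how the test invokes it (Long vertex values, Long messages, NullValue edges) could look like the following; the body is an assumption, not the verbatim test code:

// Sketch of a connected-components ComputeFunction; the actual test body may differ.
private static final class CCCompute extends ComputeFunction<Long, Long, NullValue, Long> {

    @Override
    public void compute(Vertex<Long, Long> vertex, MessageIterator<Long> messages) {
        // the component ID is the smallest vertex ID seen so far
        long minComponent = vertex.getValue();
        for (Long msg : messages) {
            minComponent = Math.min(minComponent, msg);
        }
        // propagate on the first superstep and whenever the component shrinks
        if (getSuperstepNumber() == 1 || minComponent < vertex.getValue()) {
            setNewVertexValue(minComponent);
            for (Edge<Long, NullValue> edge : getEdges()) {
                sendMessageTo(edge.getTarget(), minComponent);
            }
        }
    }
}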
Use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache: the class PregelCompilerTest, method testPregelWithCombiner.
@SuppressWarnings("serial")
@Test
public void testPregelWithCombiner() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    // compose test program
    {
        DataSet<Vertex<Long, Long>> initialVertices = env
            .fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
            .map(new Tuple2ToVertexMap<>());

        DataSet<Edge<Long, NullValue>> edges = env
            .fromElements(new Tuple2<>(1L, 2L))
            .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
                @Override
                public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                    return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                }
            });

        Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

        DataSet<Vertex<Long, Long>> result =
            graph.runVertexCentricIteration(new CCCompute(), new CCCombiner(), 100).getVertices();

        result.output(new DiscardingOutputFormat<>());
    }

    Plan p = env.createProgramPlan("Pregel Connected Components");
    OptimizedPlan op = compileNoStats(p);

    // check the sink
    SinkPlanNode sink = op.getDataSinks().iterator().next();
    assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

    // check the iteration
    WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

    // check the combiner
    SingleInputPlanNode combiner = (SingleInputPlanNode) iteration.getInput2().getSource();
    assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());

    // check the solution set delta
    PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
    assertTrue(ssDelta instanceof SingleInputPlanNode);

    SingleInputPlanNode ssFlatMap =
        (SingleInputPlanNode) ((SingleInputPlanNode) ssDelta).getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
    assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());

    // check the computation coGroup
    DualInputPlanNode computationCoGroup = (DualInputPlanNode) ssFlatMap.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
    assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
    assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
    assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());

    // check that the initial partitioning is pushed out of the loop
    assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
    assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
}
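The CCCombiner is likewise defined elsewhere in the test class. Because connected components only propagates minima, a plausible message combiner (a sketch against Gelly's MessageCombiner API; the actual test body may differ) reduces all pending messages for a vertex to their minimum before delivery:

// Sketch: combine all pending messages for a vertex into their minimum.
public static final class CCCombiner extends MessageCombiner<Long, Long> {

    @Override
    public void combineMessages(MessageIterator<Long> messages) {
        long minMessage = Long.MAX_VALUE;
        for (Long msg : messages) {
            minMessage = Math.min(minMessage, msg);
        }
        sendCombinedMessage(minMessage);
    }
}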
Use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache: the class SpargelCompilerTest, method testSpargelCompiler.
@SuppressWarnings("serial")
@Test
public void testSpargelCompiler() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    // compose test program
    DataSet<Vertex<Long, Long>> initialVertices = env
        .fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
        .map(new Tuple2ToVertexMap<>());

    DataSet<Edge<Long, NullValue>> edges = env
        .fromElements(new Tuple2<>(1L, 2L))
        .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
            @Override
            public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
            }
        });

    Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

    DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration(
            new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO),
            new ConnectedComponents.CCUpdater<>(), 100)
        .getVertices();

    result.output(new DiscardingOutputFormat<>());

    Plan p = env.createProgramPlan("Spargel Connected Components");
    OptimizedPlan op = compileNoStats(p);

    // check the sink
    SinkPlanNode sink = op.getDataSinks().iterator().next();
    assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

    // check the iteration
    WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

    // check the solution set join and the delta
    PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
    // this is only true if the update function preserves the partitioning
    assertTrue(ssDelta instanceof DualInputPlanNode);

    DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
    assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
    assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
    assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());

    // check the workset join
    DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource();
    assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
    assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy());
    assertTrue(edgeJoin.getInput1().getTempMode().isCached());
    assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys());

    // check that the initial partitioning is pushed out of the loop
    assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy());
    assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
    assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys());

    // check that the initial workset sort is outside the loop
    assertEquals(LocalStrategy.SORT, iteration.getInput2().getLocalStrategy());
    assertEquals(new FieldList(0), iteration.getInput2().getLocalStrategyKeys());
}
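Here CCMessenger and CCUpdater come from the Gelly library class org.apache.flink.graph.library.ConnectedComponents rather than from the test itself. In outline (a sketch of the idea, not the library's verbatim code), the scatter function sends each vertex's current component ID to its neighbors and the gather function keeps the minimum, updating the vertex only when its component shrinks:

// Sketch of the scatter-gather pair; the library versions differ in detail.
private static final class CCMessenger extends ScatterFunction<Long, Long, Long, NullValue> {
    @Override
    public void sendMessages(Vertex<Long, Long> vertex) {
        // propagate the current (minimal) component ID to all neighbors
        sendMessageToAllNeighbors(vertex.getValue());
    }
}

private static final class CCUpdater extends GatherFunction<Long, Long, Long> {
    @Override
    public void updateVertex(Vertex<Long, Long> vertex, MessageIterator<Long> messages) {
        long min = vertex.getValue();
        for (Long msg : messages) {
            min = Math.min(min, msg);
        }
        // updating the value also re-activates the vertex for the next superstep
        if (min < vertex.getValue()) {
            setNewVertexValue(min);
        }
    }
}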
Use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache: the class LocalProperties, method filterBySemanticProperties.
// --------------------------------------------------------------------------------------------
/**
* Filters these LocalProperties by the fields that are forwarded to the output as described by
* the SemanticProperties.
*
* @param props The semantic properties holding information about forwarded fields.
* @param input The index of the input.
* @return The filtered LocalProperties
*/
public LocalProperties filterBySemanticProperties(SemanticProperties props, int input) {
    if (props == null) {
        throw new NullPointerException("SemanticProperties may not be null.");
    }

    LocalProperties returnProps = new LocalProperties();

    // check if sorting is preserved
    if (this.ordering != null) {
        Ordering newOrdering = new Ordering();

        for (int i = 0; i < this.ordering.getInvolvedIndexes().size(); i++) {
            int sourceField = this.ordering.getInvolvedIndexes().get(i);
            FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
            if (targetField == null || targetField.size() == 0) {
                if (i == 0) {
                    // order fully destroyed
                    newOrdering = null;
                    break;
                } else {
                    // order partially preserved
                    break;
                }
            } else {
                // use any one of the target fields for now; this could use field
                // equivalence sets in the future.
                if (targetField.size() > 1) {
                    LOG.warn("Found that a field is forwarded to more than one target field in "
                        + "semantic forwarded field information. Will only use the field with the lowest index.");
                }
                newOrdering.appendOrdering(targetField.toArray()[0], this.ordering.getType(i), this.ordering.getOrder(i));
            }
        }

        returnProps.ordering = newOrdering;
        if (newOrdering != null) {
            returnProps.groupedFields = newOrdering.getInvolvedIndexes();
        } else {
            returnProps.groupedFields = null;
        }
    } else if (this.groupedFields != null) {
        // check if grouping is preserved
        FieldList newGroupedFields = new FieldList();

        for (Integer sourceField : this.groupedFields) {
            FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
            if (targetField == null || targetField.size() == 0) {
                newGroupedFields = null;
                break;
            } else {
                // use any one of the target fields for now; this could use field
                // equivalence sets in the future.
                if (targetField.size() > 1) {
                    LOG.warn("Found that a field is forwarded to more than one target field in "
                        + "semantic forwarded field information. Will only use the field with the lowest index.");
                }
                newGroupedFields = newGroupedFields.addField(targetField.toArray()[0]);
            }
        }
        returnProps.groupedFields = newGroupedFields;
    }

    if (this.uniqueFields != null) {
        Set<FieldSet> newUniqueFields = new HashSet<FieldSet>();
        for (FieldSet fields : this.uniqueFields) {
            FieldSet newFields = new FieldSet();
            for (Integer sourceField : fields) {
                FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
                if (targetField == null || targetField.size() == 0) {
                    newFields = null;
                    break;
                } else {
                    // use any one of the target fields for now; this could use field
                    // equivalence sets in the future.
                    if (targetField.size() > 1) {
                        LOG.warn("Found that a field is forwarded to more than one target field in "
                            + "semantic forwarded field information. Will only use the field with the lowest index.");
                    }
                    newFields = newFields.addField(targetField.toArray()[0]);
                }
            }
            if (newFields != null) {
                newUniqueFields.add(newFields);
            }
        }

        if (!newUniqueFields.isEmpty()) {
            returnProps.uniqueFields = newUniqueFields;
        } else {
            returnProps.uniqueFields = null;
        }
    }

    return returnProps;
}
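For illustration, the following hypothetical snippet (field indices and variable names invented here) shows the method's contract: grouping survives only if every grouping field is forwarded, and surviving fields are renamed to their forwarding targets:

// Hypothetical usage sketch, not taken from the Flink sources.
SingleInputSemanticProperties props = new SingleInputSemanticProperties();
props.addForwardedField(0, 0); // source field 0 -> target field 0
props.addForwardedField(2, 1); // source field 2 -> target field 1

LocalProperties grouped = LocalProperties.forGrouping(new FieldList(0, 2));

// both grouping fields are forwarded: the grouping is preserved as [0, 1]
LocalProperties preserved = grouped.filterBySemanticProperties(props, 0);

SingleInputSemanticProperties partial = new SingleInputSemanticProperties();
partial.addForwardedField(0, 0); // field 2 is not forwarded

// a grouping field is lost: getGroupedFields() on the result is null
LocalProperties destroyed = grouped.filterBySemanticProperties(partial, 0);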
Use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache: the class RequestedGlobalProperties, method filterBySemanticProperties.
/**
* Filters these properties by what can be preserved by the given SemanticProperties when
* propagated down to the given input.
*
* @param props The SemanticProperties which define which fields are preserved.
* @param input The index of the operator's input.
* @return The filtered RequestedGlobalProperties
*/
public RequestedGlobalProperties filterBySemanticProperties(SemanticProperties props, int input) {
    // no semantic properties available. All global properties are filtered.
    if (props == null) {
        throw new NullPointerException("SemanticProperties may not be null.");
    }

    RequestedGlobalProperties rgProp = new RequestedGlobalProperties();

    switch (this.partitioning) {
        case FULL_REPLICATION:
        case FORCED_REBALANCED:
        case CUSTOM_PARTITIONING:
        case RANDOM_PARTITIONED:
        case ANY_DISTRIBUTION:
            // make sure that certain properties are not pushed down
            return null;

        case HASH_PARTITIONED:
        case ANY_PARTITIONING:
            FieldSet newFields;
            if (this.partitioningFields instanceof FieldList) {
                newFields = new FieldList();
            } else {
                newFields = new FieldSet();
            }

            for (Integer targetField : this.partitioningFields) {
                int sourceField = props.getForwardingSourceField(input, targetField);
                if (sourceField >= 0) {
                    newFields = newFields.addField(sourceField);
                } else {
                    // partial partitionings are not preserved to avoid skewed partitioning
                    return null;
                }
            }
            rgProp.partitioning = this.partitioning;
            rgProp.partitioningFields = newFields;
            return rgProp;

        case RANGE_PARTITIONED:
            // range partitioning
            Ordering newOrdering = new Ordering();
            for (int i = 0; i < this.ordering.getInvolvedIndexes().size(); i++) {
                int value = this.ordering.getInvolvedIndexes().get(i);
                int sourceField = props.getForwardingSourceField(input, value);
                if (sourceField >= 0) {
                    newOrdering.appendOrdering(sourceField, this.ordering.getType(i), this.ordering.getOrder(i));
                } else {
                    return null;
                }
            }
            rgProp.partitioning = this.partitioning;
            rgProp.ordering = newOrdering;
            rgProp.dataDistribution = this.dataDistribution;
            return rgProp;

        default:
            throw new RuntimeException("Unknown partitioning type encountered.");
    }
}
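As a hypothetical illustration (indices invented), the hash-partitioned branch maps each requested key field back through getForwardingSourceField and gives up entirely, returning null, if any key field is not forwarded:

// Hypothetical usage sketch, not taken from the Flink sources.
SingleInputSemanticProperties props = new SingleInputSemanticProperties();
props.addForwardedField(3, 1); // input field 3 is forwarded to output field 1

RequestedGlobalProperties requested = new RequestedGlobalProperties();
requested.setHashPartitioned(new FieldList(1));

// the request is translated into a hash partitioning on input field 3
RequestedGlobalProperties pushedDown = requested.filterBySemanticProperties(props, 0);

RequestedGlobalProperties twoKeys = new RequestedGlobalProperties();
twoKeys.setHashPartitioned(new FieldList(1, 2)); // field 2 has no forwarding source

// partial partitionings are not pushed down: the result is null
RequestedGlobalProperties dropped = twoKeys.filterBySemanticProperties(props, 0);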