use of org.apache.flink.api.java.DataSet in project flink by apache.
the class GSACompilerTest method testGSACompiler.
/**
 * Compiles a small gather-sum-apply program and checks the shape of the optimized plan.
 *
 * <p>The program builds a graph from a single edge, runs a GSA iteration with at most
 * 100 iterations and discards the resulting vertices. The optimized plan is then inspected
 * for the expected ship strategies, parallelism and partitioning of the sink, the workset
 * iteration, the solution set delta join and the edge join.
 *
 * @throws Exception if building or compiling the program fails; the exception is propagated
 *         unchanged so that the test report carries the original type, message and stack trace
 */
@Test
public void testGSACompiler() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // compose test program
    {
        DataSet<Edge<Long, NullValue>> edges = env
                .fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance()))
                .map(new Tuple3ToEdgeMap<Long, NullValue>());

        Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);

        DataSet<Vertex<Long, Long>> result = graph
                .runGatherSumApplyIteration(new GatherNeighborIds(), new SelectMinId(), new UpdateComponentId(), 100)
                .getVertices();

        result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
    }

    Plan p = env.createProgramPlan("GSA Connected Components");
    OptimizedPlan op = compileNoStats(p);

    // check the sink
    SinkPlanNode sink = op.getDataSinks().iterator().next();
    assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
    assertEquals(PartitioningProperty.HASH_PARTITIONED, sink.getGlobalProperties().getPartitioning());

    // check the iteration
    WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

    // check the solution set join and the delta
    PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
    // this is only true if the update function preserves the partitioning
    assertTrue(ssDelta instanceof DualInputPlanNode);

    DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
    assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
    assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
    assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());

    // walk back from the solution set join: sum reducer -> gather mapper -> edge join
    SingleInputPlanNode sumReducer = (SingleInputPlanNode) ssJoin.getInput1().getSource();
    SingleInputPlanNode gatherMapper = (SingleInputPlanNode) sumReducer.getInput().getSource();
    DualInputPlanNode edgeJoin = (DualInputPlanNode) gatherMapper.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());

    // input1 is the workset
    assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput1().getShipStrategy());

    // input2 is the edges
    assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput2().getShipStrategy());
    assertTrue(edgeJoin.getInput2().getTempMode().isCached());
    assertEquals(new FieldList(0), edgeJoin.getInput2().getShipStrategyKeys());
}
use of org.apache.flink.api.java.DataSet in project flink by apache.
the class GSATranslationTest method testTranslation.
/**
 * Checks that a configured gather-sum-apply iteration is translated into the expected
 * delta iteration in the Java API.
 *
 * <p>The test configures a {@code GSAConfiguration} with a name, a parallelism, an aggregator
 * and one broadcast set for each of the gather, sum and apply functions. It then verifies that
 * the resulting {@code DeltaIterationResultSet} carries the iteration properties, that the
 * solution set join forwards field 0 from both inputs, and that each broadcast set is attached
 * to the corresponding operator.
 *
 * @throws Exception if constructing the program fails; the exception is propagated unchanged
 *         so that the test report carries the original type, message and stack trace
 */
@Test
public void testTranslation() throws Exception {
    final String ITERATION_NAME = "Test Name";
    final String AGGREGATOR_NAME = "AggregatorName";
    final String BC_SET_GATHER_NAME = "gather messages";
    final String BC_SET_SUM_NAME = "sum updates";
    final String BC_SET_APPLY_NAME = "apply updates";
    final int NUM_ITERATIONS = 13;
    final int ITERATION_PARALLELISM = 77;

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> bcGather = env.fromElements(1L);
    DataSet<Long> bcSum = env.fromElements(1L);
    DataSet<Long> bcApply = env.fromElements(1L);

    DataSet<Vertex<Long, Long>> result;

    // ------------ construct the test program ------------------
    {
        DataSet<Edge<Long, NullValue>> edges = env
                .fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance()))
                .map(new Tuple3ToEdgeMap<Long, NullValue>());

        Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);

        GSAConfiguration parameters = new GSAConfiguration();
        parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
        parameters.setName(ITERATION_NAME);
        parameters.setParallelism(ITERATION_PARALLELISM);
        parameters.addBroadcastSetForGatherFunction(BC_SET_GATHER_NAME, bcGather);
        parameters.addBroadcastSetForSumFunction(BC_SET_SUM_NAME, bcSum);
        parameters.addBroadcastSetForApplyFunction(BC_SET_APPLY_NAME, bcApply);

        result = graph
                .runGatherSumApplyIteration(new GatherNeighborIds(), new SelectMinId(), new UpdateComponentId(), NUM_ITERATIONS, parameters)
                .getVertices();

        result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
    }

    // ------------- validate the java program ----------------
    assertTrue(result instanceof DeltaIterationResultSet);

    DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
    DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

    // check the basic iteration properties
    assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
    assertArrayEquals(new int[] { 0 }, resultSet.getKeyPositions());
    assertEquals(ITERATION_PARALLELISM, iteration.getParallelism());
    assertEquals(ITERATION_NAME, iteration.getName());
    assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

    // validate that the semantic properties are set as they should
    TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin = (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
    assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
    assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

    // walk back from the solution set join: sum reduce -> gather map
    SingleInputUdfOperator<?, ?, ?> sumReduce = (SingleInputUdfOperator<?, ?, ?>) solutionSetJoin.getInput1();
    SingleInputUdfOperator<?, ?, ?> gatherMap = (SingleInputUdfOperator<?, ?, ?>) sumReduce.getInput();

    // validate that the broadcast sets are forwarded
    assertEquals(bcGather, gatherMap.getBroadcastSets().get(BC_SET_GATHER_NAME));
    assertEquals(bcSum, sumReduce.getBroadcastSets().get(BC_SET_SUM_NAME));
    assertEquals(bcApply, solutionSetJoin.getBroadcastSets().get(BC_SET_APPLY_NAME));
}
use of org.apache.flink.api.java.DataSet in project flink by apache.
the class TPCHQuery10 method main.
// *************************************************************************
// PROGRAM
// *************************************************************************
/**
 * Runs the TPC-H Query 10 example: joins customers, nations, orders and line items,
 * computes the revenue per customer and writes the result as CSV to {@code outputPath}.
 *
 * @param args command line arguments, handed to {@code parseParameters}; the program
 *             returns without doing anything when that call reports failure
 * @throws Exception if reading parameters, building or executing the program fails
 */
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // ---- inputs ----
    // customers: (custkey, name, address, nationkey, acctbal)
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customerRows = getCustomerDataSet(env);
    // orders: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orderRows = getOrdersDataSet(env);
    // line items: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitemRows = getLineitemDataSet(env);
    // nations: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nationRows = getNationsDataSet(env);

    // ---- orders whose year (first four characters of orderdate) is after 1990 ----
    DataSet<Tuple3<Integer, Integer, String>> recentOrders =
            orderRows.filter(o -> Integer.parseInt(o.f2.substring(0, 4)) > 1990);
    // keep (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> recentOrderKeys = recentOrders.project(0, 1);

    // ---- line items whose return flag equals "R" ----
    DataSet<Tuple4<Integer, Double, Double, String>> returnedItems =
            lineitemRows.filter(li -> li.f3.equals("R"));
    // keep (orderkey, extendedprice, discount)
    DataSet<Tuple3<Integer, Double, Double>> returnedItemPrices = returnedItems.project(0, 1, 2);

    // ---- join on orderkey: (custkey, extendedprice, discount) ----
    DataSet<Tuple3<Integer, Double, Double>> pricesPerCustomer = recentOrderKeys
            .joinWithHuge(returnedItemPrices)
            .where(0)
            .equalTo(0)
            .projectFirst(1)
            .projectSecond(1, 2);

    // ---- revenue = extendedprice * (1 - discount), summed per custkey: (custkey, revenue) ----
    // NOTE(review): the lambda's Tuple2 result type is not declared explicitly here;
    // confirm that type extraction handles it with the compiler used for this module.
    DataSet<Tuple2<Integer, Double>> revenuePerCustomer = pricesPerCustomer
            .map(row -> new Tuple2<>(row.f0, row.f1 * (1 - row.f2)))
            .groupBy(0)
            .sum(1);

    // ---- customers joined with nations on nationkey: (custkey, name, address, nationname, acctbal) ----
    DataSet<Tuple5<Integer, String, String, String, Double>> customersWithNation = customerRows
            .joinWithTiny(nationRows)
            .where(3)
            .equalTo(0)
            .projectFirst(0, 1, 2)
            .projectSecond(1)
            .projectFirst(4);

    // ---- add revenue by custkey: (custkey, name, address, nationname, acctbal, revenue) ----
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> customersWithRevenue = customersWithNation
            .join(revenuePerCustomer)
            .where(0)
            .equalTo(0)
            .projectFirst(0, 1, 2, 3, 4)
            .projectSecond(1);

    // emit result
    customersWithRevenue.writeAsCsv(outputPath);

    // execute program
    env.execute("TPCH Query 10 Example");
}
use of org.apache.flink.api.java.DataSet in project flink by apache.
the class FilterLambda1 method main.
/**
 * Filters a small, fixed set of strings with a lambda-based {@code FilterFunction}
 * that delegates to {@code WordFilter.filter}, and prints the remaining elements.
 *
 * @param args command line arguments (not used)
 * @throws Exception if building or running the program fails
 */
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> input = env.fromElements("Please filter", "the words", "but not this");

    // filter predicate expressed as a lambda that delegates to WordFilter
    FilterFunction<String> filter = (v) -> WordFilter.filter(v);
    DataSet<String> output = input.filter(filter);

    // print() triggers execution of the program by itself. A following env.execute()
    // would find no new sinks and fail, so it is intentionally not called here.
    output.print();
}
use of org.apache.flink.api.java.DataSet in project flink by apache.
the class OperatorTranslation method translateSingleInputOperator.
/**
 * Translates a single-input Java API operator into its data flow operator.
 *
 * <p>The operator's input is translated first and handed to {@code translateToDataFlow}.
 * If the operator is a {@code UdfOperator}, its configuration parameters (when non-null) are
 * added to the translated operator, and its semantic properties are set on the result when
 * the result is a common-API single-input operator.
 *
 * @param op the Java API operator to translate
 * @param <I> input element type the operator is cast to
 * @param <O> output element type the operator is cast to
 * @return the translated data flow operator
 */
private <I, O> org.apache.flink.api.common.operators.Operator<O> translateSingleInputOperator(SingleInputOperator<?, ?, ?> op) {
    @SuppressWarnings("unchecked")
    SingleInputOperator<I, O, ?> typedOp = (SingleInputOperator<I, O, ?>) op;

    @SuppressWarnings("unchecked")
    DataSet<I> typedInput = (DataSet<I>) op.getInput();

    // translate the predecessor before translating this operator
    Operator<I> translatedInput = translate(typedInput);
    org.apache.flink.api.common.operators.Operator<O> translated = typedOp.translateToDataFlow(translatedInput);

    // operators without a user-defined function need no further setup
    if (!(op instanceof UdfOperator<?>)) {
        return translated;
    }

    @SuppressWarnings("unchecked")
    SingleInputUdfOperator<I, O, ?> udfOp = (SingleInputUdfOperator<I, O, ?>) op;

    // carry over the configuration parameters, if any were set
    Configuration udfParams = udfOp.getParameters();
    if (udfParams != null) {
        translated.getParameters().addAll(udfParams);
    }

    // carry over the semantic properties when the result is a unary common-API operator
    if (translated instanceof org.apache.flink.api.common.operators.SingleInputOperator) {
        org.apache.flink.api.common.operators.SingleInputOperator<?, O, ?> unaryOp = (org.apache.flink.api.common.operators.SingleInputOperator<?, O, ?>) translated;
        unaryOp.setSemanticProperties(udfOp.getSemanticProperties());
    }

    return translated;
}
Aggregations