Search in sources:

Example 36 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

Class HadoopReduceCombineFunctionITCase, method testConfigurationViaJobConf:

/**
 * Passes a {@code JobConf} carrying {@code my.cntPrefix=Hello} to a
 * {@code HadoopReduceFunction} wrapping a {@code ConfigurableCntReducer}, runs the
 * grouped reduce, and compares the text written to disk with the expected lines.
 */
@Test
public void testConfigurationViaJobConf() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Configuration handed to the wrapped Hadoop reducer.
    JobConf jobConf = new JobConf();
    jobConf.set("my.cntPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> input = HadoopTestData.getKVPairDataSet(env).map(new Mapper4());
    HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable> reducer =
            new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new ConfigurableCntReducer(), jobConf);
    DataSet<Tuple2<IntWritable, IntWritable>> counts = input.groupBy(0).reduceGroup(reducer);

    // Write the result as text into a fresh temporary file and run the job.
    String outputUri = tempFolder.newFile().toURI().toString();
    counts.writeAsText(outputUri);
    env.execute();

    // Lines the job is expected to have written.
    String expected = "(0,0)\n"
            + "(1,0)\n"
            + "(2,1)\n"
            + "(3,1)\n"
            + "(4,1)\n";
    compareResultsByLinesInMemory(expected, outputUri);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Text(org.apache.hadoop.io.Text) JobConf(org.apache.hadoop.mapred.JobConf) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 37 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

Class HBaseConnectorITCase, method testTableSourceFieldOrder:

/**
 * Registers the columns of an {@code HBaseTableSource} in a shuffled order, runs
 * {@code SELECT *} against it, and compares the collected rows with the expected text.
 */
@Test
public void testTableSourceFieldOrder() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, new TableConfig());

    HBaseTableSource source = new HBaseTableSource(getConf(), TEST_TABLE);
    // Columns are deliberately registered out of family order.
    source.addColumn(FAMILY2, F2COL1, String.class);
    source.addColumn(FAMILY3, F3COL1, Double.class);
    source.addColumn(FAMILY1, F1COL1, Integer.class);
    source.addColumn(FAMILY2, F2COL2, Long.class);
    source.addColumn(FAMILY3, F3COL2, Boolean.class);
    source.addColumn(FAMILY3, F3COL3, String.class);
    tableEnv.registerTableSource("hTable", source);

    String query = "SELECT * FROM hTable AS h";
    Table queryResult = tableEnv.sql(query);
    DataSet<Row> rowSet = tableEnv.toDataSet(queryResult, Row.class);
    List<Row> rows = rowSet.collect();

    // One expected line per row of the test table.
    String expected = "Hello-1,100,1.01,false,Welt-1,10\n"
            + "Hello-2,200,2.02,true,Welt-2,20\n"
            + "Hello-3,300,3.03,false,Welt-3,30\n"
            + "null,400,4.04,true,Welt-4,40\n"
            + "Hello-5,500,5.05,false,Welt-5,50\n"
            + "Hello-6,600,6.06,true,Welt-6,60\n"
            + "Hello-7,700,7.07,false,Welt-7,70\n"
            + "null,800,8.08,true,Welt-8,80\n";
    TestBaseUtils.compareResultAsText(rows, expected);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Table(org.apache.flink.table.api.Table) HTable(org.apache.hadoop.hbase.client.HTable) TableConfig(org.apache.flink.table.api.TableConfig) Row(org.apache.flink.types.Row) BatchTableEnvironment(org.apache.flink.table.api.java.BatchTableEnvironment) Test(org.junit.Test)

Example 38 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

Class HBaseConnectorITCase, method testTableInputFormat:

/**
 * Reads the test table through {@code InputFormatForTestTable}, sums the single
 * integer field of all records, and checks that exactly one record with value 360 remains.
 */
@Test
public void testTableInputFormat() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<Tuple1<Integer>> input = env.createInput(new InputFormatForTestTable());

    // Adds up the f0 fields of two records.
    ReduceFunction<Tuple1<Integer>> sumOfFirstField = new ReduceFunction<Tuple1<Integer>>() {

        @Override
        public Tuple1<Integer> reduce(Tuple1<Integer> left, Tuple1<Integer> right) throws Exception {
            return Tuple1.of(left.f0 + right.f0);
        }
    };

    DataSet<Tuple1<Integer>> summed = input.reduce(sumOfFirstField);
    List<Tuple1<Integer>> collected = summed.collect();

    assertEquals(1, collected.size());
    assertEquals(360, (int) collected.get(0).f0);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple1(org.apache.flink.api.java.tuple.Tuple1) IOException(java.io.IOException) Test(org.junit.Test)

Example 39 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

Class NotSoMiniClusterIterations, method runConnectedComponents:

/**
 * Builds a connected-components delta iteration on the default vertex and edge data
 * and executes it on a remote environment at {@code localhost:jmPort}, discarding the output.
 *
 * @param jmPort port passed to {@code ExecutionEnvironment.createRemoteEnvironment}
 * @throws Exception if building or executing the program fails
 */
private static void runConnectedComponents(int jmPort) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", jmPort);
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    // Inputs: vertex ids and edges, both rebalanced; edges are expanded via UndirectEdge.
    DataSet<Long> vertexIds = ConnectedComponentsData.getDefaultVertexDataSet(env).rebalance();
    DataSet<Tuple2<Long, Long>> edgePairs = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // Every vertex starts in a component named after its own id.
    DataSet<Tuple2<Long, Long>> initialComponents = vertexIds.map(new ConnectedComponents.DuplicateValue<Long>());

    // Delta iteration: at most 100 rounds, solution set keyed on field 0.
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
            initialComponents.iterateDelta(initialComponents, 100, 0);
    DataSet<Tuple2<Long, Long>> workset = deltaIteration.getWorkset();
    DataSet<Tuple2<Long, Long>> solutionSet = deltaIteration.getSolutionSet();

    // Step logic: join the workset with the edges, take the minimum candidate per vertex,
    // then join with the solution set and keep only updates to a smaller component id.
    DataSet<Tuple2<Long, Long>> updates = workset
            .join(edgePairs).where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0)
            .aggregate(Aggregations.MIN, 1)
            .join(solutionSet).where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // The same data set serves as both the solution-set delta and the next workset.
    DataSet<Tuple2<Long, Long>> components = deltaIteration.closeWith(updates, updates);
    components.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    env.execute();
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ConnectedComponents(org.apache.flink.examples.java.graph.ConnectedComponents) Tuple2(org.apache.flink.api.java.tuple.Tuple2)

Example 40 with ExecutionEnvironment

use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.

Class OverwriteObjects, method run:

/**
 * Logs the random seed, then runs the reduce, grouped-reduce, join, and cross checks
 * once for every parallelism from {@code MAX_PARALLELISM} down to 1.
 *
 * @throws Exception if any of the invoked checks fails
 */
public void run() throws Exception {
    LOG.info("Random seed = {}", RANDOM_SEED);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    // Count down so the highest parallelism is exercised first.
    int parallelism = MAX_PARALLELISM;
    while (parallelism > 0) {
        LOG.info("Parallelism = {}", parallelism);
        env.setParallelism(parallelism);

        testReduce(env);
        testGroupedReduce(env);
        testJoin(env);
        testCross(env);

        parallelism--;
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) JoinHint(org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint)

Aggregations

ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 1247; Test (org.junit.Test): 1090; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 374; Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 264; Plan (org.apache.flink.api.common.Plan): 238; Tuple5 (org.apache.flink.api.java.tuple.Tuple5): 236; OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 199; SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 139; InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 138; Vertex (org.apache.flink.graph.Vertex): 93; SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 73; Edge (org.apache.flink.graph.Edge): 70; DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 66; ArrayList (java.util.ArrayList): 57; Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 49; SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 44; DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 39; BatchTableEnvironment (org.apache.flink.table.api.java.BatchTableEnvironment): 38; FieldSet (org.apache.flink.api.common.operators.util.FieldSet): 37; JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator): 35