Use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.
From the class HadoopReduceCombineFunctionITCase, method testConfigurationViaJobConf:
@Test
public void testConfigurationViaJobConf() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.cntPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).map(new Mapper4());

    DataSet<Tuple2<IntWritable, IntWritable>> hellos = ds.groupBy(0).reduceGroup(
            new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new ConfigurableCntReducer(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();

    hellos.writeAsText(resultPath);
    env.execute();

    // expected result
    String expected = "(0,0)\n" + "(1,0)\n" + "(2,1)\n" + "(3,1)\n" + "(4,1)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
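The ConfigurableCntReducer used above is defined elsewhere in the test sources. As a rough sketch of its contract, assuming it reads the "my.cntPrefix" key from the JobConf in configure() and counts, per key, how many values start with that prefix (field and variable names here are illustrative, not the project's exact source):

public static class ConfigurableCntReducer
        implements Reducer<IntWritable, Text, IntWritable, IntWritable> {

    private String countPrefix;

    @Override
    public void configure(final JobConf c) {
        // picks up the value set via conf.set("my.cntPrefix", "Hello") in the test
        this.countPrefix = c.get("my.cntPrefix", "");
    }

    @Override
    public void reduce(IntWritable key, Iterator<Text> values,
            OutputCollector<IntWritable, IntWritable> out, Reporter reporter) throws IOException {
        int cnt = 0;
        while (values.hasNext()) {
            if (values.next().toString().startsWith(this.countPrefix)) {
                cnt++;
            }
        }
        out.collect(key, new IntWritable(cnt));
    }

    @Override
    public void close() throws IOException {
        // nothing to release
    }
}

The Reducer, JobConf, OutputCollector and Reporter types are the classic org.apache.hadoop.mapred API, which is what the HadoopReduceFunction wrapper expects.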
Use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.
From the class HBaseConnectorITCase, method testTableSourceFieldOrder:
@Test
public void testTableSourceFieldOrder() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, new TableConfig());

    HBaseTableSource hbaseTable = new HBaseTableSource(getConf(), TEST_TABLE);
    // shuffle order of column registration
    hbaseTable.addColumn(FAMILY2, F2COL1, String.class);
    hbaseTable.addColumn(FAMILY3, F3COL1, Double.class);
    hbaseTable.addColumn(FAMILY1, F1COL1, Integer.class);
    hbaseTable.addColumn(FAMILY2, F2COL2, Long.class);
    hbaseTable.addColumn(FAMILY3, F3COL2, Boolean.class);
    hbaseTable.addColumn(FAMILY3, F3COL3, String.class);
    tableEnv.registerTableSource("hTable", hbaseTable);

    Table result = tableEnv.sql("SELECT * FROM hTable AS h");

    DataSet<Row> resultSet = tableEnv.toDataSet(result, Row.class);
    List<Row> results = resultSet.collect();

    String expected =
            "Hello-1,100,1.01,false,Welt-1,10\n" +
            "Hello-2,200,2.02,true,Welt-2,20\n" +
            "Hello-3,300,3.03,false,Welt-3,30\n" +
            "null,400,4.04,true,Welt-4,40\n" +
            "Hello-5,500,5.05,false,Welt-5,50\n" +
            "Hello-6,600,6.06,true,Welt-6,60\n" +
            "Hello-7,700,7.07,false,Welt-7,70\n" +
            "null,800,8.08,true,Welt-8,80\n";

    TestBaseUtils.compareResultAsText(results, expected);
}
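getConf() and TEST_TABLE come from the ITCase's harness, which runs an HBase mini-cluster. A minimal sketch of what such a configuration helper could look like against a standalone HBase instance, assuming standard client settings (the quorum host and port below are placeholders):

private static org.apache.hadoop.conf.Configuration getConf() {
    // standard HBase client configuration; the ITCase instead points this at its test mini-cluster
    org.apache.hadoop.conf.Configuration conf = org.apache.hadoop.hbase.HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "localhost");            // placeholder quorum address
    conf.set("hbase.zookeeper.property.clientPort", "2181");    // placeholder client port
    return conf;
}

The point of the test, per its name and the "shuffle order" comment, is that the field order of the rows returned by SELECT * is stable and does not depend on the order in which the columns were registered.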
Use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.
From the class HBaseConnectorITCase, method testTableInputFormat:
@Test
public void testTableInputFormat() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<Tuple1<Integer>> result = env
            .createInput(new InputFormatForTestTable())
            .reduce(new ReduceFunction<Tuple1<Integer>>() {

                @Override
                public Tuple1<Integer> reduce(Tuple1<Integer> v1, Tuple1<Integer> v2) throws Exception {
                    return Tuple1.of(v1.f0 + v2.f0);
                }
            });

    List<Tuple1<Integer>> resultSet = result.collect();

    assertEquals(1, resultSet.size());
    assertEquals(360, (int) resultSet.get(0).f0);
}
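InputFormatForTestTable is defined inside the ITCase. A rough sketch of what an input format like it could look like, assuming the flink-hbase TableInputFormat of that Flink version (which asks subclasses for a scanner, a table name, and a Result-to-tuple mapping); the family, qualifier and table name below are placeholders, and the real class reads the ITCase's test table:

class InputFormatForTestTable extends TableInputFormat<Tuple1<Integer>> {

    @Override
    protected Scan getScanner() {
        // scan only the integer column that the test sums up
        return new Scan().addColumn(Bytes.toBytes("f1"), Bytes.toBytes("col1"));
    }

    @Override
    protected String getTableName() {
        return "testTable";
    }

    @Override
    protected Tuple1<Integer> mapResultToTuple(Result r) {
        // turn each HBase row into a single-field tuple for the reduce
        return Tuple1.of(Bytes.toInt(r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("col1"))));
    }
}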
Use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.
From the class NotSoMiniClusterIterations, method runConnectedComponents:
private static void runConnectedComponents(int jmPort) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", jmPort);
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    // read vertex and edge data
    DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env).rebalance();
    DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
            .rebalance()
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId =
            vertices.map(new ConnectedComponents.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor,
    // update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);
    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    env.execute();
}
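The step functions come from Flink's ConnectedComponents example program. Two of the helpers used above can be sketched as follows; their contracts are inferred from how they are used here, while the exact sources live in flink-examples:

public static final class DuplicateValue<T> implements MapFunction<T, Tuple2<T, T>> {
    @Override
    public Tuple2<T, T> map(T vertex) {
        // the initial component ID of every vertex is its own ID
        return new Tuple2<>(vertex, vertex);
    }
}

public static final class UndirectEdge
        implements FlatMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> {
    @Override
    public void flatMap(Tuple2<Long, Long> edge, Collector<Tuple2<Long, Long>> out) {
        // emit each edge in both directions so component IDs can propagate either way
        out.collect(edge);
        out.collect(new Tuple2<>(edge.f1, edge.f0));
    }
}

MapFunction, FlatMapFunction and Collector are the usual org.apache.flink.api.common.functions and org.apache.flink.util types.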
Use of org.apache.flink.api.java.ExecutionEnvironment in project flink by apache.
From the class OverwriteObjects, method run:
public void run() throws Exception {
    LOG.info("Random seed = {}", RANDOM_SEED);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    for (int parallelism = MAX_PARALLELISM; parallelism > 0; parallelism--) {
        LOG.info("Parallelism = {}", parallelism);

        env.setParallelism(parallelism);

        testReduce(env);
        testGroupedReduce(env);
        testJoin(env);
        testCross(env);
    }
}
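The testReduce, testGroupedReduce, testJoin and testCross helpers are not shown here. OverwriteObjects is a manual test around object reuse, so one plausible shape for such a helper, assuming each test runs the same program with object reuse enabled and disabled and compares the two results (getDataSet and OverwriteObjectsReduce are hypothetical names, standing in for a deterministically seeded data source and a function that deliberately mutates its inputs):

private void testReduce(ExecutionEnvironment env) throws Exception {
    // run once with object reuse enabled ...
    env.getConfig().enableObjectReuse();
    List<Tuple2<IntValue, IntValue>> reusing = getDataSet(env)      // assumed seeded test data helper
            .reduce(new OverwriteObjectsReduce())                   // assumed reducer that mutates its inputs
            .collect();

    // ... and once with object reuse disabled
    env.getConfig().disableObjectReuse();
    List<Tuple2<IntValue, IntValue>> nonReusing = getDataSet(env)
            .reduce(new OverwriteObjectsReduce())
            .collect();

    // object reuse must not change the result
    Assert.assertEquals(nonReusing, reusing);
}

Running such comparisons across the decreasing parallelism values in run() then exercises the reuse behavior for every degree of parallelism up to MAX_PARALLELISM.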