Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.
The class SortPartitionTest, method testSortPartitionWithPositionKeys4.
@Test(expected = InvalidProgramException.class)
public void testSortPartitionWithPositionKeys4() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs =
            env.fromCollection(tupleWithCustomData, tupleWithCustomInfo);

    // must not work: position 3 is the Long[] field, and array types are not valid sort keys
    tupleDs.sortPartition(3, Order.ASCENDING);
}
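The exception is expected because position 3 refers to the Long[] field of the tuple, and array fields cannot serve as sort keys. A minimal sketch of the contrast (class name and sample data below are illustrative, not part of the test class): sorting on an atomic field such as position 1 is accepted, while sorting on the array field is rejected during pre-flight validation.

import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple4;

public class SortPartitionSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // illustrative data, not the test's tupleWithCustomData collection
        DataSet<Tuple4<Integer, Long, String, Long[]>> ds = env.fromElements(
                new Tuple4<Integer, Long, String, Long[]>(2, 20L, "b", new Long[] { 2L }),
                new Tuple4<Integer, Long, String, Long[]>(1, 10L, "a", new Long[] { 1L }));

        // works: position 1 is a Long field, a comparable key type
        ds.sortPartition(1, Order.ASCENDING).print();

        // would fail with InvalidProgramException: position 3 is the Long[] field
        // ds.sortPartition(3, Order.ASCENDING);
    }
}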
Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.
The class GroupingTupleTranslationTest, method testCustomPartitioningTupleGroupReduceSorted2.
@Test
public void testCustomPartitioningTupleGroupReduceSorted2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple4<Integer, Integer, Integer, Integer>> data = env
                .fromElements(new Tuple4<Integer, Integer, Integer, Integer>(0, 0, 0, 0))
                .rebalance().setParallelism(4);

        data.groupBy(0).withPartitioner(new TestPartitionerInt())
                .sortGroup(1, Order.ASCENDING)
                .sortGroup(2, Order.DESCENDING)
                .reduceGroup(new IdentityGroupReducerCombinable<Tuple4<Integer, Integer, Integer, Integer>>())
                .output(new DiscardingOutputFormat<Tuple4<Integer, Integer, Integer, Integer>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
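The test relies on helper classes (TestPartitionerInt, IdentityGroupReducerCombinable) that are not shown here. Below is a hypothetical sketch of a custom partitioner in that spirit; the actual test utility in Flink may be implemented differently. Passing such a partitioner to withPartitioner(...) is what makes the optimizer pick ShipStrategyType.PARTITION_CUSTOM for the reducer input, which the assertions verify.

import org.apache.flink.api.common.functions.Partitioner;

public class TestPartitionerIntSketch implements Partitioner<Integer> {
    @Override
    public int partition(Integer key, int numPartitions) {
        // deterministically map each key to one of the available partitions
        return Math.floorMod(key, numPartitions);
    }
}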
Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.
The class FileCache, method createTmpFile.
// ------------------------------------------------------------------------
/**
 * If the file doesn't exist locally, it will be copied to the temp directory.
 *
 * @param name The name under which the file is registered.
 * @param entry The cache entry descriptor (path, executable flag)
 * @param jobID The ID of the job for which the file is copied.
 * @return The handle to the task that copies the file.
 */
public Future<Path> createTmpFile(String name, DistributedCacheEntry entry, JobID jobID) {
    synchronized (lock) {
        Map<String, Tuple4<Integer, File, Path, Future<Path>>> jobEntries = entries.get(jobID);
        if (jobEntries == null) {
            jobEntries = new HashMap<String, Tuple4<Integer, File, Path, Future<Path>>>();
            entries.put(jobID, jobEntries);
        }

        // tuple is (ref-count, parent-temp-dir, cached-file-path, copy-process)
        Tuple4<Integer, File, Path, Future<Path>> fileEntry = jobEntries.get(name);
        if (fileEntry != null) {
            // file is already in the cache. return a future that
            // immediately returns the file
            fileEntry.f0 = fileEntry.f0 + 1;

            // return the future. may be that the copy is still in progress
            return fileEntry.f3;
        } else {
            // need to copy the file

            // create the target path
            File tempDirToUse = new File(storageDirectories[nextDirectory++], jobID.toString());
            if (nextDirectory >= storageDirectories.length) {
                nextDirectory = 0;
            }

            String sourceFile = entry.filePath;
            int posOfSep = sourceFile.lastIndexOf("/");
            if (posOfSep > 0) {
                sourceFile = sourceFile.substring(posOfSep + 1);
            }

            Path target = new Path(tempDirToUse.getAbsolutePath() + "/" + sourceFile);

            // kick off the copying
            CopyProcess cp = new CopyProcess(entry, target);
            FutureTask<Path> copyTask = new FutureTask<Path>(cp);
            executorService.submit(copyTask);

            // store our entry
            jobEntries.put(name, new Tuple4<Integer, File, Path, Future<Path>>(1, tempDirToUse, target, copyTask));

            return copyTask;
        }
    }
}
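On the caller side, the returned Future<Path> completes once the copy task has run, so code that needs the file can simply block on it. A minimal caller-side sketch, assuming a FileCache instance, a DistributedCacheEntry, and a JobID are already at hand (the variable names and the registered name below are illustrative):

Future<Path> pathFuture = fileCache.createTmpFile("dictionary", cacheEntry, jobId);
// get() may block while the copy is still in progress; afterwards the file is local
Path localPath = pathFuture.get();
File localFile = new File(localPath.toString());

Because each call increments the ref-count (f0) of the Tuple4 entry, repeated requests for the same name share a single copy task and a single local file.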
Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.
The class TPCHQuery10, method main.
// *************************************************************************
//     PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    if (!params.has("customer") && !params.has("orders") && !params.has("lineitem") && !params.has("nation")) {
        System.err.println(" This program expects data from the TPC-H benchmark as input data.");
        System.err.println(" Due to legal restrictions, we can not ship generated data.");
        System.err.println(" You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
        System.err.println(" Usage: TPCHQuery10 --customer <path> --orders <path> --lineitem <path> --nation <path> [--output <path>]");
        return;
    }

    // get customer data set: (custkey, name, address, nationkey, acctbal)
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customers =
            getCustomerDataSet(env, params.get("customer"));

    // get orders data set: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orders =
            getOrdersDataSet(env, params.get("orders"));

    // get lineitem data set: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitems =
            getLineitemDataSet(env, params.get("lineitem"));

    // get nation data set: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nations =
            getNationsDataSet(env, params.get("nation"));

    // orders filtered by year: (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear =
            // filter by year
            orders.filter(new FilterFunction<Tuple3<Integer, Integer, String>>() {
                @Override
                public boolean filter(Tuple3<Integer, Integer, String> o) {
                    return Integer.parseInt(o.f2.substring(0, 4)) > 1990;
                }
            }).project(0, 1);

    // lineitems filtered by flag: (orderkey, revenue)
    DataSet<Tuple2<Integer, Double>> lineitemsFilteredByFlag =
            // filter by flag
            lineitems.filter(new FilterFunction<Tuple4<Integer, Double, Double, String>>() {
                @Override
                public boolean filter(Tuple4<Integer, Double, Double, String> l) {
                    return l.f3.equals("R");
                }
            }).map(new MapFunction<Tuple4<Integer, Double, Double, String>, Tuple2<Integer, Double>>() {
                @Override
                public Tuple2<Integer, Double> map(Tuple4<Integer, Double, Double, String> l) {
                    // revenue per item = l_extendedprice * (1 - l_discount)
                    return new Tuple2<Integer, Double>(l.f0, l.f1 * (1 - l.f2));
                }
            });

    // join orders with lineitems: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenueByCustomer =
            ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag)
                    .where(0).equalTo(0)
                    .projectFirst(1).projectSecond(1);

    revenueByCustomer = revenueByCustomer.groupBy(0).aggregate(Aggregations.SUM, 1);

    // join customer with nation (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation =
            customers.joinWithTiny(nations)
                    .where(3).equalTo(0)
                    .projectFirst(0, 1, 2).projectSecond(1).projectFirst(4);

    // join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> result =
            customerWithNation.join(revenueByCustomer)
                    .where(0).equalTo(0)
                    .projectFirst(0, 1, 2, 3, 4).projectSecond(1);

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("TPCH Query 10 Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
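The data-loading helpers (getCustomerDataSet, getOrdersDataSet, getLineitemDataSet, getNationsDataSet) are not shown above. A hedged sketch of what getLineitemDataSet could look like, built on the CsvReader API (fieldDelimiter, includeFields, types); the include-fields mask is an assumption about the TPC-H lineitem column order (keeping orderkey, extendedprice, discount, returnflag) and may differ from the actual example code:

private static DataSet<Tuple4<Integer, Double, Double, String>> getLineitemDataSet(
        ExecutionEnvironment env, String lineitemPath) {
    return env.readCsvFile(lineitemPath)
            .fieldDelimiter("|")
            // keep l_orderkey, l_extendedprice, l_discount, l_returnflag (assumed column positions)
            .includeFields("1000011010000000")
            .types(Integer.class, Double.class, Double.class, String.class);
}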
Use of org.apache.flink.api.java.tuple.Tuple4 in project flink by apache.
The class GroupingTest, method testGroupSortByKeyExpression3.
@Test
public void testGroupSortByKeyExpression3() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs =
            env.fromCollection(tupleWithCustomData, tupleWithCustomInfo);

    // should work
    try {
        tupleDs.groupBy("f0")
                .sortGroup("f2.myString", Order.ASCENDING)
                .sortGroup("f1", Order.DESCENDING);
    } catch (Exception e) {
        Assert.fail();
    }
}
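The key expression "f2.myString" groups on tuple field f0 and then sorts each group on a member of the POJO stored at tuple position 2. A hypothetical sketch of a POJO like the CustomType the test refers to (the actual test class in Flink may carry different fields); public fields and a public no-argument constructor make it a valid Flink POJO, so nested key expressions such as "f2.myString" resolve to its fields.

public class CustomType {
    public int myInt;
    public long myLong;
    public String myString;

    public CustomType() {
        // no-argument constructor required for Flink POJO types
    }

    public CustomType(int i, long l, String s) {
        this.myInt = i;
        this.myLong = l;
        this.myString = s;
    }
}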