Use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.
The class ConsumeFromKinesis, method main:
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    // AWS region and credentials are taken from the command-line arguments
    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    // consume the "flink-test" stream and print every record to stdout
    DataStream<String> kinesis = see.addSource(
        new FlinkKinesisConsumer<>("flink-test", new SimpleStringSchema(), kinesisConsumerConfig));
    kinesis.print();

    see.execute();
}
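For reference, ParameterTool.fromArgs parses "--key value" (or "-key value") pairs from the program arguments. A minimal, self-contained sketch of how the parameters above would be supplied and read back; the argument values are placeholders, not real credentials:

import org.apache.flink.api.java.utils.ParameterTool;

public class ParameterToolDemo {
    public static void main(String[] args) {
        // placeholder arguments matching what ConsumeFromKinesis expects
        String[] demoArgs = {
            "--region", "eu-west-1",
            "--accesskey", "<ACCESS_KEY_ID>",
            "--secretkey", "<SECRET_ACCESS_KEY>"
        };
        ParameterTool pt = ParameterTool.fromArgs(demoArgs);
        System.out.println(pt.getRequired("region"));   // prints eu-west-1
        System.out.println(pt.has("output"));           // prints false
        System.out.println(pt.get("output", "stdout")); // falls back to the given default
    }
}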
Use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.
The class ProduceIntoKinesis, method main:
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    DataStream<String> simpleStringStream = see.addSource(new EventsGenerator());

    // AWS region and credentials are taken from the command-line arguments
    Properties kinesisProducerConfig = new Properties();
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
    kinesisProducerConfig.setProperty(ProducerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));

    // write all generated events into the "flink-test" stream, partition "0";
    // fail the job on any producer error
    FlinkKinesisProducer<String> kinesis = new FlinkKinesisProducer<>(new SimpleStringSchema(), kinesisProducerConfig);
    kinesis.setFailOnError(true);
    kinesis.setDefaultStream("flink-test");
    kinesis.setDefaultPartition("0");

    simpleStringStream.addSink(kinesis);
    see.execute();
}
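The EventsGenerator source used above is not part of this snippet. A minimal sketch of what such a generator could look like as a static nested class of ProduceIntoKinesis, assuming Flink's org.apache.flink.streaming.api.functions.source.SourceFunction interface; the sleep interval and message format are illustrative assumptions, not taken from the Flink source:

// hypothetical stand-in for the EventsGenerator referenced above
public static class EventsGenerator implements SourceFunction<String> {

    private volatile boolean running = true;

    @Override
    public void run(SourceContext<String> ctx) throws Exception {
        long seq = 0;
        while (running) {
            Thread.sleep(10); // assumed throttle: one event every 10 ms
            ctx.collect("event-" + (seq++));
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}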
Use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.
The class HadoopUtilsTest, method testParamsFromGenericOptionsParser:
@Test
public void testParamsFromGenericOptionsParser() throws IOException {
    // Hadoop's GenericOptionsParser turns "-D key=value" pairs into ParameterTool entries
    ParameterTool parameter = HadoopUtils.paramsFromGenericOptionsParser(
        new String[] { "-D", "input=myInput", "-DexpectedCount=15" });
    validate(parameter);
}
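The validate helper is not shown in this excerpt. A sketch of what it plausibly asserts; the helper body is an assumption, only the parameter keys and values come from the test itself:

// hypothetical body of the validate helper referenced above
private static void validate(ParameterTool parameter) {
    // both "-D input=myInput" and "-DexpectedCount=15" should arrive
    // as plain key/value pairs in the ParameterTool
    Assert.assertEquals("myInput", parameter.getRequired("input"));
    Assert.assertEquals(15, parameter.getInt("expectedCount"));
}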
Use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.
The class TPCHQuery3, method main:
// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    // all three input paths are required
    if (!params.has("lineitem") || !params.has("customer") || !params.has("orders")) {
        System.err.println("  This program expects data from the TPC-H benchmark as input data.");
        System.err.println("  Due to legal restrictions, we cannot ship generated data.");
        System.err.println("  You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
        System.err.println("  Usage: TPCHQuery3 --lineitem <path> --customer <path> --orders <path> [--output <path>]");
        return;
    }
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // get input data (each dataset is read from its matching parameter)
    DataSet<Lineitem> lineitems = getLineitemDataSet(env, params.get("lineitem"));
    DataSet<Order> orders = getOrdersDataSet(env, params.get("orders"));
    DataSet<Customer> customers = getCustomerDataSet(env, params.get("customer"));
    // filter customers in market segment "AUTOMOBILE"
    customers = customers.filter(new FilterFunction<Customer>() {

        @Override
        public boolean filter(Customer c) {
            return c.getMktsegment().equals("AUTOMOBILE");
        }
    });

    // filter all orders with o_orderdate < 1995-03-12
    orders = orders.filter(new FilterFunction<Order>() {

        private final DateFormat format = new SimpleDateFormat("yyyy-MM-dd");
        private final Date date = format.parse("1995-03-12");

        @Override
        public boolean filter(Order o) throws ParseException {
            return format.parse(o.getOrderdate()).before(date);
        }
    });

    // filter all line items with l_shipdate > 1995-03-12
    lineitems = lineitems.filter(new FilterFunction<Lineitem>() {

        private final DateFormat format = new SimpleDateFormat("yyyy-MM-dd");
        private final Date date = format.parse("1995-03-12");

        @Override
        public boolean filter(Lineitem l) throws ParseException {
            return format.parse(l.getShipdate()).after(date);
        }
    });
    // join customers with orders and package them into a ShippingPriorityItem
    DataSet<ShippingPriorityItem> customerWithOrders =
        customers.join(orders).where(0).equalTo(1)
            .with(new JoinFunction<Customer, Order, ShippingPriorityItem>() {

                @Override
                public ShippingPriorityItem join(Customer c, Order o) {
                    return new ShippingPriorityItem(o.getOrderKey(), 0.0, o.getOrderdate(), o.getShippriority());
                }
            });

    // join the last join result with line items and sum the revenue
    // per (orderkey, orderdate, shippriority) group
    DataSet<ShippingPriorityItem> result =
        customerWithOrders.join(lineitems).where(0).equalTo(0)
            .with(new JoinFunction<ShippingPriorityItem, Lineitem, ShippingPriorityItem>() {

                @Override
                public ShippingPriorityItem join(ShippingPriorityItem i, Lineitem l) {
                    i.setRevenue(l.getExtendedprice() * (1 - l.getDiscount()));
                    return i;
                }
            })
            .groupBy(0, 2, 3)
            .aggregate(Aggregations.SUM, 1);
    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("TPCH Query 3 Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
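The getLineitemDataSet, getOrdersDataSet, and getCustomerDataSet helpers are not included in this excerpt. As an illustration of how the ParameterTool-supplied paths feed into CSV inputs, getLineitemDataSet might look roughly like this; the pipe delimiter matches the TPC-H file format, but the includeFields mask is illustrative, not copied from the Flink source:

private static DataSet<Lineitem> getLineitemDataSet(ExecutionEnvironment env, String lineitemPath) {
    return env.readCsvFile(lineitemPath)
        .fieldDelimiter("|")               // TPC-H tables are pipe-delimited
        .includeFields("1000011000100000") // illustrative mask: read only the queried columns
        .tupleType(Lineitem.class);        // assumes Lineitem is a Tuple subclass
}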
Use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.
The class KMeans, method main:
public static void main(String[] args) throws Exception {
    // checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // get input data:
    // read the points and centroids from the provided paths or fall back to default data
    DataSet<Point> points = getPointDataSet(params, env);
    DataSet<Centroid> centroids = getCentroidDataSet(params, env);

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(params.getInt("iterations", 10));

    DataSet<Centroid> newCentroids = points
        // compute closest centroid for each point
        .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
        // count and sum point coordinates for each centroid
        .map(new CountAppender())
        .groupBy(0).reduce(new CentroidAccumulator())
        // compute new centroids from point counts and coordinate sums
        .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    // assign each point to its final cluster
    DataSet<Tuple2<Integer, Point>> clusteredPoints = points
        .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

    // emit result
    if (params.has("output")) {
        clusteredPoints.writeAsCsv(params.get("output"), "\n", " ");
        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("KMeans Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        clusteredPoints.print();
    }
}
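The getPointDataSet and getCentroidDataSet helpers are likewise omitted here. They typically implement a fall-back pattern with ParameterTool: read from a user-supplied path if present, otherwise use bundled default data. A sketch for the points; the field names x and y and the KMeansData default set are assumptions based on the example's Point type:

private static DataSet<Point> getPointDataSet(ParameterTool params, ExecutionEnvironment env) {
    if (params.has("points")) {
        // read points from a user-supplied, space-delimited CSV file
        return env.readCsvFile(params.get("points"))
            .fieldDelimiter(" ")
            .pojoType(Point.class, "x", "y");
    }
    System.out.println("Executing K-Means example with default point data set.");
    System.out.println("Use --points to specify file input.");
    return KMeansData.getDefaultPointDataSet(env); // assumed bundled default data
}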