Search in sources :

Example 6 with ParameterTool

use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.

the class ConsumeFromKinesis method main.

/**
 * Streams records from the "flink-test" Kinesis stream and prints each one.
 *
 * <p>The arguments {@code region}, {@code accesskey} and {@code secretkey} are mandatory;
 * they are copied into the consumer configuration.
 *
 * @param args command-line arguments, parsed with {@code ParameterTool.fromArgs}
 * @throws Exception propagated from argument parsing or from job execution
 */
public static void main(String[] args) throws Exception {
    final ParameterTool cli = ParameterTool.fromArgs(args);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    // AWS connection settings, each taken from a required command-line argument.
    final Properties consumerProps = new Properties();
    consumerProps.setProperty(ConsumerConfigConstants.AWS_REGION, cli.getRequired("region"));
    consumerProps.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, cli.getRequired("accesskey"));
    consumerProps.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, cli.getRequired("secretkey"));

    final FlinkKinesisConsumer<String> source =
            new FlinkKinesisConsumer<>("flink-test", new SimpleStringSchema(), consumerProps);
    final DataStream<String> records = env.addSource(source);
    records.print();

    env.execute();
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Properties(java.util.Properties)

Example 7 with ParameterTool

use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.

the class ProduceIntoKinesis method main.

/**
 * Generates events with {@code EventsGenerator} and writes them to Kinesis.
 *
 * <p>The arguments {@code region}, {@code accessKey} and {@code secretKey} are mandatory;
 * they are copied into the producer configuration. The producer is set up with
 * "flink-test" as its default stream and "0" as its default partition.
 *
 * @param args command-line arguments, parsed with {@code ParameterTool.fromArgs}
 * @throws Exception propagated from argument parsing or from job execution
 */
public static void main(String[] args) throws Exception {
    final ParameterTool cli = ParameterTool.fromArgs(args);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    final DataStream<String> events = env.addSource(new EventsGenerator());

    // AWS connection settings, each taken from a required command-line argument.
    final Properties producerProps = new Properties();
    producerProps.setProperty(ProducerConfigConstants.AWS_REGION, cli.getRequired("region"));
    producerProps.setProperty(ProducerConfigConstants.AWS_ACCESS_KEY_ID, cli.getRequired("accessKey"));
    producerProps.setProperty(ProducerConfigConstants.AWS_SECRET_ACCESS_KEY, cli.getRequired("secretKey"));

    final FlinkKinesisProducer<String> sink =
            new FlinkKinesisProducer<>(new SimpleStringSchema(), producerProps);
    sink.setFailOnError(true);
    sink.setDefaultStream("flink-test");
    sink.setDefaultPartition("0");
    events.addSink(sink);

    env.execute();
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) FlinkKinesisProducer(org.apache.flink.streaming.connectors.kinesis.FlinkKinesisProducer) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Properties(java.util.Properties)

Example 8 with ParameterTool

use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.

the class HadoopUtilsTest method testParamsFromGenericOptionsParser.

/**
 * Parses Hadoop-style {@code -D} options into a {@code ParameterTool} and hands the
 * result to {@code validate}.
 */
@Test
public void testParamsFromGenericOptionsParser() throws IOException {
    // Covers both spellings: "-D" followed by a separate "key=value" token, and "-Dkey=value".
    final String[] hadoopStyleArgs = { "-D", "input=myInput", "-DexpectedCount=15" };
    final ParameterTool parsed = HadoopUtils.paramsFromGenericOptionsParser(hadoopStyleArgs);
    validate(parsed);
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) AbstractParameterToolTest(org.apache.flink.api.java.utils.AbstractParameterToolTest) Test(org.junit.Test)

Example 9 with ParameterTool

use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.

the class TPCHQuery3 method main.

// *************************************************************************
//     PROGRAM
// *************************************************************************
/**
 * Runs the TPC-H Query 3 example: filters customers, orders and line items, joins them,
 * and sums a field per group.
 *
 * <p>Usage: {@code TPCHQuery3 --lineitem <path> --customer <path> --orders <path> [--output <path>]}.
 * All three input paths are required; if any one of them is missing, the usage text is
 * printed and the method returns without building a job. When {@code --output} is given
 * the result is written as CSV and the job is executed explicitly; otherwise the result
 * is printed to stdout.
 *
 * @param args command-line arguments, parsed with {@code ParameterTool.fromArgs}
 * @throws Exception propagated from date parsing in the filter set-up or from job execution
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    // Every input is mandatory, so bail out as soon as ANY of them is missing
    // (checking with && would only catch the case where all three are absent).
    if (!params.has("lineitem") || !params.has("customer") || !params.has("orders")) {
        System.err.println("  This program expects data from the TPC-H benchmark as input data.");
        System.err.println("  Due to legal restrictions, we can not ship generated data.");
        System.out.println("  You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
        System.out.println("  Usage: TPCHQuery3 --lineitem <path> --customer <path> --orders <path> [--output <path>]");
        return;
    }
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);
    // get input data; each data set is read from the parameter of the same name
    DataSet<Lineitem> lineitems = getLineitemDataSet(env, params.get("lineitem"));
    DataSet<Order> orders = getOrdersDataSet(env, params.get("orders"));
    DataSet<Customer> customers = getCustomerDataSet(env, params.get("customer"));
    // Filter market segment "AUTOMOBILE"
    customers = customers.filter(new FilterFunction<Customer>() {

        @Override
        public boolean filter(Customer c) {
            return c.getMktsegment().equals("AUTOMOBILE");
        }
    });
    // Filter all Orders with o_orderdate < 12.03.1995
    orders = orders.filter(new FilterFunction<Order>() {

        private final DateFormat format = new SimpleDateFormat("yyyy-MM-dd");

        // Cut-off date, parsed once when the filter object is created.
        private final Date date = format.parse("1995-03-12");

        @Override
        public boolean filter(Order o) throws ParseException {
            return format.parse(o.getOrderdate()).before(date);
        }
    });
    // Filter all Lineitems with l_shipdate > 12.03.1995
    lineitems = lineitems.filter(new FilterFunction<Lineitem>() {

        private final DateFormat format = new SimpleDateFormat("yyyy-MM-dd");

        // Cut-off date, parsed once when the filter object is created.
        private final Date date = format.parse("1995-03-12");

        @Override
        public boolean filter(Lineitem l) throws ParseException {
            return format.parse(l.getShipdate()).after(date);
        }
    });
    // Join customers with orders and package them into a ShippingPriorityItem
    // (customer field 0 is matched against order field 1; revenue starts at 0.0)
    DataSet<ShippingPriorityItem> customerWithOrders = customers.join(orders).where(0).equalTo(1).with(new JoinFunction<Customer, Order, ShippingPriorityItem>() {

        @Override
        public ShippingPriorityItem join(Customer c, Order o) {
            return new ShippingPriorityItem(o.getOrderKey(), 0.0, o.getOrderdate(), o.getShippriority());
        }
    });
    // Join the last join result with Lineitems, then sum field 1 (presumably the revenue
    // set below) for each group of fields 0, 2 and 3
    DataSet<ShippingPriorityItem> result = customerWithOrders.join(lineitems).where(0).equalTo(0).with(new JoinFunction<ShippingPriorityItem, Lineitem, ShippingPriorityItem>() {

        @Override
        public ShippingPriorityItem join(ShippingPriorityItem i, Lineitem l) {
            i.setRevenue(l.getExtendedprice() * (1 - l.getDiscount()));
            return i;
        }
    }).groupBy(0, 2, 3).aggregate(Aggregations.SUM, 1);
    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");
        // execute program
        env.execute("TPCH Query 3 Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Date(java.util.Date) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat)

Example 10 with ParameterTool

use of org.apache.flink.api.java.utils.ParameterTool in project flink by apache.

the class KMeans method main.

/**
 * Runs the KMeans example as a bulk iteration over the centroid data set.
 *
 * <p>The iteration count comes from the {@code iterations} argument (default 10). When
 * {@code output} is given the clustered points are written as CSV and the job is executed
 * explicitly; otherwise they are printed to stdout.
 *
 * @param args command-line arguments, parsed with {@code ParameterTool.fromArgs}
 * @throws Exception propagated from input set-up or from job execution
 */
public static void main(String[] args) throws Exception {
    // parse the command line
    final ParameterTool cli = ParameterTool.fromArgs(args);

    // create the execution environment and expose the parameters in the web interface
    final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
    execEnv.getConfig().setGlobalJobParameters(cli);

    // input: points and initial centroids (the helpers decide where the data comes from)
    final DataSet<Point> inputPoints = getPointDataSet(cli, execEnv);
    final DataSet<Centroid> initialCentroids = getCentroidDataSet(cli, execEnv);

    // open the bulk iteration with the configured number of rounds
    final int rounds = cli.getInt("iterations", 10);
    final IterativeDataSet<Centroid> iteration = initialCentroids.iterate(rounds);

    // one round: assign each point to its nearest centroid, then recompute the centroids
    final DataSet<Tuple2<Integer, Point>> assigned =
            inputPoints.map(new SelectNearestCenter()).withBroadcastSet(iteration, "centroids");
    final DataSet<Centroid> recomputed = assigned
            .map(new CountAppender())
            .groupBy(0)
            .reduce(new CentroidAccumulator())
            .map(new CentroidAverager());

    // close the loop by feeding the recomputed centroids back in
    final DataSet<Centroid> converged = iteration.closeWith(recomputed);

    // final assignment of every point against the resulting centroids
    final DataSet<Tuple2<Integer, Point>> labelled =
            inputPoints.map(new SelectNearestCenter()).withBroadcastSet(converged, "centroids");

    // emit result
    if (!cli.has("output")) {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        labelled.print();
        return;
    }
    labelled.writeAsCsv(cli.get("output"), "\n", " ");
    // file sinks are lazy, so execution has to be triggered explicitly
    execEnv.execute("KMeans Example");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2)

Aggregations

ParameterTool (org.apache.flink.api.java.utils.ParameterTool)43 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)19 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)19 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)15 JobExecutionResult (org.apache.flink.api.common.JobExecutionResult)7 NumberFormat (java.text.NumberFormat)6 Properties (java.util.Properties)6 ProgramParametrizationException (org.apache.flink.client.program.ProgramParametrizationException)6 JDKRandomGeneratorFactory (org.apache.flink.graph.generator.random.JDKRandomGeneratorFactory)6 LongValue (org.apache.flink.types.LongValue)6 NullValue (org.apache.flink.types.NullValue)6 Graph (org.apache.flink.graph.Graph)5 GraphCsvReader (org.apache.flink.graph.GraphCsvReader)5 LongValueToUnsignedIntValue (org.apache.flink.graph.asm.translate.translators.LongValueToUnsignedIntValue)5 RMatGraph (org.apache.flink.graph.generator.RMatGraph)5 RandomGenerableFactory (org.apache.flink.graph.generator.random.RandomGenerableFactory)5 SimpleStringSchema (org.apache.flink.streaming.util.serialization.SimpleStringSchema)5 IntValue (org.apache.flink.types.IntValue)5 StringValue (org.apache.flink.types.StringValue)4 DataSet (org.apache.flink.api.java.DataSet)3