Search in sources :

Example 1 with AvroInputFormat

use of org.apache.flink.formats.avro.AvroInputFormat in project flink by apache.

the class AvroTypeExtractionTest method testField.

/**
 * Runs a batch job that groups the Avro {@code User} records read from {@code inFile} by
 * {@code fieldName}, writes the value of that field for every record as text to
 * {@code resultPath}, and then stores the output expected for that field in {@code expected}.
 *
 * <p>Also asserts that {@code Fixed16} is among the Kryo types registered on the execution
 * config after the job has run.
 *
 * <p>NOTE(review): {@code before()} and {@code after()} are defined outside this snippet;
 * presumably they set up the input file and compare {@code expected} with the written
 * result - confirm against the enclosing class.
 *
 * @param fieldName name of the {@code User} field to group by and emit; must be one of
 *     {@code "name"}, {@code "type_enum"} or {@code "type_double_test"}, otherwise the test fails
 * @throws Exception if setup, job execution or teardown fails
 */
private void testField(final String fieldName) throws Exception {
    before();

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> userFormat = new AvroInputFormat<>(inputPath, User.class);
    final DataSet<User> userRecords = env.createInput(userFormat);

    // Emits the value of the grouping field for every record of every group.
    final GroupReduceFunction<User, Object> emitFieldValue = (group, collector) -> {
        for (User user : group) {
            collector.collect(user.get(fieldName));
        }
    };

    // The explicit returns(...) supplies the output type of the lambda-based reducer.
    final DataSet<Object> fieldValues =
            userRecords.groupBy(fieldName).reduceGroup(emitFieldValue).returns(Object.class);
    fieldValues.writeAsText(resultPath);
    env.execute("Simple Avro read job");

    // test if automatic registration of the Types worked
    Assert.assertTrue(env.getConfig().getRegisteredKryoTypes().contains(Fixed16.class));

    // Select the output expected for the requested field.
    switch (fieldName) {
        case "name":
            expected = "Alyssa\nCharlie";
            break;
        case "type_enum":
            expected = "GREEN\nRED\n";
            break;
        case "type_double_test":
            expected = "123.45\n1.337\n";
            break;
        default:
            Assert.fail("Unknown field");
            break;
    }

    after();
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Fixed16(org.apache.flink.formats.avro.generated.Fixed16) User(org.apache.flink.formats.avro.generated.User) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) AvroInputFormat(org.apache.flink.formats.avro.AvroInputFormat) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig)

Example 2 with AvroInputFormat

use of org.apache.flink.formats.avro.AvroInputFormat in project flink by apache.

the class AvroTypeExtractionTest method testSimpleAvroRead.

/**
 * Reads the Avro file at {@code inFile} as {@code User} records, passes them through an
 * identity map, writes them as text to {@code resultPath}, and stores the expected textual
 * rendering of the two records in {@code expected}.
 *
 * <p>NOTE(review): the comparison of {@code expected} with the written output is not part of
 * this method; presumably it happens in the test teardown - confirm.
 *
 * @throws Exception if the job fails
 */
@Test
public void testSimpleAvroRead() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> userFormat = new AvroInputFormat<>(inputPath, User.class);

    final DataSet<User> userRecords = env.createInput(userFormat).map(record -> record);
    userRecords.writeAsText(resultPath);
    env.execute("Simple Avro read job");

    // One line per record, first "Alyssa" and then "Charlie".
    expected =
            "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null, "
                    + "\"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null, "
                    + "\"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"], "
                    + "\"type_array_boolean\": [true, false], \"type_nullable_array\": null, \"type_enum\": \"GREEN\", "
                    + "\"type_map\": {\"KEY 2\": 17554, \"KEY 1\": 8546456}, \"type_fixed\": null, \"type_union\": null, "
                    + "\"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", "
                    + "\"state\": \"London\", \"zip\": \"NW1 6XE\"}, "
                    + "\"type_bytes\": \"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\", "
                    + "\"type_date\": 2014-03-01, \"type_time_millis\": 12:12:12, \"type_time_micros\": 00:00:00.123456, "
                    + "\"type_timestamp_millis\": 2014-03-01T12:12:12.321Z, "
                    + "\"type_timestamp_micros\": 1970-01-01T00:00:00.123456Z, \"type_decimal_bytes\": \"\\u0007Ð\", "
                    + "\"type_decimal_fixed\": [7, -48]}\n"
                    + "{\"name\": \"Charlie\", \"favorite_number\": null, "
                    + "\"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, "
                    + "\"type_null_test\": null, \"type_bool_test\": false, \"type_array_string\": [], "
                    + "\"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", \"type_map\": {}, "
                    + "\"type_fixed\": null, \"type_union\": null, "
                    + "\"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", "
                    + "\"zip\": \"NW1 6XE\"}, "
                    + "\"type_bytes\": \"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\", "
                    + "\"type_date\": 2014-03-01, \"type_time_millis\": 12:12:12, \"type_time_micros\": 00:00:00.123456, "
                    + "\"type_timestamp_millis\": 2014-03-01T12:12:12.321Z, "
                    + "\"type_timestamp_micros\": 1970-01-01T00:00:00.123456Z, \"type_decimal_bytes\": \"\\u0007Ð\", "
                    + "\"type_decimal_fixed\": [7, -48]}\n";
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.formats.avro.generated.User) AvroInputFormat(org.apache.flink.formats.avro.AvroInputFormat) AvroRecordInputFormatTest(org.apache.flink.formats.avro.AvroRecordInputFormatTest) Test(org.junit.Test)

Example 3 with AvroInputFormat

use of org.apache.flink.formats.avro.AvroInputFormat in project flink by apache.

the class AvroTypeExtractionTest method testWithKryoGenericSer.

/**
 * Runs a grouping job over the Avro {@code User} records with {@code enableForceKryo()} set
 * on the execution config.
 *
 * <p>Records are keyed by the string form of their name through a {@code KeySelector}; for
 * every record a {@code (name, 1)} tuple is written as text to {@code resultPath}, and the
 * expected output is stored in {@code expected}.
 *
 * <p>NOTE(review): the comparison of {@code expected} with the written output is not part of
 * this method; presumably it happens in the test teardown - confirm.
 *
 * @throws Exception if the job fails
 */
@Test
public void testWithKryoGenericSer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableForceKryo();

    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> userFormat = new AvroInputFormat<>(inputPath, User.class);
    final DataSet<User> userRecords = env.createInput(userFormat);

    // Key each record by its name, converted to a String.
    final KeySelector<User, String> byName = user -> String.valueOf(user.getName());

    // Emits one (name, 1) tuple for every record of every group.
    final GroupReduceFunction<User, Tuple2<String, Integer>> emitNameWithOne = (group, collector) -> {
        for (User user : group) {
            collector.collect(new Tuple2<>(user.getName().toString(), 1));
        }
    };

    // The explicit returns(...) supplies the tuple type of the lambda-based reducer.
    final DataSet<Tuple2<String, Integer>> nameCounts =
            userRecords
                    .groupBy(byName)
                    .reduceGroup(emitNameWithOne)
                    .returns(Types.TUPLE(Types.STRING, Types.INT));
    nameCounts.writeAsText(resultPath);
    env.execute("Avro Key selection");

    expected = "(Charlie,1)\n(Alyssa,1)\n";
}
Also used : Path(org.apache.flink.core.fs.Path) Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) MultipleProgramsTestBase(org.apache.flink.test.util.MultipleProgramsTestBase) MapFunction(org.apache.flink.api.common.functions.MapFunction) AvroRecordInputFormatTest(org.apache.flink.formats.avro.AvroRecordInputFormatTest) DataSet(org.apache.flink.api.java.DataSet) Path(org.apache.flink.core.fs.Path) After(org.junit.After) Map(java.util.Map) Parameterized(org.junit.runners.Parameterized) Before(org.junit.Before) Types(org.apache.flink.api.common.typeinfo.Types) AvroInputFormat(org.apache.flink.formats.avro.AvroInputFormat) Fixed16(org.apache.flink.formats.avro.generated.Fixed16) KeySelector(org.apache.flink.api.java.functions.KeySelector) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) Test(org.junit.Test) File(java.io.File) Rule(org.junit.Rule) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) User(org.apache.flink.formats.avro.generated.User) Assert(org.junit.Assert) TemporaryFolder(org.junit.rules.TemporaryFolder) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.formats.avro.generated.User) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) AvroInputFormat(org.apache.flink.formats.avro.AvroInputFormat) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AvroRecordInputFormatTest(org.apache.flink.formats.avro.AvroRecordInputFormatTest) Test(org.junit.Test)

Example 4 with AvroInputFormat

use of org.apache.flink.formats.avro.AvroInputFormat in project flink by apache.

the class AvroTypeExtractionTest method testSerializeWithAvro.

/**
 * Runs a map job over the Avro {@code User} records with {@code enableForceAvro()} set on
 * the execution config.
 *
 * <p>The map step replaces every record's type map with a single entry {@code "hehe" -> 12};
 * the records are then written as text to {@code resultPath}, and the expected textual
 * rendering of the two modified records is stored in {@code expected}.
 *
 * <p>NOTE(review): the comparison of {@code expected} with the written output is not part of
 * this method; presumably it happens in the test teardown - confirm.
 *
 * @throws Exception if the job fails
 */
@Test
public void testSerializeWithAvro() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableForceAvro();

    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> userFormat = new AvroInputFormat<>(inputPath, User.class);

    // Overwrites the record's type map with a fresh single-entry map and passes the record on.
    final MapFunction<User, User> overwriteTypeMap = user -> {
        Map<CharSequence, Long> replacementTypeMap = new HashMap<>(1);
        replacementTypeMap.put("hehe", 12L);
        user.setTypeMap(replacementTypeMap);
        return user;
    };

    final DataSet<User> userRecords = env.createInput(userFormat).map(overwriteTypeMap);
    userRecords.writeAsText(resultPath);
    env.execute("Simple Avro read job");

    // One line per record, first "Alyssa" and then "Charlie", both with the replaced type map.
    expected =
            "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null,"
                    + " \"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null,"
                    + " \"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"],"
                    + " \"type_array_boolean\": [true, false], \"type_nullable_array\": null, \"type_enum\": \"GREEN\","
                    + " \"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null,"
                    + " \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\","
                    + " \"state\": \"London\", \"zip\": \"NW1 6XE\"},"
                    + " \"type_bytes\": \"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\", "
                    + "\"type_date\": 2014-03-01, \"type_time_millis\": 12:12:12, \"type_time_micros\": 00:00:00.123456, "
                    + "\"type_timestamp_millis\": 2014-03-01T12:12:12.321Z, "
                    + "\"type_timestamp_micros\": 1970-01-01T00:00:00.123456Z, \"type_decimal_bytes\": \"\\u0007Ð\", "
                    + "\"type_decimal_fixed\": [7, -48]}\n"
                    + "{\"name\": \"Charlie\", \"favorite_number\": null, "
                    + "\"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, "
                    + "\"type_null_test\": null, \"type_bool_test\": false, \"type_array_string\": [], "
                    + "\"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", "
                    + "\"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null, "
                    + "\"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", "
                    + "\"zip\": \"NW1 6XE\"}, "
                    + "\"type_bytes\": \"\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\\u0000\", "
                    + "\"type_date\": 2014-03-01, \"type_time_millis\": 12:12:12, \"type_time_micros\": 00:00:00.123456, "
                    + "\"type_timestamp_millis\": 2014-03-01T12:12:12.321Z, "
                    + "\"type_timestamp_micros\": 1970-01-01T00:00:00.123456Z, \"type_decimal_bytes\": \"\\u0007Ð\", "
                    + "\"type_decimal_fixed\": [7, -48]}\n";
}
Also used : Path(org.apache.flink.core.fs.Path) Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) MultipleProgramsTestBase(org.apache.flink.test.util.MultipleProgramsTestBase) MapFunction(org.apache.flink.api.common.functions.MapFunction) AvroRecordInputFormatTest(org.apache.flink.formats.avro.AvroRecordInputFormatTest) DataSet(org.apache.flink.api.java.DataSet) Path(org.apache.flink.core.fs.Path) After(org.junit.After) Map(java.util.Map) Parameterized(org.junit.runners.Parameterized) Before(org.junit.Before) Types(org.apache.flink.api.common.typeinfo.Types) AvroInputFormat(org.apache.flink.formats.avro.AvroInputFormat) Fixed16(org.apache.flink.formats.avro.generated.Fixed16) KeySelector(org.apache.flink.api.java.functions.KeySelector) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) Test(org.junit.Test) File(java.io.File) Rule(org.junit.Rule) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) User(org.apache.flink.formats.avro.generated.User) Assert(org.junit.Assert) TemporaryFolder(org.junit.rules.TemporaryFolder) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.formats.avro.generated.User) AvroInputFormat(org.apache.flink.formats.avro.AvroInputFormat) HashMap(java.util.HashMap) Map(java.util.Map) AvroRecordInputFormatTest(org.apache.flink.formats.avro.AvroRecordInputFormatTest) Test(org.junit.Test)

Example 5 with AvroInputFormat

use of org.apache.flink.formats.avro.AvroInputFormat in project flink by apache.

the class AvroTypeExtractionTest method testKeySelection.

/**
 * Runs a grouping job over the Avro {@code User} records with {@code enableObjectReuse()}
 * set on the execution config.
 *
 * <p>Records are grouped by the field expression {@code "name"}; for every record a
 * {@code (name, 1)} tuple is written as text to {@code resultPath}, and the expected output
 * is stored in {@code expected}.
 *
 * <p>NOTE(review): the comparison of {@code expected} with the written output is not part of
 * this method; presumably it happens in the test teardown - confirm.
 *
 * @throws Exception if the job fails
 */
@Test
public void testKeySelection() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();

    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> userFormat = new AvroInputFormat<>(inputPath, User.class);
    final DataSet<User> userRecords = env.createInput(userFormat);

    // Emits one (name, 1) tuple for every record of every group.
    final GroupReduceFunction<User, Tuple2<String, Integer>> emitNameWithOne = (group, collector) -> {
        for (User user : group) {
            collector.collect(new Tuple2<>(user.getName().toString(), 1));
        }
    };

    // The explicit returns(...) supplies the tuple type of the lambda-based reducer.
    final DataSet<Tuple2<String, Integer>> nameCounts =
            userRecords
                    .groupBy("name")
                    .reduceGroup(emitNameWithOne)
                    .returns(Types.TUPLE(Types.STRING, Types.INT));
    nameCounts.writeAsText(resultPath);
    env.execute("Avro Key selection");

    expected = "(Alyssa,1)\n(Charlie,1)\n";
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.formats.avro.generated.User) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AvroInputFormat(org.apache.flink.formats.avro.AvroInputFormat) AvroRecordInputFormatTest(org.apache.flink.formats.avro.AvroRecordInputFormatTest) Test(org.junit.Test)

Aggregations

ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)6 Path (org.apache.flink.core.fs.Path)6 AvroInputFormat (org.apache.flink.formats.avro.AvroInputFormat)6 User (org.apache.flink.formats.avro.generated.User)6 GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction)5 AvroRecordInputFormatTest (org.apache.flink.formats.avro.AvroRecordInputFormatTest)5 Test (org.junit.Test)5 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)4 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)4 Fixed16 (org.apache.flink.formats.avro.generated.Fixed16)4 File (java.io.File)3 Arrays (java.util.Arrays)3 HashMap (java.util.HashMap)3 Map (java.util.Map)3 MapFunction (org.apache.flink.api.common.functions.MapFunction)3 Types (org.apache.flink.api.common.typeinfo.Types)3 DataSet (org.apache.flink.api.java.DataSet)3 KeySelector (org.apache.flink.api.java.functions.KeySelector)3 MultipleProgramsTestBase (org.apache.flink.test.util.MultipleProgramsTestBase)3 After (org.junit.After)3