Search in sources :

Example 1 with AvroInputFormat

use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.

the class AvroPojoTest method testSerializeWithAvro.

/**
 * Runs a batch job that reads {@link User} records through an
 * {@link AvroInputFormat} with the force-Avro option switched on in the
 * execution config, replaces every record's type map with a single
 * {@code "hehe" -> 12} entry, and writes the records out as text.
 *
 * <p>The text the job is expected to produce is stored in {@code expected};
 * the comparison against {@code resultPath} is not performed in this method
 * (presumably done by the surrounding test harness - confirm).
 *
 * @throws Exception if building or executing the job fails
 */
@Test
public void testSerializeWithAvro() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.getConfig().enableForceAvro();

    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> avroFormat = new AvroInputFormat<User>(inputPath, User.class);

    // Overwrites the type map of each incoming record with a fixed one-entry map.
    final MapFunction<User, User> replaceTypeMap = new MapFunction<User, User>() {

        @Override
        public User map(User user) throws Exception {
            Map<CharSequence, Long> typeMap = new HashMap<CharSequence, Long>(1);
            typeMap.put("hehe", 12L);
            user.setTypeMap(typeMap);
            return user;
        }
    };

    final DataSet<User> source = environment.createInput(avroFormat);
    final DataSet<User> rewritten = source.map(replaceTypeMap);
    rewritten.writeAsText(resultPath);
    environment.execute("Simple Avro read job");

    // One line of expected text output per record.
    final String alyssaLine = "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null, \"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null, \"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"], \"type_array_boolean\": [true, false], \"type_nullable_array\": null, \"type_enum\": \"GREEN\", \"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n";
    final String charlieLine = "{\"name\": \"Charlie\", \"favorite_number\": null, \"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, \"type_null_test\": null, \"type_bool_test\": false, \"type_array_string\": [], \"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", \"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n";
    expected = alyssaLine + charlieLine;
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.api.io.avro.generated.User) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)

Example 2 with AvroInputFormat

use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.

the class AvroPojoTest method testKeySelection.

/**
 * Runs a batch job, with object reuse enabled on the execution config, that
 * reads {@link User} records through an {@link AvroInputFormat}, groups them
 * by the {@code "name"} field and emits one {@code (name, 1)} tuple per
 * record in each group, written out as text.
 *
 * <p>The text the job is expected to produce is stored in {@code expected};
 * the comparison against {@code resultPath} is not performed in this method
 * (presumably done by the surrounding test harness - confirm).
 *
 * @throws Exception if building or executing the job fails
 */
@Test
public void testKeySelection() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.getConfig().enableObjectReuse();

    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> avroFormat = new AvroInputFormat<User>(inputPath, User.class);
    final DataSet<User> source = environment.createInput(avroFormat);

    // Emits a (name, 1) pair for every record of the group it is handed.
    final GroupReduceFunction<User, Tuple2<String, Integer>> nameWithOne = new GroupReduceFunction<User, Tuple2<String, Integer>>() {

        @Override
        public void reduce(Iterable<User> group, Collector<Tuple2<String, Integer>> collector) throws Exception {
            for (User user : group) {
                String userName = user.getName().toString();
                collector.collect(new Tuple2<String, Integer>(userName, 1));
            }
        }
    };

    final DataSet<Tuple2<String, Integer>> perName = source.groupBy("name").reduceGroup(nameWithOne);
    perName.writeAsText(resultPath);
    environment.execute("Avro Key selection");
    expected = "(Alyssa,1)\n(Charlie,1)\n";
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.api.io.avro.generated.User) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 3 with AvroInputFormat

use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.

the class AvroPojoTest method testSimpleAvroRead.

/**
 * Runs a batch job that reads {@link User} records through an
 * {@link AvroInputFormat}, sets every record's type map to {@code null},
 * and writes the records out as text.
 *
 * <p>The text the job is expected to produce is stored in {@code expected};
 * the comparison against {@code resultPath} is not performed in this method
 * (presumably done by the surrounding test harness - confirm).
 *
 * @throws Exception if building or executing the job fails
 */
@Test
public void testSimpleAvroRead() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> avroFormat = new AvroInputFormat<User>(inputPath, User.class);

    // Clears the type map of each incoming record.
    final MapFunction<User, User> clearTypeMap = new MapFunction<User, User>() {

        @Override
        public User map(User user) throws Exception {
            user.setTypeMap(null);
            return user;
        }
    };

    final DataSet<User> source = environment.createInput(avroFormat);
    final DataSet<User> cleared = source.map(clearTypeMap);
    cleared.writeAsText(resultPath);
    environment.execute("Simple Avro read job");

    // One line of expected text output per record.
    final String alyssaLine = "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null, \"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null, \"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"], \"type_array_boolean\": [true, false], \"type_nullable_array\": null, \"type_enum\": \"GREEN\", \"type_map\": null, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n";
    final String charlieLine = "{\"name\": \"Charlie\", \"favorite_number\": null, \"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, \"type_null_test\": null, \"type_bool_test\": false, \"type_array_string\": [], \"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", \"type_map\": null, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n";
    expected = alyssaLine + charlieLine;
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.api.io.avro.generated.User) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)

Example 4 with AvroInputFormat

use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.

the class AvroRecordInputFormatTest method testDeserialisationGenericRecordReuseAvroValueFalse.

/**
 * Checks reading an Avro file as {@link GenericRecord}s through an
 * {@link AvroInputFormat} on which Avro value reuse has been switched off.
 *
 * <p>The format is configured first and then has reuse disabled; the actual
 * reading and verification is delegated to
 * {@code doTestDeserializationGenericRecord}.
 *
 * @throws IOException if there is an error
 */
@Test
public void testDeserialisationGenericRecordReuseAvroValueFalse() throws IOException {
    final Path avroFile = new Path(testFile.getAbsolutePath());
    final AvroInputFormat<GenericRecord> inputFormat = new AvroInputFormat<GenericRecord>(avroFile, GenericRecord.class);
    final Configuration config = new Configuration();

    inputFormat.configure(config);
    inputFormat.setReuseAvroValue(false);

    doTestDeserializationGenericRecord(inputFormat, config);
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 5 with AvroInputFormat

use of org.apache.flink.api.java.io.AvroInputFormat in project flink by apache.

the class AvroRecordInputFormatTest method testDeserialisation.

/**
 * Test if the AvroInputFormat is able to properly read data from an avro file.
 *
 * <p>Requests a single input split, opens it, and checks the name, the
 * string and boolean arrays, the enum and the map of the first record
 * against the test constants. It then checks that a second record can be
 * read and that no further record follows before the end of the input is
 * reported.
 *
 * @throws IOException if creating the splits, opening, reading or closing
 *             the format fails
 */
@Test
public void testDeserialisation() throws IOException {
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<User>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(1);
    // JUnit's contract is assertEquals(expected, actual); keep that order so
    // a failure message reports the values the right way round.
    assertEquals(1, splits.length);
    format.open(splits[0]);
    try {
        User u = format.nextRecord(null);
        assertNotNull(u);
        String name = u.getName().toString();
        assertNotNull("empty record", name);
        assertEquals("name not equal", TEST_NAME, name);
        // check arrays
        List<CharSequence> sl = u.getTypeArrayString();
        assertEquals("element 0 not equal", TEST_ARRAY_STRING_1, sl.get(0).toString());
        assertEquals("element 1 not equal", TEST_ARRAY_STRING_2, sl.get(1).toString());
        List<Boolean> bl = u.getTypeArrayBoolean();
        assertEquals("element 0 not equal", TEST_ARRAY_BOOLEAN_1, bl.get(0));
        assertEquals("element 1 not equal", TEST_ARRAY_BOOLEAN_2, bl.get(1));
        // check enums
        Colors enumValue = u.getTypeEnum();
        assertEquals("enum not equal", TEST_ENUM_COLOR, enumValue);
        // check maps (keys are looked up as Utf8 instances)
        Map<CharSequence, Long> lm = u.getTypeMap();
        assertEquals("map value of key 1 not equal", TEST_MAP_VALUE1, lm.get(new Utf8(TEST_MAP_KEY1)).longValue());
        assertEquals("map value of key 2 not equal", TEST_MAP_VALUE2, lm.get(new Utf8(TEST_MAP_KEY2)).longValue());
        // exactly one more record is expected after the first one
        assertFalse("expecting second element", format.reachedEnd());
        assertNotNull("expecting second element", format.nextRecord(u));
        assertNull(format.nextRecord(u));
        assertTrue(format.reachedEnd());
    } finally {
        // Close the format even when an assertion or a read fails, so the
        // opened split is not leaked.
        format.close();
    }
}
Also used : Path(org.apache.flink.core.fs.Path) User(org.apache.flink.api.io.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Colors(org.apache.flink.api.io.avro.generated.Colors) Utf8(org.apache.avro.util.Utf8) Test(org.junit.Test)

Aggregations

AvroInputFormat (org.apache.flink.api.java.io.AvroInputFormat)13 Path (org.apache.flink.core.fs.Path)13 Test (org.junit.Test)12 User (org.apache.flink.api.io.avro.generated.User)11 Configuration (org.apache.flink.configuration.Configuration)7 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)6 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)5 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)3 GenericRecord (org.apache.avro.generic.GenericRecord)2 Utf8 (org.apache.avro.util.Utf8)2 Colors (org.apache.flink.api.io.avro.generated.Colors)2 KeySelector (org.apache.flink.api.java.functions.KeySelector)2 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)1