Search in sources :

Example 26 with Path

use of org.apache.flink.core.fs.Path in project flink by apache.

the class AvroPojoTest method testSerializeWithAvro.

/**
 * Runs a batch job that reads {@code User} records from the Avro input file
 * with the force-Avro option enabled on the execution config, replaces every
 * record's type map with a single {@code "hehe" -> 12} entry and writes the
 * records as text to {@code resultPath}.
 *
 * <p>The text the job should produce is stored in {@code expected}; the
 * comparison itself is not part of this method (presumably done by the
 * surrounding test base - verify against the enclosing class).
 */
@Test
public void testSerializeWithAvro() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableForceAvro();

    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> avroFormat = new AvroInputFormat<>(inputPath, User.class);

    // Overwrites the type map of each record with a one-entry map.
    final MapFunction<User, User> attachTypeMap = new MapFunction<User, User>() {

        @Override
        public User map(User value) throws Exception {
            final Map<CharSequence, Long> typeMap = new HashMap<>(1);
            typeMap.put("hehe", 12L);
            value.setTypeMap(typeMap);
            return value;
        }
    };

    final DataSet<User> mappedUsers = env.createInput(avroFormat).map(attachTypeMap);
    mappedUsers.writeAsText(resultPath);
    env.execute("Simple Avro read job");

    expected = "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null, \"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null, \"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"], \"type_array_boolean\": [true, false], \"type_nullable_array\": null, \"type_enum\": \"GREEN\", \"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n"
            + "{\"name\": \"Charlie\", \"favorite_number\": null, \"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, \"type_null_test\": null, \"type_bool_test\": false, \"type_array_string\": [], \"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", \"type_map\": {\"hehe\": 12}, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n";
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.api.io.avro.generated.User) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)

Example 27 with Path

use of org.apache.flink.core.fs.Path in project flink by apache.

the class AvroPojoTest method testKeySelection.

/**
 * Runs a batch job with object reuse enabled that reads {@code User} records
 * from the Avro input file, groups them by the {@code "name"} field and emits
 * one {@code (name, 1)} tuple per record, written as text to
 * {@code resultPath}.
 *
 * <p>The text the job should produce is stored in {@code expected}; the
 * comparison itself is not part of this method.
 */
@Test
public void testKeySelection() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();

    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> avroFormat = new AvroInputFormat<>(inputPath, User.class);
    final DataSet<User> usersDS = env.createInput(avroFormat);

    // Emits (name, 1) for every record of a group.
    final GroupReduceFunction<User, Tuple2<String, Integer>> emitNameWithOne = new GroupReduceFunction<User, Tuple2<String, Integer>>() {

        @Override
        public void reduce(Iterable<User> values, Collector<Tuple2<String, Integer>> out) throws Exception {
            final java.util.Iterator<User> records = values.iterator();
            while (records.hasNext()) {
                final String userName = records.next().getName().toString();
                out.collect(new Tuple2<String, Integer>(userName, 1));
            }
        }
    };

    final DataSet<Tuple2<String, Integer>> namesWithOne = usersDS.groupBy("name").reduceGroup(emitNameWithOne);
    namesWithOne.writeAsText(resultPath);
    env.execute("Avro Key selection");

    expected = "(Alyssa,1)\n(Charlie,1)\n";
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.api.io.avro.generated.User) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 28 with Path

use of org.apache.flink.core.fs.Path in project flink by apache.

the class AvroPojoTest method testSimpleAvroRead.

/**
 * Runs a batch job that reads {@code User} records from the Avro input file,
 * sets every record's type map to {@code null} and writes the records as text
 * to {@code resultPath}.
 *
 * <p>The text the job should produce is stored in {@code expected}; the
 * comparison itself is not part of this method.
 */
@Test
public void testSimpleAvroRead() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    final Path inputPath = new Path(inFile.getAbsoluteFile().toURI());
    final AvroInputFormat<User> avroFormat = new AvroInputFormat<>(inputPath, User.class);

    // Clears the type map of each record before it is written.
    final MapFunction<User, User> clearTypeMap = new MapFunction<User, User>() {

        @Override
        public User map(User value) throws Exception {
            value.setTypeMap(null);
            return value;
        }
    };

    final DataSet<User> mappedUsers = env.createInput(avroFormat).map(clearTypeMap);
    mappedUsers.writeAsText(resultPath);
    env.execute("Simple Avro read job");

    expected = "{\"name\": \"Alyssa\", \"favorite_number\": 256, \"favorite_color\": null, \"type_long_test\": null, \"type_double_test\": 123.45, \"type_null_test\": null, \"type_bool_test\": true, \"type_array_string\": [\"ELEMENT 1\", \"ELEMENT 2\"], \"type_array_boolean\": [true, false], \"type_nullable_array\": null, \"type_enum\": \"GREEN\", \"type_map\": null, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n"
            + "{\"name\": \"Charlie\", \"favorite_number\": null, \"favorite_color\": \"blue\", \"type_long_test\": 1337, \"type_double_test\": 1.337, \"type_null_test\": null, \"type_bool_test\": false, \"type_array_string\": [], \"type_array_boolean\": [], \"type_nullable_array\": null, \"type_enum\": \"RED\", \"type_map\": null, \"type_fixed\": null, \"type_union\": null, \"type_nested\": {\"num\": 239, \"street\": \"Baker Street\", \"city\": \"London\", \"state\": \"London\", \"zip\": \"NW1 6XE\"}}\n";
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) User(org.apache.flink.api.io.avro.generated.User) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) Test(org.junit.Test)

Example 29 with Path

use of org.apache.flink.core.fs.Path in project flink by apache.

the class AvroOutputFormatTest method testCompression.

/**
 * Passes an uncompressed and a Snappy-configured {@code AvroOutputFormat} to
 * the {@code output} helper and asserts that the uncompressed file is larger
 * than the compressed one.
 *
 * <p>Both temporary files are removed in a {@code finally} block, so they are
 * cleaned up even when writing or the size assertion fails.
 *
 * @throws Exception if creating, writing or deleting the files fails
 */
@Test
public void testCompression() throws Exception {
    // given
    final Path outputPath = new Path(File.createTempFile("avro-output-file", "avro").getAbsolutePath());
    final Path compressedOutputPath = new Path(File.createTempFile("avro-output-file", "compressed.avro").getAbsolutePath());
    try {
        final AvroOutputFormat<User> outputFormat = new AvroOutputFormat<>(outputPath, User.class);
        outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
        final AvroOutputFormat<User> compressedOutputFormat = new AvroOutputFormat<>(compressedOutputPath, User.class);
        compressedOutputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
        compressedOutputFormat.setCodec(Codec.SNAPPY);
        // when
        output(outputFormat);
        output(compressedOutputFormat);
        // then
        assertTrue(fileSize(outputPath) > fileSize(compressedOutputPath));
    } finally {
        // cleanup: deleteIfExists so that a missing file cannot mask the
        // original failure with a NoSuchFileException
        Files.deleteIfExists(Paths.get(outputPath.getPath()));
        Files.deleteIfExists(Paths.get(compressedOutputPath.getPath()));
    }
}
Also used : Path(org.apache.flink.core.fs.Path) User(org.apache.flink.api.io.avro.example.User) Test(org.junit.Test)

Example 30 with Path

use of org.apache.flink.core.fs.Path in project flink by apache.

the class AvroRecordInputFormatTest method testDeserialisationGenericRecordReuseAvroValueFalse.

/**
 * Tests that the AvroInputFormat is able to properly read data from an Avro
 * file as a GenericRecord when Avro value reuse is switched off. The actual
 * checks are delegated to {@code doTestDeserializationGenericRecord}.
 *
 * @throws IOException if there is an error
 */
@Test
public void testDeserialisationGenericRecordReuseAvroValueFalse() throws IOException {
    final Configuration config = new Configuration();
    final Path avroFilePath = new Path(testFile.getAbsolutePath());
    final AvroInputFormat<GenericRecord> inputFormat = new AvroInputFormat<>(avroFilePath, GenericRecord.class);
    inputFormat.configure(config);
    // Reuse is disabled after configure(), in the same order as before.
    inputFormat.setReuseAvroValue(false);
    doTestDeserializationGenericRecord(inputFormat, config);
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.flink.configuration.Configuration) AvroInputFormat(org.apache.flink.api.java.io.AvroInputFormat) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Aggregations

Path (org.apache.flink.core.fs.Path) 590, Test (org.junit.Test) 320, File (java.io.File) 178, Configuration (org.apache.flink.configuration.Configuration) 101, IOException (java.io.IOException) 91, FileSystem (org.apache.flink.core.fs.FileSystem) 87, FileInputSplit (org.apache.flink.core.fs.FileInputSplit) 72, ArrayList (java.util.ArrayList) 64, HashMap (java.util.HashMap) 40, FileOutputStream (java.io.FileOutputStream) 38, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 36, Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 35, List (java.util.List) 33, Map (java.util.Map) 29, JobID (org.apache.flink.api.common.JobID) 29, OutputStreamWriter (java.io.OutputStreamWriter) 27, FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream) 27, FileStatus (org.apache.flink.core.fs.FileStatus) 26, ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 25, StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 21