Example 11 with User

Use of org.apache.flink.formats.avro.generated.User in project flink by apache.

The class AvroTypeExtractionTest, method testWithKryoGenericSer:

@Test
public void testWithKryoGenericSer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
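    // force Kryo serialization for types Flink would otherwise handle with its own serializers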
    env.getConfig().enableForceKryo();
    Path in = new Path(inFile.getAbsoluteFile().toURI());
    AvroInputFormat<User> users = new AvroInputFormat<>(in, User.class);
    DataSet<User> usersDS = env.createInput(users);
    DataSet<Tuple2<String, Integer>> res = usersDS
            .groupBy((KeySelector<User, String>) value -> String.valueOf(value.getName()))
            .reduceGroup((GroupReduceFunction<User, Tuple2<String, Integer>>) (values, out) -> {
        for (User u : values) {
            out.collect(new Tuple2<>(u.getName().toString(), 1));
        }
    }).returns(Types.TUPLE(Types.STRING, Types.INT));
    res.writeAsText(resultPath);
    env.execute("Avro Key selection");
    expected = "(Charlie,1)\n(Alyssa,1)\n";
}
Also used : Path(org.apache.flink.core.fs.Path) Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) MultipleProgramsTestBase(org.apache.flink.test.util.MultipleProgramsTestBase) MapFunction(org.apache.flink.api.common.functions.MapFunction) AvroRecordInputFormatTest(org.apache.flink.formats.avro.AvroRecordInputFormatTest) DataSet(org.apache.flink.api.java.DataSet) After(org.junit.After) Map(java.util.Map) Parameterized(org.junit.runners.Parameterized) Before(org.junit.Before) Types(org.apache.flink.api.common.typeinfo.Types) AvroInputFormat(org.apache.flink.formats.avro.AvroInputFormat) Fixed16(org.apache.flink.formats.avro.generated.Fixed16) KeySelector(org.apache.flink.api.java.functions.KeySelector) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) Test(org.junit.Test) File(java.io.File) Rule(org.junit.Rule) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) User(org.apache.flink.formats.avro.generated.User) Assert(org.junit.Assert) TemporaryFolder(org.junit.rules.TemporaryFolder)
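
One detail worth noting in this example: because the group-reduce function is a lambda, Java's type erasure hides its Tuple2<String, Integer> result type from Flink's type extractor, which is why the .returns(Types.TUPLE(...)) hint is required. As a hedged sketch (assuming the same usersDS input and an additional import of org.apache.flink.util.Collector), the same pipeline written with an anonymous class needs no hint, since the generic parameters survive in the class signature:

DataSet<Tuple2<String, Integer>> res = usersDS
        .groupBy((KeySelector<User, String>) value -> String.valueOf(value.getName()))
        .reduceGroup(new GroupReduceFunction<User, Tuple2<String, Integer>>() {
            @Override
            public void reduce(Iterable<User> values, Collector<Tuple2<String, Integer>> out) {
                for (User u : values) {
                    out.collect(new Tuple2<>(u.getName().toString(), 1));
                }
            }
        }); // no .returns(...) hint needed: Flink reads the types from the anonymous class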

Example 12 with User

Use of org.apache.flink.formats.avro.generated.User in project flink by apache.

The class AvroSplittableInputFormatTest, method testAvroRecovery:

@Test
public void testAvroRecovery() throws Exception {
    final int recordsUntilCheckpoint = 132;
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
        format.open(splits[i]);
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            if (format.getRecordsReadFromBlock() == recordsUntilCheckpoint) {
                // do the whole checkpoint-restore procedure and verify that we pick up where we left off
                Tuple2<Long, Long> state = format.getCurrentState();
                // create a brand-new format so that no state carries over from the
                // previous instance (just as after a real failure and recovery)
                format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
                format.reopen(splits[i], state);
                assertEquals(recordsUntilCheckpoint, format.getRecordsReadFromBlock());
            }
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1604, elementsPerSplit[0]);
    Assert.assertEquals(1203, elementsPerSplit[1]);
    Assert.assertEquals(1203, elementsPerSplit[2]);
    Assert.assertEquals(990, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.formats.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test)
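
The checkpoint/restore handshake exercised above can be isolated into a few lines. A minimal sketch (the file path is a placeholder, the snippet assumes an enclosing method that throws Exception, and the reading of the state tuple as (block start offset, records read since the last sync marker) is inferred from how the test uses it):

AvroInputFormat<User> format = new AvroInputFormat<>(new Path("/path/to/users.avro"), User.class);
format.configure(new Configuration());
FileInputSplit[] splits = format.createInputSplits(1);
format.open(splits[0]);
int read = 0;
// read some records, then snapshot the format's position
while (!format.reachedEnd() && read < 132) {
    format.nextRecord(null);
    read++;
}
Tuple2<Long, Long> state = format.getCurrentState();
format.close();
// a fresh instance resumes from the snapshot instead of re-reading the split from the start
format = new AvroInputFormat<>(new Path("/path/to/users.avro"), User.class);
format.reopen(splits[0], state);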

Example 13 with User

Use of org.apache.flink.formats.avro.generated.User in project flink by apache.

The class AvroSplittableInputFormatTest, method testAvroRecoveryWithFailureAtStart:

@Test
public void testAvroRecoveryWithFailureAtStart() throws Exception {
    final int recordsUntilCheckpoint = 132;
    Configuration parameters = new Configuration();
    AvroInputFormat<User> format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
    format.configure(parameters);
    FileInputSplit[] splits = format.createInputSplits(4);
    assertEquals(4, splits.length);
    int elements = 0;
    int[] elementsPerSplit = new int[4];
    for (int i = 0; i < splits.length; i++) {
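        // Simulate a failure before the first record of the split: reopening with the
        // format's initial state must behave like a plain open().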
        format.reopen(splits[i], format.getCurrentState());
        while (!format.reachedEnd()) {
            User u = format.nextRecord(null);
            Assert.assertTrue(u.getName().toString().startsWith(TEST_NAME));
            elements++;
            if (format.getRecordsReadFromBlock() == recordsUntilCheckpoint) {
                // do the whole checkpoint-restore procedure and verify that we pick up where we left off
                Tuple2<Long, Long> state = format.getCurrentState();
                // create a brand-new format so that no state carries over from the
                // previous instance (just as after a real failure and recovery)
                format = new AvroInputFormat<>(new Path(testFile.getAbsolutePath()), User.class);
                format.reopen(splits[i], state);
                assertEquals(recordsUntilCheckpoint, format.getRecordsReadFromBlock());
            }
            elementsPerSplit[i]++;
        }
        format.close();
    }
    Assert.assertEquals(1604, elementsPerSplit[0]);
    Assert.assertEquals(1203, elementsPerSplit[1]);
    Assert.assertEquals(1203, elementsPerSplit[2]);
    Assert.assertEquals(990, elementsPerSplit[3]);
    Assert.assertEquals(NUM_RECORDS, elements);
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) User(org.apache.flink.formats.avro.generated.User) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test)

Example 14 with User

Use of org.apache.flink.formats.avro.generated.User in project flink by apache.

The class EncoderDecoderTest, method testGeneratedObjectWithNullableFields:

@Test
public void testGeneratedObjectWithNullableFields() {
    List<CharSequence> strings = Arrays.asList(new CharSequence[] { "These", "strings", "should", "be", "recognizable", "as", "a", "meaningful", "sequence" });
    List<Boolean> bools = Arrays.asList(true, true, false, false, true, false, true, true);
    Map<CharSequence, Long> map = new HashMap<>();
    map.put("1", 1L);
    map.put("2", 2L);
    map.put("3", 3L);
    byte[] b = new byte[16];
    new Random().nextBytes(b);
    Fixed16 f = new Fixed16(b);
    Address addr = new Address(239, "6th Main", "Bangalore", "Karnataka", "560075");
    User user = new User(
            "Freudenreich", 1337, "macintosh gray", 1234567890L, 3.1415926,
            null, true, strings, bools, null, Colors.GREEN, map, f, Boolean.TRUE, addr,
            ByteBuffer.wrap(b),
            LocalDate.parse("2014-03-01"),
            LocalTime.parse("12:12:12"),
            LocalTime.ofSecondOfDay(0).plus(123456L, ChronoUnit.MICROS),
            Instant.parse("2014-03-01T12:12:12.321Z"),
            Instant.ofEpochSecond(0).plus(123456L, ChronoUnit.MICROS),
            // decimal 20.00 as unscaled bytes (scale 2)
            ByteBuffer.wrap(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()),
            new Fixed2(BigDecimal.valueOf(2000, 2).unscaledValue().toByteArray()));
    testObjectSerialization(user);
}
Also used : User(org.apache.flink.formats.avro.generated.User) Address(org.apache.flink.formats.avro.generated.Address) HashMap(java.util.HashMap) Fixed2(org.apache.flink.formats.avro.generated.Fixed2) Fixed16(org.apache.flink.formats.avro.generated.Fixed16) Random(java.util.Random) Test(org.junit.Test)
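
The helper testObjectSerialization is not part of this excerpt. As a hedged sketch of what such a round trip can look like with plain Avro specific-record serialization (the helper name roundTrip is hypothetical, the actual test may route through Flink's own encoder/decoder classes instead, and the usual org.apache.avro.io and org.apache.avro.specific imports are assumed):

private static <T extends SpecificRecordBase> void roundTrip(T record) throws IOException {
    // encode to Avro binary ...
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bytes, null);
    new SpecificDatumWriter<T>(record.getSchema()).write(record, encoder);
    encoder.flush();
    // ... decode it back ...
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes.toByteArray(), null);
    T copy = new SpecificDatumReader<T>(record.getSchema()).read(null, decoder);
    // ... and require full equality, nullable fields included
    Assert.assertEquals(record, copy);
}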

Example 15 with User

Use of org.apache.flink.formats.avro.generated.User in project flink by apache.

The class AvroOutputFormatITCase, method postSubmit:

@Override
protected void postSubmit() throws Exception {
    // compare result for specific user type
    File[] output1;
    File file1 = asFile(outputPath1);
    if (file1.isDirectory()) {
        output1 = file1.listFiles();
        // every file in the output directory must carry the .avro extension
        for (File avroOutput : Objects.requireNonNull(output1)) {
            Assert.assertTrue("Expect extension '.avro'", avroOutput.toString().endsWith(".avro"));
        }
    } else {
        output1 = new File[] { file1 };
    }
    List<String> result1 = new ArrayList<>();
    DatumReader<User> userDatumReader1 = new SpecificDatumReader<>(User.class);
    for (File avroOutput : output1) {
        DataFileReader<User> dataFileReader1 = new DataFileReader<>(avroOutput, userDatumReader1);
        while (dataFileReader1.hasNext()) {
            User user = dataFileReader1.next();
            result1.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
        }
    }
    for (String expectedResult : userData.split("\n")) {
        Assert.assertTrue("expected user " + expectedResult + " not found.", result1.contains(expectedResult));
    }
    // compare result for reflect user type
    File[] output2;
    File file2 = asFile(outputPath2);
    if (file2.isDirectory()) {
        output2 = file2.listFiles();
    } else {
        output2 = new File[] { file2 };
    }
    List<String> result2 = new ArrayList<>();
    DatumReader<ReflectiveUser> userDatumReader2 = new ReflectDatumReader<>(ReflectiveUser.class);
    for (File avroOutput : Objects.requireNonNull(output2)) {
        DataFileReader<ReflectiveUser> dataFileReader2 = new DataFileReader<>(avroOutput, userDatumReader2);
        while (dataFileReader2.hasNext()) {
            ReflectiveUser user = dataFileReader2.next();
            result2.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
        }
    }
    for (String expectedResult : userData.split("\n")) {
        Assert.assertTrue("expected user " + expectedResult + " not found.", result2.contains(expectedResult));
    }
}
Also used : User(org.apache.flink.formats.avro.generated.User) ArrayList(java.util.ArrayList) DataFileReader(org.apache.avro.file.DataFileReader) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) ReflectDatumReader(org.apache.avro.reflect.ReflectDatumReader) File(java.io.File)
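
For context, a sketch of the job side that produces the files this postSubmit inspects; the paths are placeholders and the real ITCase program is not shown in this excerpt. The extension check above appears to rely on AvroOutputFormat (org.apache.flink.formats.avro.AvroOutputFormat) naming its per-task output files with an .avro suffix:

final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
Path in = new Path("/path/to/users.avro"); // placeholder input
DataSet<User> users = env.createInput(new AvroInputFormat<>(in, User.class));
// specific-record output; each parallel task writes one .avro part file
users.write(new AvroOutputFormat<>(User.class), "/path/to/output1");
env.execute("Write Avro users");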

Aggregations

User (org.apache.flink.formats.avro.generated.User): 28
Test (org.junit.Test): 19
Path (org.apache.flink.core.fs.Path): 12
Fixed16 (org.apache.flink.formats.avro.generated.Fixed16): 8
HashMap (java.util.HashMap): 7
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 7
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 6
Configuration (org.apache.flink.configuration.Configuration): 6
AvroInputFormat (org.apache.flink.formats.avro.AvroInputFormat): 6
Address (org.apache.flink.formats.avro.generated.Address): 6
Fixed2 (org.apache.flink.formats.avro.generated.Fixed2): 6
GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction): 5
FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 5
AvroRecordInputFormatTest (org.apache.flink.formats.avro.AvroRecordInputFormatTest): 5
File (java.io.File): 4
ArrayList (java.util.ArrayList): 4
Arrays (java.util.Arrays): 4
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 4
Table (org.apache.flink.table.api.Table): 4
StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment): 4