
Example 26 with DataFileReader

Use of org.apache.avro.file.DataFileReader in project mist by snuspl.

From class DefaultGroupCheckpointStore, method loadSavedQueries.

@Override
public List<AvroDag> loadSavedQueries(final List<String> queryIdList) throws IOException {
    final List<AvroDag> savedQueries = new ArrayList<>();
    for (final String queryId : queryIdList) {
        final File storedFile = getQueryStoreFile(queryId);
        final DataFileReader<AvroDag> dataFileReader = new DataFileReader<>(storedFile, avroDagDatumReader);
        // Each query file stores a single AvroDag record; read it.
        AvroDag avroDag = null;
        avroDag = dataFileReader.next(avroDag);
        savedQueries.add(avroDag);
    }
    return savedQueries;
}
Also used: AvroDag (edu.snu.mist.formats.avro.AvroDag), DataFileReader (org.apache.avro.file.DataFileReader), ArrayList (java.util.ArrayList), File (java.io.File)
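
The snippet above reads a single record per file and never closes the reader. A minimal sketch of the same read, guarded with hasNext() and wrapped in try-with-resources (DataFileReader is Closeable), is shown below; readSingleDag is a hypothetical helper name, while AvroDag and avroDagDatumReader come from the example above.

private AvroDag readSingleDag(final File storedFile) throws IOException {
    // The reader is closed automatically, even if next() throws.
    try (final DataFileReader<AvroDag> reader = new DataFileReader<>(storedFile, avroDagDatumReader)) {
        if (!reader.hasNext()) {
            throw new IOException("No AvroDag record found in " + storedFile);
        }
        // Passing null tells Avro to allocate a fresh record instead of reusing one.
        return reader.next(null);
    }
}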

Example 27 with DataFileReader

Use of org.apache.avro.file.DataFileReader in project mist by snuspl.

From class DefaultGroupCheckpointStore, method loadSavedGroupState.

@Override
public GroupCheckpoint loadSavedGroupState(final String groupId) throws IOException {
    // Load the file.
    final File storedFile = getGroupCheckpointFile(groupId);
    final DataFileReader<GroupCheckpoint> dataFileReader = new DataFileReader<>(storedFile, groupCheckpointDatumReader);
    GroupCheckpoint mgc = null;
    mgc = dataFileReader.next(mgc);
    if (mgc != null) {
        LOG.log(Level.INFO, "Checkpoint file found. groupId is " + groupId);
    } else {
        LOG.log(Level.WARNING, "Checkpoint file not found or error during loading. groupId is " + groupId);
    }
    return mgc;
}
Also used: GroupCheckpoint (edu.snu.mist.formats.avro.GroupCheckpoint), DataFileReader (org.apache.avro.file.DataFileReader), File (java.io.File)

Example 28 with DataFileReader

Use of org.apache.avro.file.DataFileReader in project mist by snuspl.

From class AvroExecutionVertexStore, method loadAvroPhysicalOperatorChain.

/**
 * Loads the AvroPhysicalOperatorChain with the chainId.
 */
public AvroPhysicalOperatorChain loadAvroPhysicalOperatorChain(final String chainId) throws IOException {
    try {
        final File storedChain = getAvroPhysicalOperatorChainFile(chainId);
        final DataFileReader<AvroPhysicalOperatorChain> dataFileReader = new DataFileReader<>(storedChain, operatorChainDatumReader);
        // Read the single AvroPhysicalOperatorChain record stored in the file.
        AvroPhysicalOperatorChain avroPhysicalOperatorChain = null;
        avroPhysicalOperatorChain = dataFileReader.next(avroPhysicalOperatorChain);
        return avroPhysicalOperatorChain;
    } catch (final IOException e) {
        LOG.log(Level.SEVERE, "An exception occurred while loading the AvroPhysicalOperatorChain with ID {0}.", new Object[] { chainId });
        throw e;
    }
}
Also used: DataFileReader (org.apache.avro.file.DataFileReader), AvroPhysicalOperatorChain (edu.snu.mist.formats.avro.AvroPhysicalOperatorChain), IOException (java.io.IOException), File (java.io.File)
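
Note that if the constructor succeeds but next() throws, the reader above is never closed before the exception is re-thrown. A hedged sketch of the same load logic restructured with try-with-resources follows; all names are taken from the example, only the structure changes.

public AvroPhysicalOperatorChain loadAvroPhysicalOperatorChain(final String chainId) throws IOException {
    // try-with-resources closes the reader on both the success and the failure path;
    // exceptions thrown while opening the resource are still caught by the catch clause below.
    try (final DataFileReader<AvroPhysicalOperatorChain> dataFileReader =
                 new DataFileReader<>(getAvroPhysicalOperatorChainFile(chainId), operatorChainDatumReader)) {
        return dataFileReader.next(null);
    } catch (final IOException e) {
        LOG.log(Level.SEVERE, "An exception occurred while loading the AvroPhysicalOperatorChain with ID {0}.",
                new Object[] { chainId });
        throw e;
    }
}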

Example 29 with DataFileReader

Use of org.apache.avro.file.DataFileReader in project flink by apache.

From class AvroOutputFormatITCase, method postSubmit.

@Override
protected void postSubmit() throws Exception {
    // Compare the result for the specific user type.
    File[] output1;
    File file1 = asFile(outputPath1);
    if (file1.isDirectory()) {
        output1 = file1.listFiles();
        // check for avro ext in dir.
        for (File avroOutput : output1) {
            Assert.assertTrue("Expect extension '.avro'", avroOutput.toString().endsWith(".avro"));
        }
    } else {
        output1 = new File[] { file1 };
    }
    List<String> result1 = new ArrayList<String>();
    DatumReader<User> userDatumReader1 = new SpecificDatumReader<User>(User.class);
    for (File avroOutput : output1) {
        DataFileReader<User> dataFileReader1 = new DataFileReader<User>(avroOutput, userDatumReader1);
        while (dataFileReader1.hasNext()) {
            User user = dataFileReader1.next();
            result1.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
        }
    }
    for (String expectedResult : userData.split("\n")) {
        Assert.assertTrue("expected user " + expectedResult + " not found.", result1.contains(expectedResult));
    }
    // Compare the result for the reflect user type.
    File[] output2;
    File file2 = asFile(outputPath2);
    if (file2.isDirectory()) {
        output2 = file2.listFiles();
    } else {
        output2 = new File[] { file2 };
    }
    List<String> result2 = new ArrayList<String>();
    DatumReader<ReflectiveUser> userDatumReader2 = new ReflectDatumReader<ReflectiveUser>(ReflectiveUser.class);
    for (File avroOutput : output2) {
        DataFileReader<ReflectiveUser> dataFileReader2 = new DataFileReader<ReflectiveUser>(avroOutput, userDatumReader2);
        while (dataFileReader2.hasNext()) {
            ReflectiveUser user = dataFileReader2.next();
            result2.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
        }
    }
    for (String expectedResult : userData.split("\n")) {
        Assert.assertTrue("expected user " + expectedResult + " not found.", result2.contains(expectedResult));
    }
}
Also used: User (org.apache.flink.api.io.avro.example.User), ArrayList (java.util.ArrayList), DataFileReader (org.apache.avro.file.DataFileReader), SpecificDatumReader (org.apache.avro.specific.SpecificDatumReader), ReflectDatumReader (org.apache.avro.reflect.ReflectDatumReader), File (java.io.File)
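
For context, the files this loop consumes would have been produced with the matching write-side pair, DataFileWriter plus SpecificDatumWriter. Below is a minimal hedged sketch of that write side, reusing the generated User class from the example; the output path and the field values are made up for illustration, and the setters are assumed to mirror the getters used above.

// Build a record with the generated specific class.
User alice = new User();
alice.setName("Alice");
alice.setFavoriteNumber(42);
alice.setFavoriteColor("blue");
// Write a small Avro container file that the read loop above can consume.
try (DataFileWriter<User> dataFileWriter = new DataFileWriter<>(new SpecificDatumWriter<>(User.class))) {
    dataFileWriter.create(alice.getSchema(), new File("/tmp/users.avro"));
    dataFileWriter.append(alice);
}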

Example 30 with DataFileReader

Use of org.apache.avro.file.DataFileReader in project beam by apache.

From class AvroIOTest, method testWindowedAvroIOWrite.

@Test
@Category({ ValidatesRunner.class, UsesTestStream.class })
public void testWindowedAvroIOWrite() throws Throwable {
    Path baseDir = Files.createTempDirectory(tmpFolder.getRoot().toPath(), "testwrite");
    String baseFilename = baseDir.resolve("prefix").toString();
    Instant base = new Instant(0);
    ArrayList<GenericClass> allElements = new ArrayList<>();
    ArrayList<TimestampedValue<GenericClass>> firstWindowElements = new ArrayList<>();
    ArrayList<Instant> firstWindowTimestamps = Lists.newArrayList(base.plus(Duration.standardSeconds(0)), base.plus(Duration.standardSeconds(10)), base.plus(Duration.standardSeconds(20)), base.plus(Duration.standardSeconds(30)));
    Random random = new Random();
    for (int i = 0; i < 100; ++i) {
        GenericClass item = new GenericClass(i, String.valueOf(i));
        allElements.add(item);
        firstWindowElements.add(TimestampedValue.of(item, firstWindowTimestamps.get(random.nextInt(firstWindowTimestamps.size()))));
    }
    ArrayList<TimestampedValue<GenericClass>> secondWindowElements = new ArrayList<>();
    ArrayList<Instant> secondWindowTimestamps = Lists.newArrayList(base.plus(Duration.standardSeconds(60)), base.plus(Duration.standardSeconds(70)), base.plus(Duration.standardSeconds(80)), base.plus(Duration.standardSeconds(90)));
    for (int i = 100; i < 200; ++i) {
        GenericClass item = new GenericClass(i, String.valueOf(i));
        allElements.add(new GenericClass(i, String.valueOf(i)));
        secondWindowElements.add(TimestampedValue.of(item, secondWindowTimestamps.get(random.nextInt(secondWindowTimestamps.size()))));
    }
    TimestampedValue<GenericClass>[] firstWindowArray = firstWindowElements.toArray(new TimestampedValue[100]);
    TimestampedValue<GenericClass>[] secondWindowArray = secondWindowElements.toArray(new TimestampedValue[100]);
    TestStream<GenericClass> values = TestStream.create(AvroCoder.of(GenericClass.class)).advanceWatermarkTo(new Instant(0)).addElements(firstWindowArray[0], Arrays.copyOfRange(firstWindowArray, 1, firstWindowArray.length)).advanceWatermarkTo(new Instant(0).plus(Duration.standardMinutes(1))).addElements(secondWindowArray[0], Arrays.copyOfRange(secondWindowArray, 1, secondWindowArray.length)).advanceWatermarkToInfinity();
    FilenamePolicy policy = new WindowedFilenamePolicy(baseFilename);
    windowedAvroWritePipeline.apply(values).apply(Window.<GenericClass>into(FixedWindows.of(Duration.standardMinutes(1)))).apply(AvroIO.write(GenericClass.class).to(baseFilename).withFilenamePolicy(policy).withWindowedWrites().withNumShards(2));
    windowedAvroWritePipeline.run();
    // Validate that the data written matches the expected elements in the expected order
    List<File> expectedFiles = new ArrayList<>();
    for (int shard = 0; shard < 2; shard++) {
        for (int window = 0; window < 2; window++) {
            Instant windowStart = new Instant(0).plus(Duration.standardMinutes(window));
            IntervalWindow intervalWindow = new IntervalWindow(windowStart, Duration.standardMinutes(1));
            expectedFiles.add(new File(baseFilename + "-" + intervalWindow.toString() + "-" + shard + "-of-1" + "-pane-0-final"));
        }
    }
    List<GenericClass> actualElements = new ArrayList<>();
    for (File outputFile : expectedFiles) {
        assertTrue("Expected output file " + outputFile.getAbsolutePath(), outputFile.exists());
        try (DataFileReader<GenericClass> reader = new DataFileReader<>(outputFile, new ReflectDatumReader<GenericClass>(ReflectData.get().getSchema(GenericClass.class)))) {
            Iterators.addAll(actualElements, reader);
        }
        outputFile.delete();
    }
    assertThat(actualElements, containsInAnyOrder(allElements.toArray()));
}
Also used: Path (java.nio.file.Path), Instant (org.joda.time.Instant), ArrayList (java.util.ArrayList), FilenamePolicy (org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy), DataFileReader (org.apache.avro.file.DataFileReader), TimestampedValue (org.apache.beam.sdk.values.TimestampedValue), Random (java.util.Random), File (java.io.File), IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow), Category (org.junit.experimental.categories.Category), Test (org.junit.Test)
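
Iterators.addAll works here because DataFileReader (through DataFileStream) implements java.util.Iterator; it also implements Iterable, so a plain for-each loop is an equivalent way to drain a file. A small hedged sketch of that alternative, using GenericClass and outputFile from the test above:

List<GenericClass> actualElements = new ArrayList<>();
try (DataFileReader<GenericClass> reader = new DataFileReader<>(
        outputFile, new ReflectDatumReader<GenericClass>(ReflectData.get().getSchema(GenericClass.class)))) {
    // DataFileReader is Iterable, so its records can be consumed with a for-each loop.
    for (GenericClass element : reader) {
        actualElements.add(element);
    }
}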

Aggregations

DataFileReader (org.apache.avro.file.DataFileReader): 46 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 28 usages
File (java.io.File): 26 usages
GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 21 usages
Schema (org.apache.avro.Schema): 20 usages
Test (org.junit.Test): 10 usages
ArrayList (java.util.ArrayList): 9 usages
IOException (java.io.IOException): 8 usages
Test (org.testng.annotations.Test): 7 usages
SeekableInput (org.apache.avro.file.SeekableInput): 6 usages
WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 6 usages
Configuration (org.apache.hadoop.conf.Configuration): 6 usages
ReflectDatumReader (org.apache.avro.reflect.ReflectDatumReader): 5 usages
SeekableByteArrayInput (org.apache.avro.file.SeekableByteArrayInput): 4 usages
FsInput (org.apache.avro.mapred.FsInput): 4 usages
SpecificDatumReader (org.apache.avro.specific.SpecificDatumReader): 4 usages
Utf8 (org.apache.avro.util.Utf8): 4 usages
JsonObject (com.google.gson.JsonObject): 2 usages
AvroDag (edu.snu.mist.formats.avro.AvroDag): 2 usages
Date (java.sql.Date): 2 usages
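
The aggregation above shows that DataFileReader is most often paired with GenericDatumReader, GenericRecord, and Schema, and occasionally with SeekableByteArrayInput for in-memory data. A minimal hedged sketch of that schema-free generic pattern follows; avroBytes is an assumed byte array holding a serialized Avro container file.

// No generated classes needed: the writer schema is read from the container file header.
SeekableByteArrayInput input = new SeekableByteArrayInput(avroBytes);
try (DataFileReader<GenericRecord> reader =
             new DataFileReader<>(input, new GenericDatumReader<GenericRecord>())) {
    Schema writerSchema = reader.getSchema();
    for (GenericRecord record : reader) {
        System.out.println(writerSchema.getName() + ": " + record);
    }
}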