Use of org.apache.avro.file.DataFileReader in project mist by snuspl: class DefaultGroupCheckpointStore, method loadSavedQueries.
@Override
public List<AvroDag> loadSavedQueries(final List<String> queryIdList) throws IOException {
  final List<AvroDag> savedQueries = new ArrayList<>();
  for (final String queryId : queryIdList) {
    final File storedFile = getQueryStoreFile(queryId);
    final DataFileReader<AvroDag> dataFileReader = new DataFileReader<>(storedFile, avroDagDatumReader);
    AvroDag avroDag = null;
    avroDag = dataFileReader.next(avroDag);
    savedQueries.add(avroDag);
  }
  return savedQueries;
}
Use of org.apache.avro.file.DataFileReader in project mist by snuspl: class DefaultGroupCheckpointStore, method loadSavedGroupState.
@Override
public GroupCheckpoint loadSavedGroupState(final String groupId) throws IOException {
  // Load the file.
  final File storedFile = getGroupCheckpointFile(groupId);
  final DataFileReader<GroupCheckpoint> dataFileReader = new DataFileReader<>(storedFile, groupCheckpointDatumReader);
  GroupCheckpoint mgc = null;
  mgc = dataFileReader.next(mgc);
  if (mgc != null) {
    LOG.log(Level.INFO, "Checkpoint file found. groupId is " + groupId);
  } else {
    LOG.log(Level.WARNING, "Checkpoint file not found or error during loading. groupId is " + groupId);
  }
  return mgc;
}
Use of org.apache.avro.file.DataFileReader in project mist by snuspl: class AvroExecutionVertexStore, method loadAvroPhysicalOperatorChain.
/**
 * Loads the AvroPhysicalOperatorChain with the chainId.
 */
public AvroPhysicalOperatorChain loadAvroPhysicalOperatorChain(final String chainId) throws IOException {
  try {
    final File storedChain = getAvroPhysicalOperatorChainFile(chainId);
    final DataFileReader<AvroPhysicalOperatorChain> dataFileReader = new DataFileReader<>(storedChain, operatorChainDatumReader);
    AvroPhysicalOperatorChain avroPhysicalOperatorChain = null;
    avroPhysicalOperatorChain = dataFileReader.next(avroPhysicalOperatorChain);
    return avroPhysicalOperatorChain;
  } catch (final IOException e) {
    LOG.log(Level.SEVERE, "An exception occurred while loading the AvroPhysicalOperatorChain with ID {0}.", new Object[] { chainId });
    throw e;
  }
}
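None of the three mist snippets above closes its DataFileReader. Since DataFileReader implements Closeable, the same single-record read can be wrapped in try-with-resources; the following is a minimal sketch, not part of the mist code, and the helper name and the use of a SpecificDatumReader for AvroDag are assumptions.

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.io.DatumReader;
import org.apache.avro.specific.SpecificDatumReader;

// Hypothetical helper: read the first record from an Avro container file,
// closing the reader even when next() throws. AvroDag is the generated
// record class used in the snippets above.
private static AvroDag readSingleRecord(final File storedFile) throws IOException {
  final DatumReader<AvroDag> datumReader = new SpecificDatumReader<>(AvroDag.class);
  try (DataFileReader<AvroDag> reader = new DataFileReader<>(storedFile, datumReader)) {
    return reader.next(); // assumes the file holds at least one record
  }
}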
Use of org.apache.avro.file.DataFileReader in project flink by apache: class AvroOutputFormatITCase, method postSubmit.
@Override
protected void postSubmit() throws Exception {
  // compare result for specific user type
  File[] output1;
  File file1 = asFile(outputPath1);
  if (file1.isDirectory()) {
    output1 = file1.listFiles();
    // check for avro ext in dir.
    for (File avroOutput : output1) {
      Assert.assertTrue("Expect extension '.avro'", avroOutput.toString().endsWith(".avro"));
    }
  } else {
    output1 = new File[] { file1 };
  }
  List<String> result1 = new ArrayList<String>();
  DatumReader<User> userDatumReader1 = new SpecificDatumReader<User>(User.class);
  for (File avroOutput : output1) {
    DataFileReader<User> dataFileReader1 = new DataFileReader<User>(avroOutput, userDatumReader1);
    while (dataFileReader1.hasNext()) {
      User user = dataFileReader1.next();
      result1.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
    }
  }
  for (String expectedResult : userData.split("\n")) {
    Assert.assertTrue("expected user " + expectedResult + " not found.", result1.contains(expectedResult));
  }
  // compare result for reflect user type
  File[] output2;
  File file2 = asFile(outputPath2);
  if (file2.isDirectory()) {
    output2 = file2.listFiles();
  } else {
    output2 = new File[] { file2 };
  }
  List<String> result2 = new ArrayList<String>();
  DatumReader<ReflectiveUser> userDatumReader2 = new ReflectDatumReader<ReflectiveUser>(ReflectiveUser.class);
  for (File avroOutput : output2) {
    DataFileReader<ReflectiveUser> dataFileReader2 = new DataFileReader<ReflectiveUser>(avroOutput, userDatumReader2);
    while (dataFileReader2.hasNext()) {
      ReflectiveUser user = dataFileReader2.next();
      result2.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
    }
  }
  for (String expectedResult : userData.split("\n")) {
    Assert.assertTrue("expected user " + expectedResult + " not found.", result2.contains(expectedResult));
  }
}
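The Flink test drains each output file with an explicit hasNext()/next() loop and leaves the readers open. Because DataFileReader also implements Iterable<D> and Closeable, the same loop can be written as a for-each inside try-with-resources. A sketch of the specific-record half, reusing the variable names from the test above:

// Equivalent read of the specific-record output, with the reader closed
// deterministically once each file has been consumed.
for (File avroOutput : output1) {
  try (DataFileReader<User> dataFileReader1 = new DataFileReader<>(avroOutput, userDatumReader1)) {
    for (User user : dataFileReader1) {
      result1.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
    }
  }
}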
Use of org.apache.avro.file.DataFileReader in project beam by apache: class AvroIOTest, method testWindowedAvroIOWrite.
@Test
@Category({ ValidatesRunner.class, UsesTestStream.class })
public void testWindowedAvroIOWrite() throws Throwable {
  Path baseDir = Files.createTempDirectory(tmpFolder.getRoot().toPath(), "testwrite");
  String baseFilename = baseDir.resolve("prefix").toString();
  Instant base = new Instant(0);
  ArrayList<GenericClass> allElements = new ArrayList<>();
  ArrayList<TimestampedValue<GenericClass>> firstWindowElements = new ArrayList<>();
  ArrayList<Instant> firstWindowTimestamps = Lists.newArrayList(
      base.plus(Duration.standardSeconds(0)),
      base.plus(Duration.standardSeconds(10)),
      base.plus(Duration.standardSeconds(20)),
      base.plus(Duration.standardSeconds(30)));
  Random random = new Random();
  for (int i = 0; i < 100; ++i) {
    GenericClass item = new GenericClass(i, String.valueOf(i));
    allElements.add(item);
    firstWindowElements.add(TimestampedValue.of(item,
        firstWindowTimestamps.get(random.nextInt(firstWindowTimestamps.size()))));
  }
  ArrayList<TimestampedValue<GenericClass>> secondWindowElements = new ArrayList<>();
  ArrayList<Instant> secondWindowTimestamps = Lists.newArrayList(
      base.plus(Duration.standardSeconds(60)),
      base.plus(Duration.standardSeconds(70)),
      base.plus(Duration.standardSeconds(80)),
      base.plus(Duration.standardSeconds(90)));
  for (int i = 100; i < 200; ++i) {
    GenericClass item = new GenericClass(i, String.valueOf(i));
    allElements.add(new GenericClass(i, String.valueOf(i)));
    secondWindowElements.add(TimestampedValue.of(item,
        secondWindowTimestamps.get(random.nextInt(secondWindowTimestamps.size()))));
  }
  TimestampedValue<GenericClass>[] firstWindowArray = firstWindowElements.toArray(new TimestampedValue[100]);
  TimestampedValue<GenericClass>[] secondWindowArray = secondWindowElements.toArray(new TimestampedValue[100]);
  TestStream<GenericClass> values = TestStream.create(AvroCoder.of(GenericClass.class))
      .advanceWatermarkTo(new Instant(0))
      .addElements(firstWindowArray[0], Arrays.copyOfRange(firstWindowArray, 1, firstWindowArray.length))
      .advanceWatermarkTo(new Instant(0).plus(Duration.standardMinutes(1)))
      .addElements(secondWindowArray[0], Arrays.copyOfRange(secondWindowArray, 1, secondWindowArray.length))
      .advanceWatermarkToInfinity();
  FilenamePolicy policy = new WindowedFilenamePolicy(baseFilename);
  windowedAvroWritePipeline
      .apply(values)
      .apply(Window.<GenericClass>into(FixedWindows.of(Duration.standardMinutes(1))))
      .apply(AvroIO.write(GenericClass.class)
          .to(baseFilename)
          .withFilenamePolicy(policy)
          .withWindowedWrites()
          .withNumShards(2));
  windowedAvroWritePipeline.run();
  // Validate that the data written matches the expected elements in the expected order.
  List<File> expectedFiles = new ArrayList<>();
  for (int shard = 0; shard < 2; shard++) {
    for (int window = 0; window < 2; window++) {
      Instant windowStart = new Instant(0).plus(Duration.standardMinutes(window));
      IntervalWindow intervalWindow = new IntervalWindow(windowStart, Duration.standardMinutes(1));
      expectedFiles.add(new File(
          baseFilename + "-" + intervalWindow.toString() + "-" + shard + "-of-1" + "-pane-0-final"));
    }
  }
  List<GenericClass> actualElements = new ArrayList<>();
  for (File outputFile : expectedFiles) {
    assertTrue("Expected output file " + outputFile.getAbsolutePath(), outputFile.exists());
    try (DataFileReader<GenericClass> reader = new DataFileReader<>(outputFile,
        new ReflectDatumReader<GenericClass>(ReflectData.get().getSchema(GenericClass.class)))) {
      Iterators.addAll(actualElements, reader);
    }
    outputFile.delete();
  }
  assertThat(actualElements, containsInAnyOrder(allElements.toArray()));
}
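A usage note on the validation loop: DataFileReader implements Iterator<D>, so Guava's Iterators.addAll can drain each output file directly into actualElements, and because it also implements Closeable, the try-with-resources block closes the underlying file once the records have been copied.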