use of org.apache.hudi.integ.testsuite.writer.DeltaWriteStats in project hudi by apache.
the class DeltaGenerator method writeRecords.
/**
 * Writes the supplied records partition by partition and returns the resulting write statistics.
 *
 * <p>Before writing, the input directory of the previous batch ({@code batchId - 1}) is removed
 * when the output config asks for old input data to be deleted and {@code batchId} is greater
 * than 1. A failure of that cleanup is logged and does not abort the write.
 *
 * <p>{@code batchId} is incremented once the transformation has been declared.
 *
 * @param records the records to write
 * @return the write statistics produced for the partitions of {@code records}
 */
public JavaRDD<DeltaWriteStats> writeRecords(JavaRDD<GenericRecord> records) {
  final boolean purgePreviousBatch = deltaOutputConfig.shouldDeleteOldInputData() && batchId > 1;
  if (purgePreviousBatch) {
    Path previousBatchDir = new Path(deltaOutputConfig.getDeltaBasePath(), Integer.toString(batchId - 1));
    try {
      // Recursive delete of the whole previous-batch directory.
      FSUtils.getFs(previousBatchDir.toString(), deltaOutputConfig.getConfiguration())
          .delete(previousBatchDir, true);
    } catch (IOException deleteFailure) {
      log.error("Failed to delete older input data direcory " + previousBatchDir, deleteFailure);
    }
  }

  // The partition function below is a fresh anonymous function over the iterator, which is a
  // known source of serialization issues.
  // NOTE(review): the lambda reads deltaOutputConfig and batchId through the enclosing instance,
  // and batchId is incremented right after this transformation is declared - confirm which value
  // the partition function is expected to observe when the RDD is actually evaluated.
  JavaRDD<DeltaWriteStats> writeStats = records
      .mapPartitions(partition -> {
        try {
          DeltaWriterAdapter<GenericRecord> adapter =
              DeltaWriterFactory.getDeltaWriterAdapter(deltaOutputConfig, batchId);
          // Each partition contributes a single list of stats; it is flattened right after.
          return Collections.singletonList(adapter.write(partition)).iterator();
        } catch (IOException ioFailure) {
          throw new UncheckedIOException(ioFailure);
        }
      })
      .flatMap(List::iterator);

  batchId++;
  return writeStats;
}
use of org.apache.hudi.integ.testsuite.writer.DeltaWriteStats in project hudi by apache.
the class TestDFSHoodieTestSuiteWriterAdapter method testDFSTwoFilesWriteWithRollover.
/**
 * Drives {@code DFSDeltaWriterAdapter#write} with a mocked writer whose first {@code canWrite()}
 * answer is {@code false} and verifies the resulting interactions on that writer: two
 * {@code canWrite()} probes, one {@code getNewWriter()} call and two {@code close()} calls.
 */
@Test
public void testDFSTwoFilesWriteWithRollover() throws IOException {
  // Record source: hasNext() answers true three times and then false; next() is left unstubbed.
  Iterator<GenericRecord> recordSource = Mockito.mock(Iterator.class);
  Mockito.when(recordSource.hasNext()).thenReturn(true, true, true, false);

  // Writer mock: refuses the first canWrite() probe, accepts the second, and returns itself
  // whenever a new writer is requested so that all interactions land on the same mock.
  DeltaWriteStats statsStub = Mockito.mock(DeltaWriteStats.class);
  DeltaInputWriter<GenericRecord> writerMock = Mockito.mock(AvroFileDeltaInputWriter.class);
  Mockito.when(writerMock.canWrite()).thenReturn(false, true);
  Mockito.when(writerMock.getNewWriter()).thenReturn(writerMock);
  Mockito.when(writerMock.getDeltaWriteStats()).thenReturn(statsStub);

  DeltaWriterAdapter<GenericRecord> adapterUnderTest = new DFSDeltaWriterAdapter(writerMock);
  adapterUnderTest.write(recordSource);

  Mockito.verify(writerMock, Mockito.times(1)).getNewWriter();
  Mockito.verify(writerMock, Mockito.times(2)).canWrite();
  Mockito.verify(writerMock, Mockito.times(2)).close();
}
use of org.apache.hudi.integ.testsuite.writer.DeltaWriteStats in project hudi by apache.
the class HoodieTestSuiteWriter method commitCompaction.
/**
 * Commits a compaction through the write client, attaching the last checkpoint and, when
 * available, the path of the generated input data as extra commit metadata.
 *
 * <p>Nothing is done when {@code cfg.useDeltaStreamer} is set.
 *
 * @param records            write statuses of the compaction, used to build the compaction metadata
 * @param generatedDataStats stats of the generated input data; may be {@code null} or empty, in
 *                           which case no generated-data path is recorded
 * @param instantTime        instant of the compaction to commit; its value is read unconditionally
 * @throws IOException declared by the method signature
 */
public void commitCompaction(JavaRDD<WriteStatus> records, JavaRDD<DeltaWriteStats> generatedDataStats, Option<String> instantTime) throws IOException {
  if (cfg.useDeltaStreamer) {
    return;
  }

  Map<String, String> extraMetadata = new HashMap<>();
  // Store the checkpoint in the commit metadata just like
  // HoodieDeltaStreamer#commit(SparkRDDWriteClient, JavaRDD, Option).
  extraMetadata.put(HoodieDeltaStreamerWrapper.CHECKPOINT_KEY, lastCheckpoint.get());

  // Only the first stats entry is needed, so any non-empty RDD qualifies. The previous
  // "count() > 1" guard skipped batches with exactly one entry and, together with
  // collect().get(0), materialised the whole RDD just to read a single element.
  if (generatedDataStats != null && !generatedDataStats.isEmpty()) {
    // Just stores the path where this batch of data is generated to
    extraMetadata.put(GENERATED_DATA_PATH, generatedDataStats.map(s -> s.getFilePath()).first());
  }

  String compactionInstant = instantTime.get();
  HoodieSparkTable<HoodieRecordPayload> table = HoodieSparkTable.create(writeClient.getConfig(), writeClient.getEngineContext());
  HoodieCommitMetadata metadata = CompactHelpers.getInstance().createCompactionMetadata(table, compactionInstant, HoodieJavaRDD.of(records), writeClient.getConfig().getSchema());
  writeClient.commitCompaction(compactionInstant, metadata, Option.of(extraMetadata));
}
use of org.apache.hudi.integ.testsuite.writer.DeltaWriteStats in project hudi by apache.
the class TestDFSHoodieTestSuiteWriterAdapter method testDFSOneFileWrite.
/**
 * Drives {@code DFSDeltaWriterAdapter#write} with a mocked writer that always reports it can
 * write, feeding it the records of a ten-element RDD, and verifies ten {@code canWrite()} probes
 * and a single {@code close()} on that writer.
 */
@Test
public void testDFSOneFileWrite() throws IOException {
  JavaRDD<GenericRecord> inputRecords = TestUtils.makeRDD(jsc, 10);

  // Writer mock: always writable; returns itself if a new writer is ever requested.
  DeltaWriteStats statsStub = Mockito.mock(DeltaWriteStats.class);
  DeltaInputWriter<GenericRecord> writerMock = Mockito.mock(AvroFileDeltaInputWriter.class);
  Mockito.when(writerMock.canWrite()).thenReturn(true);
  Mockito.when(writerMock.getNewWriter()).thenReturn(writerMock);
  Mockito.when(writerMock.getDeltaWriteStats()).thenReturn(statsStub);

  DeltaWriterAdapter<GenericRecord> adapterUnderTest = new DFSDeltaWriterAdapter(writerMock);
  adapterUnderTest.write(inputRecords.collect().iterator());

  Mockito.verify(writerMock, Mockito.times(1)).close();
  Mockito.verify(writerMock, Mockito.times(10)).canWrite();
}
use of org.apache.hudi.integ.testsuite.writer.DeltaWriteStats in project hudi by apache.
the class TestFileDeltaInputWriter method testAvroFileSinkCreateNewWriter.
/**
 * Writes 100 generated payloads through an {@code AvroFileDeltaInputWriter}, closes it, then
 * obtains a new writer via {@code getNewWriter()} and checks the stats that writer reports after
 * being closed without any record written to it.
 */
@Test
public void testAvroFileSinkCreateNewWriter() throws IOException {
  // 1. Create an Avro File Sink Writer
  // NOTE(review): the last constructor argument (1024 * 1024L) looks like a size limit - confirm.
  DeltaInputWriter<GenericRecord> fileSinkWriter = new AvroFileDeltaInputWriter(jsc.hadoopConfiguration(), dfsBasePath, schemaProvider.getSourceSchema().toString(), 1024 * 1024L);
  GenericRecordFullPayloadGenerator payloadGenerator = new GenericRecordFullPayloadGenerator(schemaProvider.getSourceSchema());

  // 2. Generate 100 avro payloads and write them to an avro file.
  // A plain loop lets an IOException propagate directly through the method's throws clause.
  for (int i = 0; i < 100; i++) {
    fileSinkWriter.writeData(payloadGenerator.getNewPayload());
  }
  fileSinkWriter.close();
  String oldFilePath = fileSinkWriter.getDeltaWriteStats().getFilePath();
  assertFalse(oldFilePath == null);

  // 3. Ask for a new writer and close it right away, without writing any record to it.
  DeltaInputWriter<GenericRecord> newFileSinkWriter = fileSinkWriter.getNewWriter();
  newFileSinkWriter.close();
  DeltaWriteStats newStats = newFileSinkWriter.getDeltaWriteStats();

  // Expected value first, actual second, so failure messages read correctly.
  // 3674 is the byte count this test expects for a closed writer with no records.
  assertEquals(3674, newStats.getBytesWritten());
  assertEquals(0, newStats.getRecordsWritten());
  assertTrue(newStats.getFilePath() != null);
}
Aggregations