Usage of org.apache.hudi.sink.partitioner.profile.WriteProfile in the Apache Hudi project: class TestBucketAssigner, method testWriteProfileMetadataCache.
@Test
public void testWriteProfileMetadataCache() throws Exception {
  WriteProfile writeProfile = new WriteProfile(writeConfig, context);
  // A fresh table has no completed instants, so the metadata cache starts empty.
  assertTrue(writeProfile.getMetadataCache().isEmpty(), "Empty table should not have any instant metadata");
  // Write 3 instants of data.
  for (int i = 0; i < 3; i++) {
    TestData.writeData(TestData.DATA_SET_INSERT, conf);
  }
  // Reloading the profile triggers the metadata loading for the new instants.
  writeProfile.reload(1);
  assertThat("Metadata cache should have same number entries as timeline instants", writeProfile.getMetadataCache().size(), is(3));
  // Fetching small files should reuse the cached metadata rather than reload it.
  writeProfile.getSmallFiles("par1");
  assertThat("The metadata should be reused", writeProfile.getMetadataCache().size(), is(3));
}
Usage of org.apache.hudi.sink.partitioner.profile.WriteProfile in the Apache Hudi project: class TestBucketAssigner, method testWriteProfileReload.
@Test
public void testWriteProfileReload() throws Exception {
  WriteProfile writeProfile = new WriteProfile(writeConfig, context);
  // A fresh profile over an empty table sees no small files.
  List<SmallFile> initialSmallFiles = writeProfile.getSmallFiles("par1");
  assertTrue(initialSmallFiles.isEmpty(), "Should have no small files");
  TestData.writeData(TestData.DATA_SET_INSERT, conf);
  // The new commit stays invisible until the profile is reloaded.
  String staleInstant = getLastCompleteInstant(writeProfile);
  assertNull(staleInstant);
  writeProfile.reload(1);
  String firstInstant = getLastCompleteInstant(writeProfile);
  assertNotNull(firstInstant);
  // After reloading, the small file view reflects the completed instant.
  List<SmallFile> reloadedSmallFiles = writeProfile.getSmallFiles("par1");
  assertThat("Should have 1 small file", reloadedSmallFiles.size(), is(1));
  assertThat("Small file should have same timestamp as last complete instant", reloadedSmallFiles.get(0).location.getInstantTime(), is(firstInstant));
  TestData.writeData(TestData.DATA_SET_INSERT, conf);
  // Without a reload the profile keeps serving the previous base file view.
  List<SmallFile> staleSmallFiles = writeProfile.getSmallFiles("par1");
  assertThat("Should have 1 small file", staleSmallFiles.size(), is(1));
  assertThat("Non-reloaded write profile has the same base file view as before", staleSmallFiles.get(0).location.getInstantTime(), is(firstInstant));
  writeProfile.reload(2);
  // The second reload picks up the newer complete instant and its file view.
  String secondInstant = getLastCompleteInstant(writeProfile);
  assertNotEquals(secondInstant, firstInstant, "Should have new complete instant");
  List<SmallFile> freshSmallFiles = writeProfile.getSmallFiles("par1");
  assertThat("Should have 1 small file", freshSmallFiles.size(), is(1));
  assertThat("Small file should have same timestamp as last complete instant", freshSmallFiles.get(0).location.getInstantTime(), is(secondInstant));
}
Usage of org.apache.hudi.sink.partitioner.profile.WriteProfile in the Apache Hudi project: class BucketAssigners, method create.
/**
 * Creates a {@code BucketAssigner} backed by the shared singleton write profile.
 *
 * @param taskID           The task ID
 * @param maxParallelism   The max parallelism
 * @param numTasks         The number of tasks
 * @param ignoreSmallFiles Whether to ignore the small files
 * @param tableType        The table type
 * @param context          The engine context
 * @param config           The configuration
 * @return the bucket assigner instance
 */
public static BucketAssigner create(
    int taskID,
    int maxParallelism,
    int numTasks,
    boolean ignoreSmallFiles,
    HoodieTableType tableType,
    HoodieFlinkEngineContext context,
    HoodieWriteConfig config) {
  // MERGE_ON_READ tables take the delta write path, which selects the profile flavor.
  final boolean isDeltaTable = tableType.equals(HoodieTableType.MERGE_ON_READ);
  final WriteProfile profile = WriteProfiles.singleton(ignoreSmallFiles, isDeltaTable, config, context);
  return new BucketAssigner(taskID, maxParallelism, numTasks, profile, config);
}
Aggregations