use of org.apache.hudi.table.action.commit.SmallFile in project hudi by apache.
the class TestBucketAssigner method testWriteProfileReload.
/**
 * Checks that a {@code WriteProfile} keeps reporting its previous view of small files
 * and complete instants until {@code reload} is invoked.
 */
@Test
public void testWriteProfileReload() throws Exception {
  final String partition = "par1";
  WriteProfile profile = new WriteProfile(writeConfig, context);

  // Before this test writes any data, the partition reports no small files.
  assertTrue(profile.getSmallFiles(partition).isEmpty(), "Should have no small files");

  // First write: without a reload the profile still reports no complete instant.
  TestData.writeData(TestData.DATA_SET_INSERT, conf);
  assertNull(getLastCompleteInstant(profile));

  // After the first reload the profile exposes a complete instant and one small file.
  profile.reload(1);
  String firstInstant = getLastCompleteInstant(profile);
  assertNotNull(firstInstant);
  List<SmallFile> afterFirstReload = profile.getSmallFiles(partition);
  assertThat("Should have 1 small file", afterFirstReload.size(), is(1));
  String firstReloadFileInstant = afterFirstReload.get(0).location.getInstantTime();
  assertThat("Small file should have same timestamp as last complete instant", firstReloadFileInstant, is(firstInstant));

  // Second write: the non-reloaded profile is expected to still point at the first instant.
  TestData.writeData(TestData.DATA_SET_INSERT, conf);
  List<SmallFile> beforeSecondReload = profile.getSmallFiles(partition);
  assertThat("Should have 1 small file", beforeSecondReload.size(), is(1));
  String staleFileInstant = beforeSecondReload.get(0).location.getInstantTime();
  assertThat("Non-reloaded write profile has the same base file view as before", staleFileInstant, is(firstInstant));

  // After the second reload the profile moves on to a new complete instant.
  profile.reload(2);
  String secondInstant = getLastCompleteInstant(profile);
  assertNotEquals(secondInstant, firstInstant, "Should have new complete instant");
  List<SmallFile> afterSecondReload = profile.getSmallFiles(partition);
  assertThat("Should have 1 small file", afterSecondReload.size(), is(1));
  String secondReloadFileInstant = afterSecondReload.get(0).location.getInstantTime();
  assertThat("Small file should have same timestamp as last complete instant", secondReloadFileInstant, is(secondInstant));
}
use of org.apache.hudi.table.action.commit.SmallFile in project hudi by apache.
the class TestBucketAssigner method testSmallFilesOfThisTask.
/**
 * Test that the file ids generated by a task are eventually shuffled back to that same task.
 *
 * <p>Each stage creates file ids with {@code createFileIdOfThisTask()} on an assigner and
 * then checks that {@code smallFilesOfThisTask} on the same assigner keeps every one of them.
 */
@Test
void testSmallFilesOfThisTask() {
  MockBucketAssigner mockBucketAssigner1 = new MockBucketAssigner(context, writeConfig);
  String fileId1 = mockBucketAssigner1.createFileIdOfThisTask();
  SmallFile smallFile1 = new SmallFile();
  smallFile1.location = new HoodieRecordLocation("t0", fileId1);
  smallFile1.sizeBytes = 123;
  List<SmallFile> smallFiles1 = mockBucketAssigner1.smallFilesOfThisTask(Collections.singletonList(smallFile1));
  assertThat(smallFiles1.size(), is(1));

  // modify the parallelism and test again
  MockBucketAssigner mockBucketAssigner2 = new MockBucketAssigner(123, 200, context, writeConfig, Collections.emptyMap());
  String fileId2 = mockBucketAssigner2.createFileIdOfThisTask();
  SmallFile smallFile2 = new SmallFile();
  smallFile2.location = new HoodieRecordLocation("t0", fileId2);
  smallFile2.sizeBytes = 123;
  String fileId3 = mockBucketAssigner2.createFileIdOfThisTask();
  SmallFile smallFile3 = new SmallFile();
  smallFile3.location = new HoodieRecordLocation("t0", fileId3);
  smallFile3.sizeBytes = 456;

  // Filter with the assigner that generated the ids; filtering with the first assigner
  // would leave the modified-parallelism assigner unverified.
  List<SmallFile> smallFiles2 = mockBucketAssigner2.smallFilesOfThisTask(Arrays.asList(smallFile2, smallFile3));
  assertThat(smallFiles2.size(), is(2));
}
use of org.apache.hudi.table.action.commit.SmallFile in project hudi by apache.
the class TestBucketAssigner method testUpdateAndInsertWithSmallFiles.
/**
 * Interleaves updates and inserts on an assigner that was seeded with small files and
 * checks which bucket the inserts are reported against.
 */
@Test
public void testUpdateAndInsertWithSmallFiles() {
  SmallFile smallF0 = new SmallFile();
  smallF0.sizeBytes = 12;
  smallF0.location = new HoodieRecordLocation("t0", "f0");

  SmallFile fullF1 = new SmallFile();
  // sized so that no space is left to append new records to this bucket
  fullF1.sizeBytes = 122879;
  fullF1.location = new HoodieRecordLocation("t0", "f1");

  SmallFile smallF2 = new SmallFile();
  smallF2.sizeBytes = 56;
  smallF2.location = new HoodieRecordLocation("t0", "f2");

  List<SmallFile> par1Files = Arrays.asList(smallF0, fullF1);
  List<SmallFile> par2Files = Collections.singletonList(smallF2);
  Map<String, List<SmallFile>> filesByPartition = new HashMap<>();
  filesByPartition.put("par1", par1Files);
  filesByPartition.put("par2", par2Files);

  MockBucketAssigner assigner = new MockBucketAssigner(context, writeConfig, filesByPartition);

  // An update on f0 followed by an insert into par1: expected to be an UPDATE bucket on f0.
  assigner.addUpdate("par1", "f0");
  BucketInfo firstInsert = assigner.addInsert("par1");
  assertBucketEquals(firstInsert, "par1", BucketType.UPDATE, "f0");

  // Further inserts into par1 are expected to stay on f0.
  assigner.addInsert("par1");
  BucketInfo repeatedInsert = assigner.addInsert("par1");
  assertBucketEquals(repeatedInsert, "par1", BucketType.UPDATE, "f0");

  // An update on a different file id in par1 must not change the expected insert target.
  assigner.addUpdate("par1", "f2");
  assigner.addInsert("par1");
  BucketInfo insertAfterOtherUpdate = assigner.addInsert("par1");
  assertBucketEquals(insertAfterOtherUpdate, "par1", BucketType.UPDATE, "f0");

  // In par2, inserts are expected to go to the partition's own small file f2.
  assigner.addUpdate("par2", "f0");
  assigner.addInsert("par2");
  BucketInfo par2Insert = assigner.addInsert("par2");
  assertBucketEquals(par2Insert, "par2", BucketType.UPDATE, "f2");
}
use of org.apache.hudi.table.action.commit.SmallFile in project hudi by apache.
the class TestBucketAssigner method testInsertWithPartialSmallFiles.
/**
 * Verifies that each task is handed only a subset of a partition's small files.
 *
 * <p>Two assigners are built over the same small-file map with different leading
 * constructor arguments (presumably task id and number of tasks; confirm against
 * {@code MockBucketAssigner}), and each is expected to pick a different file.
 */
@Test
public void testInsertWithPartialSmallFiles() {
  SmallFile smallF0 = new SmallFile();
  smallF0.sizeBytes = 12;
  smallF0.location = new HoodieRecordLocation("t0", "f0");

  SmallFile fullF1 = new SmallFile();
  // sized so that no space is left to append new records to this bucket
  fullF1.sizeBytes = 122879;
  fullF1.location = new HoodieRecordLocation("t0", "f1");

  SmallFile smallF2 = new SmallFile();
  smallF2.sizeBytes = 56;
  smallF2.location = new HoodieRecordLocation("t0", "f2");

  Map<String, List<SmallFile>> filesByPartition = new HashMap<>();
  List<SmallFile> par1Files = Arrays.asList(smallF0, fullF1, smallF2);
  filesByPartition.put("par1", par1Files);

  // First assigner (0, 2): inserts into par1 are expected to land on f2.
  MockBucketAssigner firstAssigner = new MockBucketAssigner(0, 2, context, writeConfig, filesByPartition);
  BucketInfo firstResult = firstAssigner.addInsert("par1");
  assertBucketEquals(firstResult, "par1", BucketType.UPDATE, "f2");
  firstAssigner.addInsert("par1");
  firstResult = firstAssigner.addInsert("par1");
  assertBucketEquals(firstResult, "par1", BucketType.UPDATE, "f2");

  // par3 has no entry in the small-file map, so INSERT buckets are expected.
  firstResult = firstAssigner.addInsert("par3");
  assertBucketEquals(firstResult, "par3", BucketType.INSERT);
  firstResult = firstAssigner.addInsert("par3");
  assertBucketEquals(firstResult, "par3", BucketType.INSERT);

  // Second assigner (1, 2): inserts into par1 are expected to land on f0 instead.
  MockBucketAssigner secondAssigner = new MockBucketAssigner(1, 2, context, writeConfig, filesByPartition);
  BucketInfo secondResult = secondAssigner.addInsert("par1");
  assertBucketEquals(secondResult, "par1", BucketType.UPDATE, "f0");
  secondAssigner.addInsert("par1");
  secondResult = secondAssigner.addInsert("par1");
  assertBucketEquals(secondResult, "par1", BucketType.UPDATE, "f0");

  // Same par3 expectation for the second assigner.
  secondResult = secondAssigner.addInsert("par3");
  assertBucketEquals(secondResult, "par3", BucketType.INSERT);
  secondResult = secondAssigner.addInsert("par3");
  assertBucketEquals(secondResult, "par3", BucketType.INSERT);
}
Aggregations