Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.
Class TestStatsCommand, method testFileSizeStats.
/**
 * Test case for command 'stats filesizes'.
 */
@Test
public void testFileSizeStats() throws Exception {
  String commit1 = "100";
  String commit2 = "101";
  Map<String, Integer[]> data = new LinkedHashMap<>();
  data.put(commit1, new Integer[] {100, 120, 150});
  data.put(commit2, new Integer[] {200, 180, 250, 300});

  // generate data files
  String partition1 = HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
  String partition2 = HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH;
  String partition3 = HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH;
  HoodieTestTable testTable = HoodieTestTable.of(HoodieCLI.getTableMetaClient());
  Integer[] data1 = data.get(commit1);
  assertTrue(3 <= data1.length);
  testTable.addCommit(commit1)
      .withBaseFilesInPartition(partition1, data1[0])
      .withBaseFilesInPartition(partition2, data1[1])
      .withBaseFilesInPartition(partition3, data1[2]);
  Integer[] data2 = data.get(commit2);
  assertTrue(4 <= data2.length);
  testTable.addCommit(commit2)
      .withBaseFilesInPartition(partition1, data2[0])
      .withBaseFilesInPartition(partition2, data2[1], data2[2])
      .withBaseFilesInPartition(partition3, data2[3]);

  CommandResult cr = shell().executeCommand("stats filesizes");
  assertTrue(cr.isSuccess());

  Histogram globalHistogram = new Histogram(new UniformReservoir(StatsCommand.MAX_FILES));
  HashMap<String, Histogram> commitHistoMap = new HashMap<>();
  data.forEach((k, v) -> {
    commitHistoMap.put(k, new Histogram(new UniformReservoir(StatsCommand.MAX_FILES)));
    for (int value : v) {
      commitHistoMap.get(k).update(value);
      globalHistogram.update(value);
    }
  });

  // generate expected output
  List<Comparable[]> rows = new ArrayList<>();
  for (Map.Entry<String, Histogram> entry : commitHistoMap.entrySet()) {
    Snapshot s = entry.getValue().getSnapshot();
    rows.add(new StatsCommand().printFileSizeHistogram(entry.getKey(), s));
  }
  Snapshot s = globalHistogram.getSnapshot();
  rows.add(new StatsCommand().printFileSizeHistogram("ALL", s));

  TableHeader header = new TableHeader()
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_COMMIT_TIME)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MIN)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_10TH)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_50TH)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_AVG)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_95TH)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_MAX)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_NUM_FILES)
      .addTableHeaderField(HoodieTableHeaderFields.HEADER_HISTOGRAM_STD_DEV);
  String expect = HoodiePrintHelper.print(
      header, new StatsCommand().getFieldNameToConverterMap(), "", false, -1, false, rows);
  expect = removeNonWordAndStripSpace(expect);
  String got = removeNonWordAndStripSpace(cr.getResult().toString());
  assertEquals(expect, got);
}
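For readers unfamiliar with the Dropwizard/Codahale metrics classes used above, the following standalone sketch shows the histogram mechanics the test reproduces: each base-file size is fed into a reservoir-backed Histogram, and its Snapshot exposes the min/median/95th/max statistics that 'stats filesizes' prints per commit and for the "ALL" row. The reservoir size and class name here are hypothetical; the test itself uses StatsCommand.MAX_FILES.

import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;

public class FileSizeHistogramSketch {
  public static void main(String[] args) {
    // Reservoir size is a hypothetical stand-in for StatsCommand.MAX_FILES.
    Histogram histogram = new Histogram(new UniformReservoir(1_000_000));
    for (int fileSize : new int[] {100, 120, 150}) {  // the sizes used for commit "100" above
      histogram.update(fileSize);
    }
    Snapshot snapshot = histogram.getSnapshot();
    System.out.println(snapshot.getMin());             // 100
    System.out.println(snapshot.getMedian());          // 120.0
    System.out.println(snapshot.get95thPercentile());  // 150.0
    System.out.println(snapshot.getMax());             // 150
    System.out.println(snapshot.size());               // 3
    System.out.println(snapshot.getStdDev());          // sample standard deviation of the three sizes
  }
}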
Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.
Class TestFileSystemBackedTableMetadata, method testDatePartitionedTableWithAssumeDateIsFalse.
/**
 * Test listing of partition results for date-based partitions with assumeDatePartitioning = false.
 *
 * @throws Exception
 */
@Test
public void testDatePartitionedTableWithAssumeDateIsFalse() throws Exception {
  String instant = "100";
  hoodieTestTable = hoodieTestTable.addCommit(instant);
  // Generate 10 base files under each partition, along with partition metadata files
  DATE_PARTITIONS.stream().forEach(p -> {
    try {
      hoodieTestTable = hoodieTestTable
          .withPartitionMetaFiles(p)
          .withBaseFilesInPartition(p, IntStream.range(0, 10).toArray());
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  });
  HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
  FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
      new FileSystemBackedTableMetadata(localEngineContext,
          new SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
  Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
  List<String> fullPartitionPaths = DATE_PARTITIONS.stream()
      .map(p -> basePath + "/" + p)
      .collect(Collectors.toList());
  Map<String, FileStatus[]> partitionToFilesMap =
      fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
  for (String p : fullPartitionPaths) {
    Assertions.assertEquals(10, partitionToFilesMap.get(p).length);
  }
}
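The DATE_PARTITIONS constant and the hoodieTestTable, metaClient, and basePath fields are declared elsewhere in TestFileSystemBackedTableMetadata and are not part of this snippet. The assertion of three partition paths implies a fixture along the following lines; the concrete values below are hypothetical illustrations, not the actual constant:

import java.util.Arrays;
import java.util.List;

// Hypothetical values; the real constant lives in TestFileSystemBackedTableMetadata.
private static final List<String> DATE_PARTITIONS =
    Arrays.asList("2019/01/01", "2020/01/02", "2021/03/01");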
Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.
Class TestFileSystemBackedTableMetadata, method testDatePartitionedTable.
/**
 * Test listing of partition results for date-based partitions with assumeDatePartitioning = true.
 *
 * @throws Exception
 */
@Test
public void testDatePartitionedTable() throws Exception {
  String instant = "100";
  hoodieTestTable = hoodieTestTable.addCommit(instant);
  // Generate 10 base files under each partition
  DATE_PARTITIONS.stream().forEach(p -> {
    try {
      hoodieTestTable = hoodieTestTable.withBaseFilesInPartition(p, IntStream.range(0, 10).toArray());
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  });
  HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
  FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
      new FileSystemBackedTableMetadata(localEngineContext,
          new SerializableConfiguration(metaClient.getHadoopConf()), basePath, true);
  Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
  Assertions.assertEquals(10,
      fileSystemBackedTableMetadata.getAllFilesInPartition(new Path(basePath + "/" + DATE_PARTITIONS.get(0))).length);
  List<String> fullPartitionPaths = DATE_PARTITIONS.stream()
      .map(p -> basePath + "/" + p)
      .collect(Collectors.toList());
  Map<String, FileStatus[]> partitionToFilesMap =
      fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
  for (String p : fullPartitionPaths) {
    Assertions.assertEquals(10, partitionToFilesMap.get(p).length);
  }
}
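Compared with testDatePartitionedTableWithAssumeDateIsFalse above, the only setup difference is the last constructor argument: here it is true, so the date-style directories are listed as partitions even though the setup never calls withPartitionMetaFiles, whereas the earlier test passed false and appears to rely on the .hoodie_partition_metadata files it creates. A side-by-side sketch of the two modes, reusing the variables already built in the tests above:

// Sketch only, reusing localEngineContext, metaClient and basePath from the tests above.
// assumeDatePartitioning = true: yyyy/mm/dd directories are treated as partitions directly.
FileSystemBackedTableMetadata assumingDatePartitions =
    new FileSystemBackedTableMetadata(localEngineContext,
        new SerializableConfiguration(metaClient.getHadoopConf()), basePath, true);

// assumeDatePartitioning = false: partition discovery depends on partition metadata files
// such as those written by withPartitionMetaFiles in the previous test.
FileSystemBackedTableMetadata discoveringViaMetaFiles =
    new FileSystemBackedTableMetadata(localEngineContext,
        new SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);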
Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.
Class TestFileSystemBackedTableMetadata, method testMultiLevelEmptyPartitionTable.
@Test
public void testMultiLevelEmptyPartitionTable() throws Exception {
  String instant = "100";
  hoodieTestTable = hoodieTestTable.addCommit(instant);
  // Create only partition metadata files (no data files) under each partition
  MULTI_LEVEL_PARTITIONS.stream().forEach(p -> {
    try {
      hoodieTestTable = hoodieTestTable.withPartitionMetaFiles(p);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  });
  HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
  FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
      new FileSystemBackedTableMetadata(localEngineContext,
          new SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
  Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
  Assertions.assertEquals(0,
      fileSystemBackedTableMetadata.getAllFilesInPartition(new Path(basePath + "/" + MULTI_LEVEL_PARTITIONS.get(0))).length);
  List<String> fullPartitionPaths = MULTI_LEVEL_PARTITIONS.stream()
      .map(p -> basePath + "/" + p)
      .collect(Collectors.toList());
  Map<String, FileStatus[]> partitionToFilesMap =
      fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
  for (String p : fullPartitionPaths) {
    Assertions.assertEquals(0, partitionToFilesMap.get(p).length);
  }
}
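As with DATE_PARTITIONS, the MULTI_LEVEL_PARTITIONS constant is declared outside this snippet. Given the expected count of three and the multi-level layout the test name suggests, a hypothetical fixture could look like the fragment below; the values are illustrative only:

// Hypothetical values; the real constant lives in TestFileSystemBackedTableMetadata.
private static final List<String> MULTI_LEVEL_PARTITIONS =
    Arrays.asList("2022/01/01/us", "2022/01/02/us", "2022/01/03/eu");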
Use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.
Class TestClientRollback, method testRollbackCommit.
/**
 * Test cases for the effects of rolling back completed/inflight commits.
 */
@Test
public void testRollbackCommit() throws Exception {
  // Let's create some commit files and base files
  final String p1 = "2016/05/01";
  final String p2 = "2016/05/02";
  final String p3 = "2016/05/06";
  final String commitTime1 = "20160501010101";
  final String commitTime2 = "20160502020601";
  final String commitTime3 = "20160506030611";
  Map<String, String> partitionAndFileId1 = new HashMap<String, String>() {
    {
      put(p1, "id11");
      put(p2, "id12");
      put(p3, "id13");
    }
  };
  Map<String, String> partitionAndFileId2 = new HashMap<String, String>() {
    {
      put(p1, "id21");
      put(p2, "id22");
      put(p3, "id23");
    }
  };
  Map<String, String> partitionAndFileId3 = new HashMap<String, String>() {
    {
      put(p1, "id31");
      put(p2, "id32");
      put(p3, "id33");
    }
  };
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withRollbackUsingMarkers(false)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).build())
      .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build())
      .build();
  HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
  HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);

  Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap1 = new HashMap<>();
  partitionAndFileId1.forEach((k, v) -> partitionToFilesNameLengthMap1.put(k, Collections.singletonList(Pair.of(v, 100))));
  testTable.doWriteOperation(commitTime1, WriteOperationType.INSERT, Arrays.asList(p1, p2, p3),
      partitionToFilesNameLengthMap1, false, false);

  Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap2 = new HashMap<>();
  partitionAndFileId2.forEach((k, v) -> partitionToFilesNameLengthMap2.put(k, Collections.singletonList(Pair.of(v, 200))));
  testTable.doWriteOperation(commitTime2, WriteOperationType.INSERT, Collections.emptyList(),
      partitionToFilesNameLengthMap2, false, false);

  Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap3 = new HashMap<>();
  partitionAndFileId3.forEach((k, v) -> partitionToFilesNameLengthMap3.put(k, Collections.singletonList(Pair.of(v, 300))));
  testTable.doWriteOperation(commitTime3, WriteOperationType.INSERT, Collections.emptyList(),
      partitionToFilesNameLengthMap3, false, true);

  try (SparkRDDWriteClient client = getHoodieWriteClient(config)) {
    // Rollback commit3
    client.rollback(commitTime3);
    assertFalse(testTable.inflightCommitExists(commitTime3));
    assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
    assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
    assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));

    // Simulate a partial failure, where the .inflight file was not deleted but the data files were.
    testTable.addInflightCommit(commitTime3);
    client.rollback(commitTime3);
    assertFalse(testTable.inflightCommitExists(commitTime3));
    assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
    assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));

    // Rollback commit2
    client.rollback(commitTime2);
    assertFalse(testTable.commitExists(commitTime2));
    assertFalse(testTable.inflightCommitExists(commitTime2));
    assertFalse(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
    assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));

    // Simulate a partial failure, where only the .commit => .inflight renaming succeeded, leaving an
    // .inflight commit and a bunch of data files around.
    testTable.addInflightCommit(commitTime2).withBaseFilesInPartitions(partitionAndFileId2);
    client.rollback(commitTime2);
    assertFalse(testTable.commitExists(commitTime2));
    assertFalse(testTable.inflightCommitExists(commitTime2));
    assertFalse(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
    assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));

    // Rollback commit1 and check the results
    client.rollback(commitTime1);
    assertFalse(testTable.commitExists(commitTime1));
    assertFalse(testTable.inflightCommitExists(commitTime1));
    assertFalse(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
  }
}
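A small design note on the setup above: the partitionAndFileId maps are built with double-brace initializers, which create anonymous HashMap subclasses that hold a reference to the enclosing test instance. The standalone sketch below shows a hypothetical helper (not part of TestClientRollback) that produces the same mapping without that idiom:

import java.util.HashMap;
import java.util.Map;

public class PartitionFileIdSketch {
  // Hypothetical helper, not present in the Hudi test.
  static Map<String, String> fileIdsPerPartition(String idPrefix, String... partitions) {
    Map<String, String> partitionToFileId = new HashMap<>();
    for (int i = 0; i < partitions.length; i++) {
      partitionToFileId.put(partitions[i], idPrefix + (i + 1));
    }
    return partitionToFileId;
  }

  public static void main(String[] args) {
    // Equivalent to partitionAndFileId1 in the test above.
    Map<String, String> partitionAndFileId1 =
        fileIdsPerPartition("id1", "2016/05/01", "2016/05/02", "2016/05/06");
    System.out.println(partitionAndFileId1);  // {2016/05/01=id11, 2016/05/02=id12, 2016/05/06=id13} (order may vary)
  }
}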