Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache: class TestUpsertPartitioner, method testUpsertPartitionerWithSmallFileHandlingWithInflightCompactionWithCanIndexLogFiles.
@Test
public void testUpsertPartitionerWithSmallFileHandlingWithInflightCompactionWithCanIndexLogFiles() throws Exception {
  // Note: this is used because it is the same partition path used in CompactionTestUtils.createCompactionPlan()
  final String testPartitionPath = DEFAULT_PARTITION_PATHS[0];
  HoodieWriteConfig config = makeHoodieClientConfigBuilder()
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024).build())
      .withIndexConfig(HoodieIndexConfig.newBuilder()
          .withIndexType(HoodieIndex.IndexType.HBASE)
          .withHBaseIndexConfig(HoodieHBaseIndexConfig.newBuilder().build())
          .build())
      .build();
  // This generates the initial commits and creates a compaction plan that includes the file groups created as part of it
  HoodieCompactionPlan plan = CompactionTestUtils.createCompactionPlan(metaClient, "001", "002", 1, true, false);
  FileCreateUtils.createRequestedCompactionCommit(basePath, "002", plan);
  // Simulate one more commit so that the inflight compaction is considered when building file groups in the file system view
  FileCreateUtils.createBaseFile(basePath, testPartitionPath, "003", "2", 1);
  FileCreateUtils.createCommit(basePath, "003");
  // The partitioner will attempt to assign inserts to file groups, including the base file created by the inflight compaction
  metaClient = HoodieTableMetaClient.reload(metaClient);
  HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(new String[] {testPartitionPath});
  List<HoodieRecord> insertRecords = dataGenerator.generateInserts("004", 100);
  WorkloadProfile profile = new WorkloadProfile(buildProfile(jsc.parallelize(insertRecords)));
  HoodieSparkTable table = HoodieSparkTable.create(config, context, metaClient);
  SparkUpsertDeltaCommitPartitioner partitioner = new SparkUpsertDeltaCommitPartitioner(profile, context, table, config);
  assertEquals(1, partitioner.numPartitions(), "Should have 1 partition");
  assertEquals(BucketType.UPDATE, partitioner.getBucketInfo(0).bucketType, "Bucket 0 is UPDATE");
  assertEquals("2", partitioner.getBucketInfo(0).fileIdPrefix, "Should be assigned to the only file id not pending compaction, which is 2");
}
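Distilled from the test above, a minimal sketch of the pattern it exercises. The names config, context, metaClient, and profile are assumed to be prepared exactly as in the test; surrounding imports and test scaffolding are omitted.
  HoodieSparkTable table = HoodieSparkTable.create(config, context, metaClient);
  SparkUpsertDeltaCommitPartitioner partitioner = new SparkUpsertDeltaCommitPartitioner(profile, context, table, config);
  // One bucket per Spark partition. Because the HBase index can index log files, inserts are routed
  // into the small base file created by commit 003 (file id "2") instead of a brand-new file group,
  // and file groups that are pending compaction are skipped.
  assertEquals(BucketType.UPDATE, partitioner.getBucketInfo(0).bucketType);
  assertEquals("2", partitioner.getBucketInfo(0).fileIdPrefix);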
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache: class TestUpsertPartitioner, method testUpsertPartitionerWithSmallFileHandlingAndClusteringPlan.
@Test
public void testUpsertPartitionerWithSmallFileHandlingAndClusteringPlan() throws Exception {
  final String testPartitionPath = DEFAULT_PARTITION_PATHS[0];
  // Create a HoodieWriteConfig with both inline and async clustering disabled
  HoodieWriteConfig config = makeHoodieClientConfigBuilder()
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().build())
      .withClusteringConfig(HoodieClusteringConfig.newBuilder()
          .withInlineClustering(false)
          .withAsyncClustering(false)
          .build())
      .withStorageConfig(HoodieStorageConfig.newBuilder()
          .hfileMaxFileSize(1000 * 1024)
          .parquetMaxFileSize(1000 * 1024)
          .build())
      .build();
  // Create a file slice with instant time 001 and build a clustering plan that includes this file slice
  HoodieClusteringPlan clusteringPlan = ClusteringTestUtils.createClusteringPlan(metaClient, "001", "1");
  // Create the requested replace commit
  HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
      .setClusteringPlan(clusteringPlan)
      .setOperationType(WriteOperationType.CLUSTER.name())
      .build();
  FileCreateUtils.createRequestedReplaceCommit(basePath, "002", Option.of(requestedReplaceMetadata));
  // Create file slice 003
  FileCreateUtils.createBaseFile(basePath, testPartitionPath, "003", "3", 1);
  FileCreateUtils.createCommit(basePath, "003");
  metaClient = HoodieTableMetaClient.reload(metaClient);
  // Generate new data to be ingested
  HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(new String[] {testPartitionPath});
  List<HoodieRecord> insertRecords = dataGenerator.generateInserts("004", 100);
  WorkloadProfile profile = new WorkloadProfile(buildProfile(jsc.parallelize(insertRecords)));
  HoodieSparkTable table = HoodieSparkTable.create(config, context, metaClient);
  // Create the UpsertPartitioner
  UpsertPartitioner partitioner = new UpsertPartitioner(profile, context, table, config);
  // There are now file slices 1 and 3; file slice 1 is contained in the pending clustering plan,
  // so only file slice 3 can be used for ingestion.
  assertEquals(1, partitioner.smallFiles.size(), "Should have 1 small file to be ingested.");
}
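The same create-table-then-partition pattern, reduced to the lines that matter for small-file handling under a pending clustering plan. This is a sketch; config, context, metaClient, and profile are assumed to be set up as in the test.
  HoodieSparkTable table = HoodieSparkTable.create(config, context, metaClient);
  UpsertPartitioner partitioner = new UpsertPartitioner(profile, context, table, config);
  // smallFiles lists the base files the partitioner may pad with new inserts; file slice "1"
  // is excluded because it is referenced by the pending clustering plan, leaving only slice "3"
  assertEquals(1, partitioner.smallFiles.size());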
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache: class TestArchivedCommitsCommand, method init.
@BeforeEach
public void init() throws Exception {
  HoodieCLI.conf = hadoopConf();
  // Create table and connect
  String tableName = tableName();
  tablePath = tablePath(tableName);
  new TableCommand().createTable(tablePath, tableName, "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload");
  HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
  // Generate archive
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(tablePath)
      .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build())
      .forTable("test-trip-table")
      .build();
  // Create six commits
  for (int i = 100; i < 106; i++) {
    String timestamp = String.valueOf(i);
    // Requested Compaction
    HoodieTestCommitMetadataGenerator.createCompactionAuxiliaryMetadata(tablePath,
        new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, timestamp), hadoopConf());
    // Inflight Compaction
    HoodieTestCommitMetadataGenerator.createCompactionAuxiliaryMetadata(tablePath,
        new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, timestamp), hadoopConf());
    HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath, timestamp, hadoopConf());
  }
  // Simulate a compaction commit in the metadata table timeline
  // so the archival in the data table can happen
  HoodieTestUtils.createCompactionCommitInMetadataTable(hadoopConf(), metaClient.getFs(), tablePath, "105");
  metaClient = HoodieTableMetaClient.reload(metaClient);
  // Reload the timeline and get all the commits before archive
  metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
  // Archive
  HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  archiver.archiveIfRequired(context());
}
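Outside the CLI test scaffolding, the archiving step itself is a handful of calls. A sketch, assuming cfg, context(), and a reloaded metaClient are available as above; the final inspection line is illustrative and not part of the test.
  HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
  // The archiver moves eligible completed instants off the active timeline,
  // honoring archiveCommitsWith(2, 3) and retainCommits(1) from the compaction config
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  archiver.archiveIfRequired(context());
  // Archived instants are then served from the archived timeline rather than the active one
  HoodieTableMetaClient reloaded = HoodieTableMetaClient.reload(metaClient);
  reloaded.getArchivedTimeline().getInstants().forEach(System.out::println);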
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache: class TestHoodieClientOnCopyOnWriteStorage, method testCommitWritesRelativePaths.
/**
 * Test to ensure commit metadata points to valid files.
 */
@ParameterizedTest
@MethodSource("populateMetaFieldsParams")
public void testCommitWritesRelativePaths(boolean populateMetaFields) throws Exception {
  HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder().withAutoCommit(false);
  addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields);
  try (SparkRDDWriteClient client = getHoodieWriteClient(cfgBuilder.build())) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();
    HoodieSparkTable table = HoodieSparkTable.create(cfgBuilder.build(), context, metaClient);
    String instantTime = "000";
    client.startCommitWithTime(instantTime);
    List<HoodieRecord> records = dataGen.generateInserts(instantTime, 200);
    JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
    JavaRDD<WriteStatus> result = client.bulkInsert(writeRecords, instantTime);
    assertTrue(client.commit(instantTime, result), "Commit should succeed");
    assertTrue(testTable.commitExists(instantTime), "After explicit commit, commit file should be created");
    // Get base file paths from commit metadata
    String actionType = metaClient.getCommitActionType();
    HoodieInstant commitInstant = new HoodieInstant(false, actionType, instantTime);
    HoodieTimeline commitTimeline = metaClient.getCommitTimeline().filterCompletedInstants();
    HoodieCommitMetadata commitMetadata =
        HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commitInstant).get(), HoodieCommitMetadata.class);
    String basePath = table.getMetaClient().getBasePath();
    Collection<String> commitPathNames = commitMetadata.getFileIdAndFullPaths(basePath).values();
    // Read from commit file
    try (FSDataInputStream inputStream = fs.open(testTable.getCommitFilePath(instantTime))) {
      String everything = FileIOUtils.readAsUTFString(inputStream);
      HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything, HoodieCommitMetadata.class);
      HashMap<String, String> paths = metadata.getFileIdAndFullPaths(basePath);
      // Compare values in both to make sure they are equal.
      for (String pathName : paths.values()) {
        assertTrue(commitPathNames.contains(pathName));
      }
    }
  }
}
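A sketch of the metadata check this test performs, assuming metaClient, basePath, and the commit's instantTime are at hand. The startsWith assertion is an illustrative simplification, not the test's own check.
  // Read the completed commit's metadata from the timeline and resolve the files it wrote
  HoodieTimeline commitTimeline = metaClient.getCommitTimeline().filterCompletedInstants();
  HoodieInstant commitInstant = new HoodieInstant(false, metaClient.getCommitActionType(), instantTime);
  HoodieCommitMetadata commitMetadata =
      HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commitInstant).get(), HoodieCommitMetadata.class);
  // The metadata stores paths relative to the table base path; getFileIdAndFullPaths()
  // rebuilds absolute paths, which should all resolve under the base path
  for (String fullPath : commitMetadata.getFileIdAndFullPaths(basePath).values()) {
    assertTrue(fullPath.startsWith(basePath));
  }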
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache: class TestUpdateSchemaEvolution, method assertSchemaEvolutionOnUpdateResult.
private void assertSchemaEvolutionOnUpdateResult(WriteStatus insertResult, HoodieSparkTable updateTable, List<HoodieRecord> updateRecords,
    String assertMsg, boolean isAssertThrow, Class expectedExceptionType) {
  jsc.parallelize(Arrays.asList(1)).map(x -> {
    Executable executable = () -> {
      HoodieMergeHandle mergeHandle = new HoodieMergeHandle(updateTable.getConfig(), "101", updateTable, updateRecords.iterator(),
          updateRecords.get(0).getPartitionPath(), insertResult.getFileId(), supplier, Option.empty());
      List<GenericRecord> oldRecords = BaseFileUtils.getInstance(updateTable.getBaseFileFormat())
          .readAvroRecords(updateTable.getHadoopConf(),
              new Path(updateTable.getConfig().getBasePath() + "/" + insertResult.getStat().getPath()),
              mergeHandle.getWriterSchemaWithMetaFields());
      for (GenericRecord rec : oldRecords) {
        mergeHandle.write(rec);
      }
      mergeHandle.close();
    };
    if (isAssertThrow) {
      assertThrows(expectedExceptionType, executable, assertMsg);
    } else {
      assertDoesNotThrow(executable, assertMsg);
    }
    return 1;
  }).collect();
}
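A hedged usage sketch of this helper. The record lists evolvedRecords and incompatibleRecords are hypothetical names for data generated against a changed schema, and HoodieUpsertException is only one example of an exception a caller might expect; the actual type depends on the incompatibility being tested.
  // Expect the rewrite to succeed when the schema change is backwards compatible
  assertSchemaEvolutionOnUpdateResult(insertResult, updateTable, evolvedRecords,
      "update should not throw when the new schema only adds a nullable field", false, null);
  // Expect a failure when the schema change is incompatible
  assertSchemaEvolutionOnUpdateResult(insertResult, updateTable, incompatibleRecords,
      "update should throw for an incompatible schema change", true, HoodieUpsertException.class);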