Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.
From class TestUpdateSchemaEvolution, method testSchemaEvolutionOnUpdateMisMatchWithChangeColumnType.
@Test
public void testSchemaEvolutionOnUpdateMisMatchWithChangeColumnType() throws Exception {
  final WriteStatus insertResult = prepareFirstRecordCommit(generateOneRecordForExampleSchema());
  // Now try an update with an evolved schema.
  // The evolved schema is not guaranteed to preserve the original field ordering.
  final HoodieWriteConfig config = makeHoodieClientConfig("/exampleEvolvedSchemaColumnType.avsc");
  final HoodieSparkTable table = HoodieSparkTable.create(config, context);
  String recordStr = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
      + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":\"12\"}";
  List<HoodieRecord> updateRecords = buildUpdateRecords(recordStr, insertResult.getFileId());
  String assertMsg = "UpdateFunction when change column type, org.apache.parquet.avro.AvroConverters$FieldUTF8Converter";
  assertSchemaEvolutionOnUpdateResult(insertResult, table, updateRecords, assertMsg, true, ParquetDecodingException.class);
}
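The update tests on this page rely on a buildUpdateRecords helper that is not reproduced here. The sketch below shows what it plausibly does, reusing RawTripTestPayload and HoodieAvroRecord exactly as the insert path in prepareFirstRecordCommit (next example) does; the HoodieRecordLocation tagging with instant "100" is an assumption, not the verbatim helper from TestUpdateSchemaEvolution.

// Hypothetical sketch of buildUpdateRecords; the real helper in TestUpdateSchemaEvolution may differ.
private List<HoodieRecord> buildUpdateRecords(String recordStr, String insertFileId) throws IOException {
  List<HoodieRecord> updateRecords = new ArrayList<>();
  RawTripTestPayload rowChange = new RawTripTestPayload(recordStr);
  HoodieRecord record =
      new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange);
  // Tag the record with the file id returned by the first insert so the write path treats it as an
  // update of that existing base file ("100" is the instant time used by prepareFirstRecordCommit).
  record.unseal();
  record.setCurrentLocation(new HoodieRecordLocation("100", insertFileId));
  record.seal();
  updateRecords.add(record);
  return updateRecords;
}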
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.
From class TestUpdateSchemaEvolution, method prepareFirstRecordCommit.
private WriteStatus prepareFirstRecordCommit(List<String> recordsStrs) throws IOException {
  // Create a bunch of records with an old version of schema
  final HoodieWriteConfig config = makeHoodieClientConfig("/exampleSchema.avsc");
  final HoodieSparkTable table = HoodieSparkTable.create(config, context);
  final List<WriteStatus> statuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
    List<HoodieRecord> insertRecords = new ArrayList<>();
    for (String recordStr : recordsStrs) {
      RawTripTestPayload rowChange = new RawTripTestPayload(recordStr);
      insertRecords.add(new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange));
    }
    Map<String, HoodieRecord> insertRecordMap = insertRecords.stream()
        .collect(Collectors.toMap(r -> r.getRecordKey(), Function.identity()));
    HoodieCreateHandle<?, ?, ?, ?> createHandle = new HoodieCreateHandle(
        config, "100", table, insertRecords.get(0).getPartitionPath(), "f1-0", insertRecordMap, supplier);
    createHandle.write();
    return createHandle.close().get(0);
  }).collect();
  final Path commitFile = new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100"));
  FSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile);
  return statuses.get(0);
}
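Both the insert and update paths call a makeHoodieClientConfig helper that is also not shown on this page. A minimal sketch, assuming the schema file is a test resource on the classpath and that only the base path and write schema need to be set, might look like this; the real test helper may configure additional options.

// Hypothetical sketch of makeHoodieClientConfig; the actual helper may set more write options.
private HoodieWriteConfig makeHoodieClientConfig(String schemaResourcePath) throws IOException {
  // Parse the Avro schema bundled with the test resources (assumption: it is on the classpath).
  Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream(schemaResourcePath));
  return HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(schema.toString())
      .build();
}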
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.
From class TestUpdateSchemaEvolution, method testSchemaEvolutionOnUpdateSuccessWithAddColumnHaveDefault.
@Test
public void testSchemaEvolutionOnUpdateSuccessWithAddColumnHaveDefault() throws Exception {
  final WriteStatus insertResult = prepareFirstRecordCommit(generateMultipleRecordsForExampleSchema());
  // Now try an update with an evolved schema.
  // The evolved schema is not guaranteed to preserve the original field ordering.
  final HoodieWriteConfig config = makeHoodieClientConfig("/exampleEvolvedSchema.avsc");
  final HoodieSparkTable table = HoodieSparkTable.create(config, context);
  // New content with a value for the newly added field.
  String recordStr = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\","
      + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12,\"added_field\":1}";
  List<HoodieRecord> updateRecords = buildUpdateRecords(recordStr, insertResult.getFileId());
  String assertMsg = "UpdateFunction could not read records written with exampleSchema.avsc using the "
      + "exampleEvolvedSchema.avsc";
  assertSchemaEvolutionOnUpdateResult(insertResult, table, updateRecords, assertMsg, false, null);
}
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.
From class TestClientRollback, method testSavepointAndRollback.
/**
 * Test case for rollback-savepoint interaction.
 */
@Test
public void testSavepointAndRollback() throws Exception {
  HoodieWriteConfig cfg = getConfigBuilder()
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1).build())
      .build();
  try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
    HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, basePath);
    /**
     * Write 1 (only inserts)
     */
    String newCommitTime = "001";
    client.startCommitWithTime(newCommitTime);
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 200);
    JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
    List<WriteStatus> statuses = client.upsert(writeRecords, newCommitTime).collect();
    assertNoWriteErrors(statuses);
    /**
     * Write 2 (updates)
     */
    newCommitTime = "002";
    client.startCommitWithTime(newCommitTime);
    records = dataGen.generateUpdates(newCommitTime, records);
    statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect();
    // Verify there are no errors
    assertNoWriteErrors(statuses);
    client.savepoint("hoodie-unit-test", "test");
    /**
     * Write 3 (updates)
     */
    newCommitTime = "003";
    client.startCommitWithTime(newCommitTime);
    records = dataGen.generateUpdates(newCommitTime, records);
    statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect();
    // Verify there are no errors
    assertNoWriteErrors(statuses);
    HoodieWriteConfig config = getConfig();
    List<String> partitionPaths = FSUtils.getAllPartitionPaths(context, config.getMetadataConfig(), cfg.getBasePath());
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieSparkTable table = HoodieSparkTable.create(getConfig(), context, metaClient);
    final BaseFileOnlyView view1 = table.getBaseFileOnlyView();
    List<HoodieBaseFile> dataFiles = partitionPaths.stream()
        .flatMap(s -> view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003")))
        .collect(Collectors.toList());
    assertEquals(3, dataFiles.size(), "The data files for commit 003 should be present");
    dataFiles = partitionPaths.stream()
        .flatMap(s -> view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002")))
        .collect(Collectors.toList());
    assertEquals(3, dataFiles.size(), "The data files for commit 002 should be present");
    /**
     * Write 4 (updates)
     */
    newCommitTime = "004";
    client.startCommitWithTime(newCommitTime);
    records = dataGen.generateUpdates(newCommitTime, records);
    statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect();
    // Verify there are no errors
    assertNoWriteErrors(statuses);
    metaClient = HoodieTableMetaClient.reload(metaClient);
    table = HoodieSparkTable.create(getConfig(), context, metaClient);
    final BaseFileOnlyView view2 = table.getBaseFileOnlyView();
    dataFiles = partitionPaths.stream()
        .flatMap(s -> view2.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004")))
        .collect(Collectors.toList());
    assertEquals(3, dataFiles.size(), "The data files for commit 004 should be present");
    // Rolling back to a non-existent savepoint must not succeed
    assertThrows(HoodieRollbackException.class, () -> {
      client.restoreToSavepoint("001");
    }, "Rolling back to a non-existent savepoint should not be allowed");
    // Rollback to savepoint 002
    HoodieInstant savepoint = table.getCompletedSavepointTimeline().getInstants().findFirst().get();
    client.restoreToSavepoint(savepoint.getTimestamp());
    metaClient = HoodieTableMetaClient.reload(metaClient);
    table = HoodieSparkTable.create(getConfig(), context, metaClient);
    final BaseFileOnlyView view3 = table.getBaseFileOnlyView();
    dataFiles = partitionPaths.stream()
        .flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002")))
        .collect(Collectors.toList());
    assertEquals(3, dataFiles.size(), "The data files for commit 002 should be available");
    dataFiles = partitionPaths.stream()
        .flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003")))
        .collect(Collectors.toList());
    assertEquals(0, dataFiles.size(), "The data files for commit 003 should be rolled back");
    dataFiles = partitionPaths.stream()
        .flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004")))
        .collect(Collectors.toList());
    assertEquals(0, dataFiles.size(), "The data files for commit 004 should be rolled back");
  }
}
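Stripped of the write and verification steps, the savepoint/restore interaction the test exercises reduces to the short sequence below; the instant times and savepoint metadata are the ones used in this test, and everything else is elided.

// Savepoint the latest completed commit ("002" in this test), then keep writing.
client.savepoint("hoodie-unit-test", "test");
// ... commits "003" and "004" happen here ...
// Restoring to the savepoint rolls back every commit made after it.
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieSparkTable table = HoodieSparkTable.create(getConfig(), context, metaClient);
HoodieInstant savepoint = table.getCompletedSavepointTimeline().getInstants().findFirst().get();
client.restoreToSavepoint(savepoint.getTimestamp());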
Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.
From class HoodieClientTestBase, method getHoodieTable.
public HoodieSparkTable getHoodieTable(HoodieTableMetaClient metaClient, HoodieWriteConfig config) {
  HoodieSparkTable table = HoodieSparkTable.create(config, context, metaClient);
  ((SyncableFileSystemView) (table.getSliceView())).reset();
  return table;
}
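A typical way a test uses this helper, mirroring the base-file checks in testSavepointAndRollback above; the partition path and commit time below are illustrative, not taken from the helper itself.

// Reload the meta client so the table sees the latest timeline, then count base files for a commit.
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieSparkTable table = getHoodieTable(metaClient, getConfig());
BaseFileOnlyView view = table.getBaseFileOnlyView();
long filesForCommit = view.getAllBaseFiles("2016/03/15")           // illustrative partition path
    .filter(f -> f.getCommitTime().equals("002"))                  // illustrative commit time
    .count();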