Use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
From class HiveTestUtil, method addCOWPartitions:
public static void addCOWPartitions(int numberOfPartitions, boolean isParquetSchemaSimple,
    boolean useSchemaFromCommitMetadata, ZonedDateTime startFrom, String instantTime)
    throws IOException, URISyntaxException {
  HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, isParquetSchemaSimple,
      useSchemaFromCommitMetadata, startFrom, instantTime, hiveSyncConfig.basePath);
  createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName);
  createCommitFile(commitMetadata, instantTime, hiveSyncConfig.basePath);
}
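A minimal usage sketch for this helper; the partition count, start date, and instant time below are illustrative assumptions, not values taken from the Hudi test suite:

// Hypothetical call: two new partitions, simple Parquet schema,
// schema embedded in commit metadata, registered under instant "101".
ZonedDateTime startFrom = ZonedDateTime.now().plusDays(6);
HiveTestUtil.addCOWPartitions(2, true, true, startFrom, "101");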
Use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
From class HiveTestUtil, method createCOWTable:
public static void createCOWTable(String instantTime, int numberOfPartitions, boolean useSchemaFromCommitMetadata,
    String basePath, String databaseName, String tableName) throws IOException, URISyntaxException {
  Path path = new Path(basePath);
  FileIOUtils.deleteDirectory(new File(basePath));
  HoodieTableMetaClient.withPropertyBuilder()
      .setTableType(HoodieTableType.COPY_ON_WRITE)
      .setTableName(tableName)
      .setPayloadClass(HoodieAvroPayload.class)
      .initTable(configuration, basePath);
  boolean result = fileSystem.mkdirs(path);
  checkResult(result);
  ZonedDateTime dateTime = ZonedDateTime.now();
  HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true,
      useSchemaFromCommitMetadata, dateTime, instantTime, basePath);
  createdTablesSet.add(databaseName + "." + tableName);
  createCommitFile(commitMetadata, instantTime, basePath);
}
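A hedged sketch of how a sync test might bootstrap a table with this method; the base path, database name, and table name are placeholders of my choosing:

// Hypothetical setup: create a COPY_ON_WRITE table with three daily partitions
// under a throwaway base path, as commit "100", before running a sync test.
HiveTestUtil.createCOWTable("100", 3, true, "/tmp/hudi_cow_test", "testdb", "test_table");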
Use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
From class TestCluster, method createCOWTable:
public void createCOWTable(String commitTime, int numberOfPartitions, String dbName, String tableName)
    throws Exception {
  String tablePathStr = tablePath(dbName, tableName);
  Path path = new Path(tablePathStr);
  FileIOUtils.deleteDirectory(new File(path.toString()));
  HoodieTableMetaClient.withPropertyBuilder()
      .setTableType(HoodieTableType.COPY_ON_WRITE)
      .setTableName(tableName)
      .setPayloadClass(HoodieAvroPayload.class)
      .initTable(conf, path.toString());
  boolean result = dfsCluster.getFileSystem().mkdirs(path);
  if (!result) {
    throw new InitializationError("cannot initialize table");
  }
  ZonedDateTime dateTime = ZonedDateTime.now();
  HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime, path.toString());
  createCommitFile(commitMetadata, commitTime, path.toString());
}
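A sketch of how a test against the mini-cluster might drive this method; the @Test wiring, the field name cluster, and the argument values are assumptions, not code from the Hudi sources:

// Hypothetical JUnit usage, assuming a started TestCluster is available as a field named cluster.
@Test
public void createsAndSyncsCowTable() throws Exception {
  cluster.createCOWTable("100", 2, "testdb", "cow_table"); // commit "100", two daily partitions
  // ... run HiveSyncTool against the cluster's Hive server and assert the table was registered ...
}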
Use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
From class TestCluster, method createPartitions:
private HoodieCommitMetadata createPartitions(int numberOfPartitions, boolean isParquetSchemaSimple,
    ZonedDateTime startFrom, String commitTime, String basePath) throws IOException, URISyntaxException {
  startFrom = startFrom.truncatedTo(ChronoUnit.DAYS);
  HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
  for (int i = 0; i < numberOfPartitions; i++) {
    String partitionPath = startFrom.format(dtfOut);
    // makeQualified returns a new Path rather than mutating its argument, so keep the result
    Path partPath = dfsCluster.getFileSystem().makeQualified(new Path(basePath + "/" + partitionPath));
    dfsCluster.getFileSystem().mkdirs(partPath);
    List<HoodieWriteStat> writeStats = createTestData(partPath, isParquetSchemaSimple, commitTime);
    startFrom = startFrom.minusDays(1);
    writeStats.forEach(s -> commitMetadata.addWriteStat(partitionPath, s));
  }
  return commitMetadata;
}
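For clarity, the write-stat accumulation pattern from the loop above, shown in isolation; the partition path and file name here are illustrative:

// Each HoodieWriteStat is recorded in the commit metadata keyed by its partition path.
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
HoodieWriteStat stat = new HoodieWriteStat();
stat.setPath("2022/01/01/data-file.parquet"); // hypothetical relative file path
metadata.addWriteStat("2022/01/01", stat);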
Use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
From class TestHiveSyncTool, method testNotPickingOlderParquetFileWhenLatestCommitReadFailsForExistingTable:
@ParameterizedTest
@MethodSource("syncMode")
public void testNotPickingOlderParquetFileWhenLatestCommitReadFailsForExistingTable(String syncMode) throws Exception {
  hiveSyncConfig.syncMode = syncMode;
  HiveTestUtil.hiveSyncConfig.batchSyncNum = 2;
  final String commitTime = "100";
  HiveTestUtil.createCOWTable(commitTime, 1, true);
  HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
  // create empty commit
  final String emptyCommitTime = "200";
  HiveTestUtil.createCommitFileWithSchema(commitMetadata, emptyCommitTime, true);
  // HiveTestUtil.createCommitFile(commitMetadata, emptyCommitTime);
  HoodieHiveClient hiveClient =
      new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
  assertFalse(hiveClient.doesTableExist(HiveTestUtil.hiveSyncConfig.tableName),
      "Table " + HiveTestUtil.hiveSyncConfig.tableName + " should not exist initially");
  HiveSyncTool tool = new HiveSyncTool(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
  tool.syncHoodieTable();
  verifyOldParquetFileTest(hiveClient, emptyCommitTime);
  // evolve the schema
  ZonedDateTime dateTime = ZonedDateTime.now().plusDays(6);
  String commitTime2 = "301";
  HiveTestUtil.addCOWPartitions(1, false, true, dateTime, commitTime2);
  // HiveTestUtil.createCommitFileWithSchema(commitMetadata, "400", false); // create another empty commit
  // HiveTestUtil.createCommitFile(commitMetadata, "400"); // create another empty commit
  tool = new HiveSyncTool(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
  HoodieHiveClient hiveClientLatest =
      new HoodieHiveClient(HiveTestUtil.hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
  // now delete the evolved commit instant
  Path fullPath = new Path(HiveTestUtil.hiveSyncConfig.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
      + hiveClientLatest.getActiveTimeline().getInstants()
          .filter(inst -> inst.getTimestamp().equals(commitTime2))
          .findFirst().get().getFileName());
  assertTrue(HiveTestUtil.fileSystem.delete(fullPath, false));
  try {
    tool.syncHoodieTable();
  } catch (RuntimeException e) {
    // we expect the table sync to fail
  }
  // old sync values should be left intact
  verifyOldParquetFileTest(hiveClient, emptyCommitTime);
}
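The instant-file lookup this test uses to locate the commit file before deleting it, shown in isolation; the commit time is illustrative, and a Stream-returning getInstants() (as in the snippet above) is assumed:

// Hypothetical sketch: resolve the timeline file backing commit "301" under the metafolder.
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(HiveTestUtil.getHiveConf())
    .setBasePath(HiveTestUtil.hiveSyncConfig.basePath)
    .build();
String fileName = metaClient.getActiveTimeline().getInstants()
    .filter(inst -> inst.getTimestamp().equals("301"))
    .findFirst().get().getFileName();
Path instantFile = new Path(HiveTestUtil.hiveSyncConfig.basePath + "/"
    + HoodieTableMetaClient.METAFOLDER_NAME + "/" + fileName);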