Use of org.apache.hudi.cli.commands.TableCommand in project hudi by apache: class ITTestHDFSParquetImportCommand, method testConvertWithUpsert.
/**
 * Test case for 'hdfsparquetimport' with upsert.
 */
@Test
public void testConvertWithUpsert() throws IOException, ParseException {
  Path upsertFolder = new Path(basePath, "testUpsertSrc");
  List<GenericRecord> upsertData = importer.createUpsertRecords(upsertFolder);

  // First, insert records.
  HDFSParquetImporter.Config cfg = importer.getHDFSParquetImporterConfig(sourcePath.toString(), tablePath,
      tableName, HoodieTableType.COPY_ON_WRITE.name(), "_row_key", "timestamp", 1, schemaFile);
  HDFSParquetImporter dataImporter = new HDFSParquetImporter(cfg);
  dataImporter.dataImport(jsc, 0);

  // Load the table metadata.
  new TableCommand().connect(targetPath.toString(), TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7);
  metaClient = HoodieCLI.getTableMetaClient();

  // Check that the insert instant exists.
  assertEquals(1, metaClient.getActiveTimeline().getCommitsTimeline().countInstants(), "Should have only 1 commit.");

  String command = String.format(
      "hdfsparquetimport --srcPath %s --targetPath %s --tableName %s --tableType %s --rowKeyField %s"
          + " --partitionPathField %s --parallelism %s --schemaFilePath %s --format %s --sparkMemory %s"
          + " --retry %s --sparkMaster %s --upsert %s",
      upsertFolder.toString(), targetPath.toString(), tableName, HoodieTableType.COPY_ON_WRITE.name(),
      "_row_key", "timestamp", "1", schemaFile, "parquet", "2G", "1", "local", "true");
  CommandResult cr = getShell().executeCommand(command);
  assertAll("Command run success",
      () -> assertTrue(cr.isSuccess()),
      () -> assertEquals("Table imported to hoodie format", cr.getResult().toString()));

  // Reload the meta client and verify that the upsert added a second commit.
  metaClient = HoodieTableMetaClient.reload(metaClient);
  assertEquals(2, metaClient.getActiveTimeline().getCommitsTimeline().countInstants(), "Should have 2 commits.");

  // Construct the expected result: remove the top 10 records and add the upsert data.
  List<GenericRecord> expectData = insertData.subList(11, 96);
  expectData.addAll(upsertData);
  verifyResultData(expectData);
}
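The connect-and-count pattern used above (TableCommand#connect, then HoodieCLI.getTableMetaClient() and countInstants() on the commits timeline) recurs in the other examples below. Here is a minimal sketch of that check factored into a standalone helper; the class and method names are hypothetical, the connect arguments simply mirror the test, and the import packages are assumed to match the Hudi version used by these tests.

import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.commands.TableCommand;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;

public final class TimelineChecks {

  // Hypothetical helper: connect to a Hudi table path and return how many commit
  // instants are on its active timeline, mirroring the assertions in testConvertWithUpsert.
  public static int countCommits(String tablePath) throws java.io.IOException {
    // Connect with the same arguments the tests above pass to TableCommand#connect.
    new TableCommand().connect(tablePath, TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7);
    HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
    return metaClient.getActiveTimeline().getCommitsTimeline().countInstants();
  }

  private TimelineChecks() {
  }
}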
Use of org.apache.hudi.cli.commands.TableCommand in project hudi by apache: class ITTestMarkersCommand, method init.
@BeforeEach
public void init() throws IOException {
  String tableName = "test_table";
  tablePath = basePath + Path.SEPARATOR + tableName;

  // Create table and connect.
  new TableCommand().createTable(tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(), "",
      TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
}
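The createTable call above takes, in order, the table path, table name, table type, an empty string for the archive folder argument, the timeline layout version, and the record payload class name. Since several of these fixtures issue the identical call, here is a small sketch of a shared helper; the class and method names are hypothetical, and the import packages are assumed to match the test module.

import org.apache.hudi.cli.commands.TableCommand;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;

public final class CliTestTables {

  // Hypothetical wrapper around TableCommand#createTable with the arguments the
  // init() fixtures above use: COPY_ON_WRITE, empty archive folder, HoodieAvroPayload.
  public static void createCowTable(String tablePath, String tableName) throws java.io.IOException {
    new TableCommand().createTable(tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(), "",
        TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
  }

  private CliTestTables() {
  }
}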
Use of org.apache.hudi.cli.commands.TableCommand in project hudi by apache: class ITTestRepairsCommand, method init.
@BeforeEach
public void init() throws Exception {
  final String tablePath = Paths.get(basePath, "test_table").toString();
  duplicatedPartitionPath = Paths.get(tablePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).toString();
  duplicatedPartitionPathWithUpdates = Paths.get(tablePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).toString();
  duplicatedPartitionPathWithUpserts = Paths.get(tablePath, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH).toString();
  repairedOutputPath = Paths.get(basePath, "tmp").toString();
  HoodieCLI.conf = jsc.hadoopConfiguration();

  // Create table and connect.
  new TableCommand().createTable(tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(), "",
      TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");

  // Generate 200 records.
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
  HoodieSparkWriteableTestTable testTable = HoodieSparkWriteableTestTable.of(HoodieCLI.getTableMetaClient(), schema);
  HoodieRecord[] hoodieRecords1 = SchemaTestUtil.generateHoodieTestRecords(0, 100, schema).toArray(new HoodieRecord[100]);
  HoodieRecord[] hoodieRecords2 = SchemaTestUtil.generateHoodieTestRecords(100, 100, schema).toArray(new HoodieRecord[100]);
  testTable.addCommit("20160401010101")
      .withInserts(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "1", hoodieRecords1)
      .withInserts(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "2", hoodieRecords2)
      .getFileIdWithLogFile(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
  testTable.withInserts(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "4", hoodieRecords1)
      .withInserts(HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, "6", hoodieRecords1);

  // Read records and get 10 to generate duplicates.
  HoodieRecord[] dupRecords = Arrays.copyOf(hoodieRecords1, 10);
  testTable.withInserts(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "5", dupRecords);
  testTable.addCommit("20160401010202")
      .withInserts(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "3", dupRecords);
  testTable.withInserts(HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, "7", dupRecords)
      .withInserts(HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, "8", dupRecords);

  metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
  fileFormat = metaClient.getTableConfig().getBaseFileFormat();
}
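This fixture seeds duplicates by inserting the same record arrays under multiple file IDs with HoodieSparkWriteableTestTable, so the repair tests have known duplicate keys to find. Below is a stripped-down sketch of that seeding pattern; the class and method names, commit time, and file IDs are hypothetical, the chained addCommit/withInserts calls follow the code above, and the import packages are assumed to match the test utilities' packages in this Hudi version.

import java.util.Arrays;

import org.apache.avro.Schema;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.testutils.SchemaTestUtil;
import org.apache.hudi.testutils.HoodieSparkWriteableTestTable;

public final class DuplicateSeeder {

  // Hypothetical helper: write 100 records under one file ID and the first 10 of them
  // again under a second file ID, producing duplicate keys inside a single partition.
  public static void seedDuplicates(HoodieSparkWriteableTestTable testTable, Schema schema,
      String partitionPath) throws Exception {
    HoodieRecord[] records = SchemaTestUtil.generateHoodieTestRecords(0, 100, schema).toArray(new HoodieRecord[100]);
    HoodieRecord[] dupRecords = Arrays.copyOf(records, 10);
    testTable.addCommit("20160401010101")
        .withInserts(partitionPath, "1", records)
        .withInserts(partitionPath, "2", dupRecords);
  }

  private DuplicateSeeder() {
  }
}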
Use of org.apache.hudi.cli.commands.TableCommand in project hudi by apache: class ITTestSavepointsCommand, method init.
@BeforeEach
public void init() throws IOException {
  String tableName = "test_table";
  tablePath = basePath + Path.SEPARATOR + tableName;

  // Create table and connect.
  new TableCommand().createTable(tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(), "",
      TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
}
Use of org.apache.hudi.cli.commands.TableCommand in project hudi by apache: class ITTestBootstrapCommand, method testBootstrapRunCommand.
/**
 * Test case for command 'bootstrap'.
 */
@Test
public void testBootstrapRunCommand() throws IOException {
  // Test the 'bootstrap run' command.
  String cmdStr = String.format(
      "bootstrap run --targetPath %s --tableName %s --tableType %s --srcPath %s"
          + " --rowKeyField %s --partitionPathField %s --sparkMaster %s",
      tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(), sourcePath, RECORD_KEY_FIELD, PARTITION_FIELD, "local");
  CommandResult cr = getShell().executeCommand(cmdStr);
  assertTrue(cr.isSuccess());

  // Connect and check that the Hudi table exists.
  new TableCommand().connect(tablePath, TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7);
  metaClient = HoodieCLI.getTableMetaClient();
  assertEquals(1, metaClient.getActiveTimeline().getCommitsTimeline().countInstants(), "Should have 1 commit.");

  // Test "bootstrap index showpartitions".
  CommandResult crForIndexedPartitions = getShell().executeCommand("bootstrap index showpartitions");
  assertTrue(crForIndexedPartitions.isSuccess());

  String[] header = new String[] {"Indexed partitions"};
  String[][] rows = new String[partitions.size()][1];
  for (int i = 0; i < partitions.size(); i++) {
    rows[i][0] = PARTITION_FIELD + "=" + partitions.get(i);
  }
  String expect = HoodiePrintHelper.print(header, rows);
  expect = removeNonWordAndStripSpace(expect);
  String got = removeNonWordAndStripSpace(crForIndexedPartitions.getResult().toString());
  assertEquals(expect, got);

  // Test "bootstrap index showmapping".
  CommandResult crForIndexedMapping = getShell().executeCommand("bootstrap index showmapping");
  assertTrue(crForIndexedMapping.isSuccess());
  CommandResult crForIndexedMappingWithPartition = getShell().executeCommand(
      String.format("bootstrap index showmapping --partitionPath %s=%s", PARTITION_FIELD, partitions.get(0)));
  assertTrue(crForIndexedMappingWithPartition.isSuccess());
}
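The partition-listing assertion above renders the expected table with HoodiePrintHelper.print and strips formatting noise from both the expected and actual strings before comparing them. Here is a sketch of that normalize-and-compare step; the normalize method is an assumption standing in for the test base class's removeNonWordAndStripSpace helper, and the class and method names are hypothetical.

import java.util.List;

import org.apache.hudi.cli.HoodiePrintHelper;

public final class TableOutputAssertions {

  // Assumed stand-in for removeNonWordAndStripSpace: drop every non-word character so that
  // table borders and whitespace do not affect the comparison.
  static String normalize(String rendered) {
    return rendered.replaceAll("\\W", "");
  }

  // Build the expected one-column "Indexed partitions" table and compare it, after
  // normalization, against the CLI command output.
  public static boolean matchesIndexedPartitions(String cliOutput, String partitionField, List<String> partitions) {
    String[] header = new String[] {"Indexed partitions"};
    String[][] rows = new String[partitions.size()][1];
    for (int i = 0; i < partitions.size(); i++) {
      rows[i][0] = partitionField + "=" + partitions.get(i);
    }
    String expected = HoodiePrintHelper.print(header, rows);
    return normalize(expected).equals(normalize(cliOutput));
  }

  private TableOutputAssertions() {
  }
}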