
Example 1 with TableCommand

Use of org.apache.hudi.cli.commands.TableCommand in project hudi by apache.

From the class ITTestHDFSParquetImportCommand, method testConvertWithUpsert:

/**
 * Test case for 'hdfsparquetimport' with upsert.
 */
@Test
public void testConvertWithUpsert() throws IOException, ParseException {
    Path upsertFolder = new Path(basePath, "testUpsertSrc");
    List<GenericRecord> upsertData = importer.createUpsertRecords(upsertFolder);
    // first insert records
    HDFSParquetImporter.Config cfg = importer.getHDFSParquetImporterConfig(sourcePath.toString(), tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(), "_row_key", "timestamp", 1, schemaFile);
    HDFSParquetImporter dataImporter = new HDFSParquetImporter(cfg);
    dataImporter.dataImport(jsc, 0);
    // Load meta data
    new TableCommand().connect(targetPath.toString(), TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7);
    metaClient = HoodieCLI.getTableMetaClient();
    // check if insert instant exist
    assertEquals(1, metaClient.getActiveTimeline().getCommitsTimeline().countInstants(), "Should have only 1 commit.");
    String command = String.format("hdfsparquetimport --srcPath %s --targetPath %s --tableName %s " + "--tableType %s --rowKeyField %s" + " --partitionPathField %s --parallelism %s " + "--schemaFilePath %s --format %s --sparkMemory %s --retry %s --sparkMaster %s --upsert %s", upsertFolder.toString(), targetPath.toString(), tableName, HoodieTableType.COPY_ON_WRITE.name(), "_row_key", "timestamp", "1", schemaFile, "parquet", "2G", "1", "local", "true");
    CommandResult cr = getShell().executeCommand(command);
    assertAll("Command run success", () -> assertTrue(cr.isSuccess()), () -> assertEquals("Table imported to hoodie format", cr.getResult().toString()));
    // reload meta client
    metaClient = HoodieTableMetaClient.reload(metaClient);
    assertEquals(2, metaClient.getActiveTimeline().getCommitsTimeline().countInstants(), "Should have 2 commits.");
    // construct expected result: drop the leading records replaced by the upsert, then append the upsert data.
    List<GenericRecord> expectData = insertData.subList(11, 96);
    expectData.addAll(upsertData);
    verifyResultData(expectData);
}
Also used : Path(org.apache.hadoop.fs.Path) TestHDFSParquetImporter(org.apache.hudi.utilities.functional.TestHDFSParquetImporter) HDFSParquetImporter(org.apache.hudi.utilities.HDFSParquetImporter) GenericRecord(org.apache.avro.generic.GenericRecord) TableCommand(org.apache.hudi.cli.commands.TableCommand) CommandResult(org.springframework.shell.core.CommandResult) AbstractShellIntegrationTest(org.apache.hudi.cli.testutils.AbstractShellIntegrationTest) Test(org.junit.jupiter.api.Test)
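The connect-then-verify pattern above (repeated in Example 5) can be isolated into a small helper. This is a minimal sketch rather than project code: it assumes HoodieCLI.conf and the Spark context are already initialized, as AbstractShellIntegrationTest does, and the meaning attributed to the numeric connect arguments (check intervals and a retry limit) is inferred from the test values, not from documented behavior.

import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.commands.TableCommand;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;

public class ConnectAndCountCommits {

    // tablePath is a caller-supplied path to an existing Hudi table.
    static int countCommits(String tablePath) throws Exception {
        // Attach the CLI to the table, mirroring the call in the test:
        // eventual-consistency checks disabled, same interval/retry values as Example 1.
        new TableCommand().connect(tablePath, TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7);
        // The connected meta client becomes available through HoodieCLI.
        HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient();
        // Count completed commits on the active timeline, as the assertions above do.
        return metaClient.getActiveTimeline().getCommitsTimeline().countInstants();
    }
}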

Example 2 with TableCommand

Use of org.apache.hudi.cli.commands.TableCommand in project hudi by apache.

From the class ITTestMarkersCommand, method init:

@BeforeEach
public void init() throws IOException {
    String tableName = "test_table";
    tablePath = basePath + Path.SEPARATOR + tableName;
    // Create table and connect
    new TableCommand().createTable(tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(), "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
}
Also used : TableCommand(org.apache.hudi.cli.commands.TableCommand) BeforeEach(org.junit.jupiter.api.BeforeEach)

Example 3 with TableCommand

Use of org.apache.hudi.cli.commands.TableCommand in project hudi by apache.

From the class ITTestRepairsCommand, method init:

@BeforeEach
public void init() throws Exception {
    final String tablePath = Paths.get(basePath, "test_table").toString();
    duplicatedPartitionPath = Paths.get(tablePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).toString();
    duplicatedPartitionPathWithUpdates = Paths.get(tablePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).toString();
    duplicatedPartitionPathWithUpserts = Paths.get(tablePath, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH).toString();
    repairedOutputPath = Paths.get(basePath, "tmp").toString();
    HoodieCLI.conf = jsc.hadoopConfiguration();
    // Create table and connect
    new TableCommand().createTable(tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(), "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
    // generate 200 records
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
    HoodieSparkWriteableTestTable testTable = HoodieSparkWriteableTestTable.of(HoodieCLI.getTableMetaClient(), schema);
    HoodieRecord[] hoodieRecords1 = SchemaTestUtil.generateHoodieTestRecords(0, 100, schema).toArray(new HoodieRecord[100]);
    HoodieRecord[] hoodieRecords2 = SchemaTestUtil.generateHoodieTestRecords(100, 100, schema).toArray(new HoodieRecord[100]);
    testTable.addCommit("20160401010101").withInserts(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "1", hoodieRecords1).withInserts(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "2", hoodieRecords2).getFileIdWithLogFile(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
    testTable.withInserts(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "4", hoodieRecords1).withInserts(HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, "6", hoodieRecords1);
    // read records and get 10 to generate duplicates
    HoodieRecord[] dupRecords = Arrays.copyOf(hoodieRecords1, 10);
    testTable.withInserts(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "5", dupRecords);
    testTable.addCommit("20160401010202").withInserts(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "3", dupRecords);
    testTable.withInserts(HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, "7", dupRecords).withInserts(HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, "8", dupRecords);
    metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
    fileFormat = metaClient.getTableConfig().getBaseFileFormat();
}
Also used : HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) HoodieSparkWriteableTestTable(org.apache.hudi.testutils.HoodieSparkWriteableTestTable) TableCommand(org.apache.hudi.cli.commands.TableCommand) BeforeEach(org.junit.jupiter.api.BeforeEach)
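The init above deliberately writes the same records under multiple file IDs so the repair command has duplicates to find. A quick way to observe those duplicates is sketched here, assuming the base files are Parquet (COPY_ON_WRITE) and that a SparkSession is available from the test harness; countDuplicatedKeys is a hypothetical helper, not part of the test.

import static org.apache.spark.sql.functions.col;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class DuplicateKeyCheck {

    // Counts record keys that appear more than once in a partition's base files.
    static long countDuplicatedKeys(SparkSession spark, String partitionPath) {
        Dataset<Row> rows = spark.read().parquet(partitionPath);
        return rows.groupBy("_hoodie_record_key")   // Hudi's record-key metadata column
                   .count()
                   .filter(col("count").gt(1))
                   .count();
    }
}

Run against duplicatedPartitionPath after init, this should surface the ten keys duplicated via Arrays.copyOf.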

Example 4 with TableCommand

Use of org.apache.hudi.cli.commands.TableCommand in project hudi by apache.

From the class ITTestSavepointsCommand, method init:

@BeforeEach
public void init() throws IOException {
    String tableName = "test_table";
    tablePath = basePath + Path.SEPARATOR + tableName;
    // Create table and connect
    new TableCommand().createTable(tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(), "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
}
Also used : TableCommand(org.apache.hudi.cli.commands.TableCommand) BeforeEach(org.junit.jupiter.api.BeforeEach)

Example 5 with TableCommand

Use of org.apache.hudi.cli.commands.TableCommand in project hudi by apache.

From the class ITTestBootstrapCommand, method testBootstrapRunCommand:

/**
 * Test case for command 'bootstrap'.
 */
@Test
public void testBootstrapRunCommand() throws IOException {
    // test bootstrap run command
    String cmdStr = String.format("bootstrap run --targetPath %s --tableName %s --tableType %s --srcPath %s --rowKeyField %s --partitionPathField %s --sparkMaster %s", tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(), sourcePath, RECORD_KEY_FIELD, PARTITION_FIELD, "local");
    CommandResult cr = getShell().executeCommand(cmdStr);
    assertTrue(cr.isSuccess());
    // Connect & check Hudi table exist
    new TableCommand().connect(tablePath, TimelineLayoutVersion.VERSION_1, false, 2000, 300000, 7);
    metaClient = HoodieCLI.getTableMetaClient();
    assertEquals(1, metaClient.getActiveTimeline().getCommitsTimeline().countInstants(), "Should have 1 commit.");
    // test "bootstrap index showpartitions"
    CommandResult crForIndexedPartitions = getShell().executeCommand("bootstrap index showpartitions");
    assertTrue(crForIndexedPartitions.isSuccess());
    String[] header = new String[] { "Indexed partitions" };
    String[][] rows = new String[partitions.size()][1];
    for (int i = 0; i < partitions.size(); i++) {
        rows[i][0] = PARTITION_FIELD + "=" + partitions.get(i);
    }
    String expect = HoodiePrintHelper.print(header, rows);
    expect = removeNonWordAndStripSpace(expect);
    String got = removeNonWordAndStripSpace(crForIndexedPartitions.getResult().toString());
    assertEquals(expect, got);
    // test "bootstrap index showMapping"
    CommandResult crForIndexedMapping = getShell().executeCommand("bootstrap index showmapping");
    assertTrue(crForIndexedMapping.isSuccess());
    CommandResult crForIndexedMappingWithPartition = getShell().executeCommand(String.format("bootstrap index showmapping --partitionPath %s=%s", PARTITION_FIELD, partitions.get(0)));
    assertTrue(crForIndexedMappingWithPartition.isSuccess());
}
Also used : TableCommand(org.apache.hudi.cli.commands.TableCommand) CommandResult(org.springframework.shell.core.CommandResult) AbstractShellIntegrationTest(org.apache.hudi.cli.testutils.AbstractShellIntegrationTest) Test(org.junit.jupiter.api.Test)

Aggregations

TableCommand (org.apache.hudi.cli.commands.TableCommand) 9
BeforeEach (org.junit.jupiter.api.BeforeEach) 6
AbstractShellIntegrationTest (org.apache.hudi.cli.testutils.AbstractShellIntegrationTest) 3
Test (org.junit.jupiter.api.Test) 3
CommandResult (org.springframework.shell.core.CommandResult) 3
Schema (org.apache.avro.Schema) 1
GenericRecord (org.apache.avro.generic.GenericRecord) 1
Path (org.apache.hadoop.fs.Path) 1
HoodieRecord (org.apache.hudi.common.model.HoodieRecord) 1
HoodieSparkWriteableTestTable (org.apache.hudi.testutils.HoodieSparkWriteableTestTable) 1
HDFSParquetImporter (org.apache.hudi.utilities.HDFSParquetImporter) 1
TestHDFSParquetImporter (org.apache.hudi.utilities.functional.TestHDFSParquetImporter) 1
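Across these examples, TableCommand is exercised through two entry points: createTable(...) to initialize a fresh table in the @BeforeEach setups (Examples 2-4) and connect(...) to attach the CLI to an existing path before inspecting its timeline (Examples 1 and 5). Below is a minimal standalone sketch of the createTable path, assuming a hypothetical local base path and that HoodieCLI.conf has been set to a valid Hadoop configuration as the tests do.

import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.commands.TableCommand;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;

public class CreateTableSketch {

    public static void main(String[] args) throws Exception {
        // Hypothetical path used only for illustration.
        String tablePath = "/tmp/hudi/test_table";

        // Initialize a new COPY_ON_WRITE table, mirroring the @BeforeEach setups above:
        // the empty string matches the "" the tests pass (presumably the archive folder),
        // and HoodieAvroPayload is the payload class used throughout the examples.
        new TableCommand().createTable(tablePath, "test_table",
                HoodieTableType.COPY_ON_WRITE.name(), "",
                TimelineLayoutVersion.VERSION_1,
                "org.apache.hudi.common.model.HoodieAvroPayload");

        // The meta client is available right after createTable, as Example 3 relies on
        // when it calls HoodieCLI.getTableMetaClient() without an explicit connect.
        System.out.println("Table created at: "
                + HoodieCLI.getTableMetaClient().getBasePath());
    }
}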