Search in sources :

Example 1 with HoodieTestSuiteConfig

use of org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig in project hudi by apache.

the class TestHoodieTestSuiteJob method testCOWFullDagFromYaml.

/**
 * Runs the test suite job against a COPY_ON_WRITE table using the workload
 * YAML located at {@code dfsBasePath + "/" + COW_DAG_FILE_NAME}, without the delta streamer.
 */
@Test
public void testCOWFullDagFromYaml() throws Exception {
    final boolean viaDeltaStreamer = false;
    this.cleanDFSDirs();

    final String sourceDir = dfsBasePath + "/input";
    final String targetDir = dfsBasePath + "/result";
    final String cowTableType = HoodieTableType.COPY_ON_WRITE.name();

    HoodieTestSuiteConfig suiteConfig = makeConfig(sourceDir, targetDir, viaDeltaStreamer, cowTableType);
    suiteConfig.workloadYamlPath = dfsBasePath + "/" + COW_DAG_FILE_NAME;

    new HoodieTestSuiteJob(suiteConfig, jsc).runTestSuite();

    // The meta client is still built against the target path even though the
    // commit-count assertion below is disabled.
    // NOTE(review): the reason the assertion was disabled is not visible here — confirm before re-enabling.
    HoodieTableMetaClient tableMetaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath(suiteConfig.targetBasePath)
        .build();
// assertEquals(metaClient.getActiveTimeline().getCommitsTimeline().getInstants().count(), 5);
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) HoodieTestSuiteJob(org.apache.hudi.integ.testsuite.HoodieTestSuiteJob) HoodieTestSuiteConfig(org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig) Test(org.junit.jupiter.api.Test)

Example 2 with HoodieTestSuiteConfig

use of org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig in project hudi by apache.

the class TestHoodieTestSuiteJob method testSparkSqlDag.

/**
 * Runs the test suite job against a COPY_ON_WRITE table using the workload
 * YAML located at {@code dfsBasePath + "/" + SPARK_SQL_DAG_FILE_NAME}, without the delta streamer.
 * The test passes as long as the job completes without throwing.
 */
@Test
public void testSparkSqlDag() throws Exception {
    final boolean viaDeltaStreamer = false;
    this.cleanDFSDirs();

    final String sourceDir = dfsBasePath + "/input";
    final String targetDir = dfsBasePath + "/result";
    final String cowTableType = HoodieTableType.COPY_ON_WRITE.name();

    HoodieTestSuiteConfig suiteConfig = makeConfig(sourceDir, targetDir, viaDeltaStreamer, cowTableType);
    suiteConfig.workloadYamlPath = dfsBasePath + "/" + SPARK_SQL_DAG_FILE_NAME;

    new HoodieTestSuiteJob(suiteConfig, jsc).runTestSuite();
}
Also used : HoodieTestSuiteJob(org.apache.hudi.integ.testsuite.HoodieTestSuiteJob) HoodieTestSuiteConfig(org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig) Test(org.junit.jupiter.api.Test)

Example 3 with HoodieTestSuiteConfig

use of org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig in project hudi by apache.

the class TestHoodieTestSuiteJob method makeConfig.

/**
 * Builds the {@link HoodieTestSuiteConfig} shared by the tests in this class.
 *
 * @param inputBasePath  value stored in {@code inputBasePath}
 * @param outputBasePath value stored in {@code targetBasePath}
 * @param useDeltaStream value stored in {@code useDeltaStreamer}
 * @param tableType      value stored in {@code tableType}
 * @return a freshly created config; the remaining fields are set to the fixed values below
 */
protected HoodieTestSuiteConfig makeConfig(String inputBasePath, String outputBasePath, boolean useDeltaStream, String tableType) {
    final HoodieTestSuiteConfig config = new HoodieTestSuiteConfig();

    // Caller-supplied settings.
    config.inputBasePath = inputBasePath;
    config.targetBasePath = outputBasePath;
    config.tableType = tableType;
    config.useDeltaStreamer = useDeltaStream;

    // Fixed table and properties settings.
    config.targetTableName = "table1";
    config.propsFilePath = dfsBasePath + "/test-source.properties";

    // Source, schema and DAG generator classes.
    config.sourceClassName = AvroDFSSource.class.getName();
    config.sourceOrderingField = SchemaUtils.SOURCE_ORDERING_FIELD;
    config.schemaProviderClassName = TestSuiteFileBasedSchemaProvider.class.getName();
    config.workloadDagGenerator = WorkflowDagGenerator.class.getName();

    // Input/output format names and size limits.
    config.inputFormatName = DeltaInputType.AVRO.name();
    config.outputTypeName = DeltaOutputMode.DFS.name();
    config.limitFileSize = 1024 * 1024L;
    config.sourceLimit = 20_000_000;

    return config;
}
Also used : WorkflowDagGenerator(org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator) TestSuiteFileBasedSchemaProvider(org.apache.hudi.integ.testsuite.schema.TestSuiteFileBasedSchemaProvider) HoodieTestSuiteConfig(org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig) AvroDFSSource(org.apache.hudi.utilities.sources.AvroDFSSource)

Example 4 with HoodieTestSuiteConfig

use of org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig in project hudi by apache.

the class TestHoodieTestSuiteJob method testSparkDataSourceNodesDagWithLock.

/**
 * Runs the Spark datasource-nodes DAG against a COPY_ON_WRITE table with
 * optimistic concurrency control and the LAZY failed-writes cleaner policy
 * written into the source properties file, then checks that the commits
 * timeline holds exactly three instants.
 */
@Test
public void testSparkDataSourceNodesDagWithLock() throws Exception {
    boolean useDeltaStreamer = false;
    this.cleanDFSDirs();

    // Override the write properties and save them to the file that makeConfig points propsFilePath at.
    TypedProperties props = getProperties();
    props.setProperty("hoodie.write.concurrency.mode", "optimistic_concurrency_control");
    props.setProperty("hoodie.failed.writes.cleaner.policy", "LAZY");
    UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/test-source" + ".properties");

    String inputBasePath = dfsBasePath + "/input";
    String outputBasePath = dfsBasePath + "/result";
    HoodieTestSuiteConfig cfg = makeConfig(inputBasePath, outputBasePath, useDeltaStreamer, HoodieTableType.COPY_ON_WRITE.name());
    cfg.workloadYamlPath = dfsBasePath + "/" + COW_DAG_FILE_NAME_SPARK_DATASOURCE_NODES;

    HoodieTestSuiteJob hoodieTestSuiteJob = new HoodieTestSuiteJob(cfg, jsc);
    hoodieTestSuiteJob.runTestSuite();

    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(new Configuration()).setBasePath(cfg.targetBasePath).build();
    // JUnit 5 takes (expected, actual); keeping that order makes failure messages read correctly.
    assertEquals(3, metaClient.getActiveTimeline().getCommitsTimeline().getInstants().count());
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) HoodieTestSuiteJob(org.apache.hudi.integ.testsuite.HoodieTestSuiteJob) HoodieTestSuiteConfig(org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig) TypedProperties(org.apache.hudi.common.config.TypedProperties) Test(org.junit.jupiter.api.Test)

Example 5 with HoodieTestSuiteConfig

use of org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig in project hudi by apache.

the class TestHoodieTestSuiteJob method testHiveSync.

/**
 * Runs the test suite job with a Hive-sync DAG generator chosen by table type
 * ({@link HiveSyncDagGenerator} for COPY_ON_WRITE, {@link HiveSyncDagGeneratorMOR}
 * otherwise) and checks that the commits timeline holds exactly one instant.
 */
@Test
public void testHiveSync() throws Exception {
    boolean useDeltaStreamer = false;
    String tableType = "COPY_ON_WRITE";
    this.cleanDFSDirs();
    String inputBasePath = dfsBasePath + "/input";
    String outputBasePath = dfsBasePath + "/result";
    HoodieTestSuiteConfig cfg = makeConfig(inputBasePath, outputBasePath, useDeltaStreamer, tableType);

    // Compare string contents, not references: '==' on strings only matches
    // when both sides happen to be the very same object.
    if (HoodieTableType.COPY_ON_WRITE.name().equals(tableType)) {
        cfg.workloadDagGenerator = HiveSyncDagGenerator.class.getName();
    } else {
        cfg.workloadDagGenerator = HiveSyncDagGeneratorMOR.class.getName();
    }

    HoodieTestSuiteJob hoodieTestSuiteJob = new HoodieTestSuiteJob(cfg, jsc);
    hoodieTestSuiteJob.runTestSuite();

    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(new Configuration()).setBasePath(cfg.targetBasePath).build();
    // JUnit 5 takes (expected, actual); keeping that order makes failure messages read correctly.
    assertEquals(1, metaClient.getActiveTimeline().getCommitsTimeline().getInstants().count());
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) HoodieTestSuiteJob(org.apache.hudi.integ.testsuite.HoodieTestSuiteJob) HiveSyncDagGenerator(org.apache.hudi.integ.testsuite.dag.HiveSyncDagGenerator) HiveSyncDagGeneratorMOR(org.apache.hudi.integ.testsuite.dag.HiveSyncDagGeneratorMOR) HoodieTestSuiteConfig(org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig) Test(org.junit.jupiter.api.Test)

Aggregations

HoodieTestSuiteConfig (org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig): 7; HoodieTestSuiteJob (org.apache.hudi.integ.testsuite.HoodieTestSuiteJob): 6; Configuration (org.apache.hadoop.conf.Configuration): 5; HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 5; Test (org.junit.jupiter.api.Test): 5; TypedProperties (org.apache.hudi.common.config.TypedProperties): 1; ComplexDagGenerator (org.apache.hudi.integ.testsuite.dag.ComplexDagGenerator): 1; HiveSyncDagGenerator (org.apache.hudi.integ.testsuite.dag.HiveSyncDagGenerator): 1; HiveSyncDagGeneratorMOR (org.apache.hudi.integ.testsuite.dag.HiveSyncDagGeneratorMOR): 1; WorkflowDagGenerator (org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator): 1; TestSuiteFileBasedSchemaProvider (org.apache.hudi.integ.testsuite.schema.TestSuiteFileBasedSchemaProvider): 1; AvroDFSSource (org.apache.hudi.utilities.sources.AvroDFSSource): 1; MethodSource (org.junit.jupiter.params.provider.MethodSource): 1