Example 31 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class TestHoodieRowCreateHandle, method testInstantiationFailure.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testInstantiationFailure(boolean enableMetadataTable) {
    // init config and table
    HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort)
            .withPath("/dummypath/abc/")
            .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build())
            .build();
    try {
        HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
        new HoodieRowCreateHandle(table, cfg, " def", UUID.randomUUID().toString(), "001",
                RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(),
                SparkDatasetTestUtils.STRUCT_TYPE);
        fail("Should have thrown exception");
    } catch (HoodieInsertException ioe) {
        // expected without metadata table
        if (enableMetadataTable) {
            fail("Should have thrown TableNotFoundException");
        }
    } catch (TableNotFoundException e) {
        // expected with metadata table
        if (!enableMetadataTable) {
            fail("Should have thrown HoodieInsertException");
        }
    }
}
Also used : TableNotFoundException(org.apache.hudi.exception.TableNotFoundException) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieInsertException(org.apache.hudi.exception.HoodieInsertException) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
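
The try/catch branching above can be expressed more compactly with JUnit 5's assertThrows. A minimal sketch of just the assertion, assuming the same cfg and fixtures as in the test; this is an editorial rewrite, not code from the Hudi repository:

import static org.junit.jupiter.api.Assertions.assertThrows;

// Expectation mirrored from the test's catch blocks above: with the metadata
// table enabled the missing base path surfaces as TableNotFoundException,
// otherwise as HoodieInsertException.
Class<? extends Exception> expected =
        enableMetadataTable ? TableNotFoundException.class : HoodieInsertException.class;
assertThrows(expected, () -> {
    HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
    new HoodieRowCreateHandle(table, cfg, " def", UUID.randomUUID().toString(), "001",
            RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(),
            SparkDatasetTestUtils.STRUCT_TYPE);
});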

Example 32 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class TestSparkHoodieHBaseIndex, method testSimpleTagLocationWithInvalidCommit.

/*
   * Test case to verify that for tagLocation entries present in HBase, if the corresponding commit instant is missing
   * from the timeline and the commit is not archived, tagLocation resets the current record location to null.
   */
@Test
public void testSimpleTagLocationWithInvalidCommit() throws Exception {
    // Load to memory
    HoodieWriteConfig config = getConfigBuilder(100, false, false).withRollbackUsingMarkers(false).build();
    SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
    SparkRDDWriteClient writeClient = getHoodieWriteClient(config);
    String newCommitTime = writeClient.startCommit();
    // make a commit with 199 records
    JavaRDD<HoodieRecord> writeRecords = generateAndCommitRecords(writeClient, 199, newCommitTime);
    // make a second commit with a single record
    String invalidCommit = writeClient.startCommit();
    JavaRDD<HoodieRecord> invalidWriteRecords = generateAndCommitRecords(writeClient, 1, invalidCommit);
    // verify location is tagged.
    HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    JavaRDD<HoodieRecord> javaRDD0 = tagLocation(index, invalidWriteRecords, hoodieTable);
    // one record present
    assert (javaRDD0.collect().size() == 1);
    // it is tagged
    assert (javaRDD0.filter(HoodieRecord::isCurrentLocationKnown).collect().size() == 1);
    assert (javaRDD0.collect().get(0).getCurrentLocation().getInstantTime().equals(invalidCommit));
    // rollback the invalid commit, so that hbase will be left with a stale entry.
    writeClient.rollback(invalidCommit);
    // Now tagLocation for the valid records, hbaseIndex should tag them
    metaClient = HoodieTableMetaClient.reload(metaClient);
    hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    JavaRDD<HoodieRecord> javaRDD1 = tagLocation(index, writeRecords, hoodieTable);
    assert (javaRDD1.filter(HoodieRecord::isCurrentLocationKnown).collect().size() == 199);
    // tagLocation for the invalid record - commit is not present in timeline due to rollback.
    JavaRDD<HoodieRecord> javaRDD2 = tagLocation(index, invalidWriteRecords, hoodieTable);
    // one record present
    assert (javaRDD2.collect().size() == 1);
    // it is not tagged
    assert (javaRDD2.filter(HoodieRecord::isCurrentLocationKnown).collect().size() == 0);
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
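
The tests in this class lean on a generateAndCommitRecords helper that the excerpt does not show. A plausible reconstruction under the harness's fixtures (dataGen, jsc(), assertNoWriteErrors); treat the exact signature and body as assumptions, not the repository's verbatim code:

// Hypothetical reconstruction of the helper used above; the real version
// lives in TestSparkHoodieHBaseIndex and may differ in detail.
private JavaRDD<HoodieRecord> generateAndCommitRecords(SparkRDDWriteClient writeClient,
        int numRecords) throws Exception {
    String commitTime = writeClient.startCommit();
    return generateAndCommitRecords(writeClient, numRecords, commitTime);
}

private JavaRDD<HoodieRecord> generateAndCommitRecords(SparkRDDWriteClient writeClient,
        int numRecords, String commitTime) throws Exception {
    // Generate inserts for the given instant and write them through the client.
    List<HoodieRecord> records = dataGen.generateInserts(commitTime, numRecords);
    JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
    JavaRDD<WriteStatus> statuses = writeClient.upsert(writeRecords, commitTime);
    assertNoWriteErrors(statuses.collect());
    return writeRecords;
}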

Example 33 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class TestSparkHoodieHBaseIndex, method testEnsureTagLocationUsesCommitTimeline.

/*
   * Test case to verify that tagLocation() uses the commit timeline to validate the commit timestamps stored in HBase.
   * If checkIfValidCommit() in the HBase index used incorrect timeline filtering, this test would fail.
   */
@Test
public void testEnsureTagLocationUsesCommitTimeline() throws Exception {
    // Load to memory
    HoodieWriteConfig config = getConfigBuilder(100, false, false).withRollbackUsingMarkers(false).build();
    SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
    SparkRDDWriteClient writeClient = getHoodieWriteClient(config);
    String commitTime1 = writeClient.startCommit();
    JavaRDD<HoodieRecord> writeRecords1 = generateAndCommitRecords(writeClient, 20, commitTime1);
    // rollback the commit - leaves a clean file in timeline.
    writeClient.rollback(commitTime1);
    // create a second commit with 20 records
    metaClient = HoodieTableMetaClient.reload(metaClient);
    generateAndCommitRecords(writeClient, 20);
    // Now tagLocation for the first set of rolled-back records; hbaseIndex should tag them
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    JavaRDD<HoodieRecord> javaRDD1 = tagLocation(index, writeRecords1, hoodieTable);
    assert (javaRDD1.filter(HoodieRecord::isCurrentLocationKnown).collect().size() == 20);
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
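
These HBase index tests repeat the same "count the tagged records" idiom. A small hypothetical helper (not present in the harness) makes the assertions read more directly:

// Hypothetical convenience helper: counts records whose location was tagged.
private static long countTagged(JavaRDD<HoodieRecord> records) {
    return records.filter(HoodieRecord::isCurrentLocationKnown).count();
}

With it, the final assertion above reads assertEquals(20, countTagged(javaRDD1)).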

Example 34 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class TestSparkHoodieHBaseIndex, method testSimpleTagLocationAndUpdate.

@ParameterizedTest
@EnumSource(HoodieTableType.class)
public void testSimpleTagLocationAndUpdate(HoodieTableType tableType) throws Exception {
    metaClient = HoodieTestUtils.init(hadoopConf, basePath, tableType);
    final String newCommitTime = "001";
    final int numRecords = 10;
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, numRecords);
    JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
    // Load to memory
    HoodieWriteConfig config = getConfig();
    SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
    try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config)) {
        metaClient = HoodieTableMetaClient.reload(metaClient);
        HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
        // Test tagLocation without any entries in index
        JavaRDD<HoodieRecord> records1 = tagLocation(index, writeRecords, hoodieTable);
        assertEquals(0, records1.filter(record -> record.isCurrentLocationKnown()).count());
        // Insert the 10 records
        writeClient.startCommitWithTime(newCommitTime);
        JavaRDD<WriteStatus> writeStatuses = writeClient.upsert(writeRecords, newCommitTime);
        assertNoWriteErrors(writeStatuses.collect());
        // Now tagLocation for these records, hbaseIndex should not tag them since commit never occurred
        JavaRDD<HoodieRecord> records2 = tagLocation(index, writeRecords, hoodieTable);
        assertEquals(0, records2.filter(record -> record.isCurrentLocationKnown()).count());
        // Now commit this & update location of records inserted and validate no errors
        writeClient.commit(newCommitTime, writeStatuses);
        // Now tagLocation for these records, hbaseIndex should tag them correctly
        metaClient = HoodieTableMetaClient.reload(metaClient);
        hoodieTable = HoodieSparkTable.create(config, context, metaClient);
        List<HoodieRecord> records3 = tagLocation(index, writeRecords, hoodieTable).collect();
        assertEquals(numRecords, records3.stream().filter(record -> record.isCurrentLocationKnown()).count());
        assertEquals(numRecords, records3.stream().map(record -> record.getKey().getRecordKey()).distinct().count());
        assertEquals(numRecords, records3.stream()
                .filter(record -> record.getCurrentLocation() != null
                        && record.getCurrentLocation().getInstantTime().equals(newCommitTime))
                .distinct().count());
    }
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) Result(org.apache.hadoop.hbase.client.Result) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) AfterAll(org.junit.jupiter.api.AfterAll) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) BeforeAll(org.junit.jupiter.api.BeforeAll) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HoodieStorageConfig(org.apache.hudi.config.HoodieStorageConfig) Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) Tag(org.junit.jupiter.api.Tag) Get(org.apache.hadoop.hbase.client.Get) UUID(java.util.UUID) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) Test(org.junit.jupiter.api.Test) List(java.util.List) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) Mockito.atMost(org.mockito.Mockito.atMost) Mockito.mock(org.mockito.Mockito.mock) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) Option(org.apache.hudi.common.util.Option) EnumSource(org.junit.jupiter.params.provider.EnumSource) HashMap(java.util.HashMap) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HTable(org.apache.hadoop.hbase.client.HTable) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) EmptyHoodieRecordPayload(org.apache.hudi.common.model.EmptyHoodieRecordPayload) LinkedList(java.util.LinkedList) JavaRDD(org.apache.spark.api.java.JavaRDD) Bytes(org.apache.hadoop.hbase.util.Bytes) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) TableName(org.apache.hadoop.hbase.TableName) TestMethodOrder(org.junit.jupiter.api.TestMethodOrder) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Put(org.apache.hadoop.hbase.client.Put) HoodieHBaseIndexConfig(org.apache.hudi.config.HoodieHBaseIndexConfig) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) MethodOrderer(org.junit.jupiter.api.MethodOrderer) Mockito.verify(org.mockito.Mockito.verify) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) AfterEach(org.junit.jupiter.api.AfterEach) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) SparkClientFunctionalTestHarness(org.apache.hudi.testutils.SparkClientFunctionalTestHarness) Connection(org.apache.hadoop.hbase.client.Connection) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils)
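
The sequence of assertions in this example captures the contract the test's own comments describe: records only become taggable after the commit completes, since the HBase index entries are persisted at commit time rather than during the upsert itself. A condensed sketch of that lifecycle, reusing the hypothetical countTagged helper from Example 33 and the same fixtures as above:

// Condensed view of the observable behavior asserted by the test above.
writeClient.startCommitWithTime(newCommitTime);
JavaRDD<WriteStatus> statuses = writeClient.upsert(writeRecords, newCommitTime);
assertEquals(0, countTagged(tagLocation(index, writeRecords, hoodieTable))); // pre-commit: untagged
writeClient.commit(newCommitTime, statuses); // commit persists the index entries
hoodieTable = HoodieSparkTable.create(config, context, HoodieTableMetaClient.reload(metaClient));
assertEquals(numRecords, countTagged(tagLocation(index, writeRecords, hoodieTable))); // now tagged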

Example 35 with HoodieTable

Use of org.apache.hudi.table.HoodieTable in project hudi by apache.

From the class TestSparkHoodieHBaseIndex, method testHbaseTagLocationForArchivedCommits.

// Verify hbase is tagging records belonging to an archived commit as valid.
@Test
public void testHbaseTagLocationForArchivedCommits() throws Exception {
    // Load to memory
    Map<String, String> params = new HashMap<String, String>();
    params.put(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED.key(), "1");
    params.put(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP.key(), "3");
    params.put(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), "2");
    HoodieWriteConfig config = getConfigBuilder(100, false, false).withProps(params).build();
    SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
    SparkRDDWriteClient writeClient = getHoodieWriteClient(config);
    // make first commit with 20 records
    JavaRDD<HoodieRecord> writeRecords1 = generateAndCommitRecords(writeClient, 20);
    // Make 3 additional commits, so that first commit is archived
    for (int nCommit = 0; nCommit < 3; nCommit++) {
        generateAndCommitRecords(writeClient, 20);
    }
    // tagLocation for the first set of records (for the archived commit), hbaseIndex should tag them as valid
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    JavaRDD<HoodieRecord> javaRDD1 = tagLocation(index, writeRecords1, hoodieTable);
    assertEquals(20, javaRDD1.filter(HoodieRecord::isCurrentLocationKnown).collect().size());
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HashMap(java.util.HashMap) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
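
The same retention settings can also be supplied through HoodieCompactionConfig's builder instead of raw property keys. A sketch assuming the builder methods of the Hudi 0.x line (retainCommits, archiveCommitsWith); verify the names against your version:

// Assumed equivalent of the params map above: retain 1 commit for the cleaner
// and archive once the timeline exceeds 3 commits, keeping at least 2.
HoodieWriteConfig config = getConfigBuilder(100, false, false)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
                .retainCommits(1)
                .archiveCommitsWith(2, 3) // (minToKeep, maxToKeep)
                .build())
        .build();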

Aggregations

HoodieTable (org.apache.hudi.table.HoodieTable): 133
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 105
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 76
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 75
List (java.util.List): 64
Test (org.junit.jupiter.api.Test): 63
ArrayList (java.util.ArrayList): 58
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 57
WriteStatus (org.apache.hudi.client.WriteStatus): 49
Path (org.apache.hadoop.fs.Path): 48
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 46
Option (org.apache.hudi.common.util.Option): 46
IOException (java.io.IOException): 44
Map (java.util.Map): 44
Collectors (java.util.stream.Collectors): 44
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 43
HashMap (java.util.HashMap): 41
Pair (org.apache.hudi.common.util.collection.Pair): 39
HoodieKey (org.apache.hudi.common.model.HoodieKey): 38
HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable): 38