
Example 6 with HoodieFlinkTable

Use of org.apache.hudi.table.HoodieFlinkTable in project hudi by apache.

From the class TestFlinkHoodieBloomIndex, method testTagLocation:

@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testTagLocation(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws Exception {
    // We have some records to be tagged (two different partitions)
    String rowKey1 = randomUUID().toString();
    String rowKey2 = randomUUID().toString();
    String rowKey3 = randomUUID().toString();
    String recordStr1 = "{\"_row_key\":\"" + rowKey1 + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
    String recordStr2 = "{\"_row_key\":\"" + rowKey2 + "\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
    String recordStr3 = "{\"_row_key\":\"" + rowKey3 + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
    // Place the same row key under a different partition.
    String recordStr4 = "{\"_row_key\":\"" + rowKey1 + "\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}";
    RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1);
    HoodieRecord record1 = new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
    RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2);
    HoodieRecord record2 = new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2);
    RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3);
    HoodieRecord record3 = new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3);
    RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4);
    HoodieRecord record4 = new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
    List<HoodieRecord> records = asList(record1, record2, record3, record4);
    // Also create the metadata and config
    HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
    HoodieFlinkTable hoodieTable = HoodieFlinkTable.create(config, context, metaClient);
    HoodieFlinkWriteableTestTable testTable = HoodieFlinkWriteableTestTable.of(hoodieTable, SCHEMA);
    // Let's tag
    HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance());
    List<HoodieRecord> taggedRecords = tagLocation(bloomIndex, records, hoodieTable);
    // Should not find any files
    for (HoodieRecord record : taggedRecords) {
        assertFalse(record.isCurrentLocationKnown());
    }
    // We create three base files, each containing one record (across two different partitions).
    String fileId1 = testTable.addCommit("001").getFileIdWithInserts("2016/01/31", record1);
    String fileId2 = testTable.addCommit("002").getFileIdWithInserts("2016/01/31", record2);
    String fileId3 = testTable.addCommit("003").getFileIdWithInserts("2015/01/31", record4);
    metaClient.reloadActiveTimeline();
    // Tag the records again, now that base files exist.
    taggedRecords = tagLocation(bloomIndex, records, HoodieFlinkTable.create(config, context, metaClient));
    // Check results
    for (HoodieRecord record : taggedRecords) {
        if (record.getRecordKey().equals(rowKey1)) {
            if (record.getPartitionPath().equals("2015/01/31")) {
                assertEquals(fileId3, record.getCurrentLocation().getFileId());
            } else {
                assertEquals(fileId1, record.getCurrentLocation().getFileId());
            }
        } else if (record.getRecordKey().equals(rowKey2)) {
            assertEquals(fileId2, record.getCurrentLocation().getFileId());
        } else if (record.getRecordKey().equals(rowKey3)) {
            assertFalse(record.isCurrentLocationKnown());
        }
    }
}
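
The makeConfig helper referenced above is not part of this snippet. A minimal sketch of what it plausibly does, assuming Hudi's standard HoodieWriteConfig/HoodieIndexConfig builders; basePath is a hypothetical field supplied by the test harness, and the real test may set additional options:

// Minimal sketch of the makeConfig helper, under the assumptions above.
private HoodieWriteConfig makeConfig(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
    return HoodieWriteConfig.newBuilder()
        // basePath is assumed to come from the test harness.
        .withPath(basePath)
        .withSchema(SCHEMA.toString())
        .withIndexConfig(HoodieIndexConfig.newBuilder()
            // The three booleans map directly onto the bloom index tuning
            // options exercised by the parameterized test.
            .withIndexType(HoodieIndex.IndexType.BLOOM)
            .bloomIndexPruneByRanges(rangePruning)
            .bloomIndexTreebasedFilter(treeFiltering)
            .bloomIndexBucketizedChecking(bucketizedChecking)
            .build())
        .build();
}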

Example 7 with HoodieFlinkTable

Use of org.apache.hudi.table.HoodieFlinkTable in project hudi by apache.

From the class CompactionUtil, method rollbackEarliestCompaction:

/**
 * Rolls back the earliest compaction instant if one exists.
 *
 * <p>This keeps the strategy conservative: it first checks whether there are any inflight
 * compaction instants, then rolls back the earliest inflight instant only if it has timed
 * out. That means if multiple timed-out instants exist on the timeline, only one is rolled
 * back per invocation.
 */
public static void rollbackEarliestCompaction(HoodieFlinkTable<?> table, Configuration conf) {
    Option<HoodieInstant> earliestInflight = table.getActiveTimeline().filterPendingCompactionTimeline().filter(instant -> instant.getState() == HoodieInstant.State.INFLIGHT).firstInstant();
    if (earliestInflight.isPresent()) {
        HoodieInstant instant = earliestInflight.get();
        String currentTime = HoodieActiveTimeline.createNewInstantTime();
        int timeout = conf.getInteger(FlinkOptions.COMPACTION_TIMEOUT_SECONDS);
        if (StreamerUtil.instantTimeDiffSeconds(currentTime, instant.getTimestamp()) >= timeout) {
            LOG.info("Rollback the inflight compaction instant: " + instant + " for timeout(" + timeout + "s)");
            table.rollbackInflightCompaction(instant);
            table.getMetaClient().reloadActiveTimeline();
        }
    }
}
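
A hedged usage sketch: a caller could invoke this utility before scheduling new compactions, so that a timed-out inflight instant is cleaned up first. The writeClient wiring here is an assumption, not part of the snippet:

// Hedged usage sketch: roll back a timed-out inflight compaction before
// scheduling a new plan. writeClient construction is assumed.
Configuration conf = new Configuration();
// Instants older than this many seconds are considered timed out.
conf.setInteger(FlinkOptions.COMPACTION_TIMEOUT_SECONDS, 20 * 60);
HoodieFlinkTable<?> table = writeClient.getHoodieTable();
CompactionUtil.rollbackEarliestCompaction(table, conf);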

Example 8 with HoodieFlinkTable

Use of org.apache.hudi.table.HoodieFlinkTable in project hudi by apache.

From the class CompactionPlanOperator, method scheduleCompaction:

private void scheduleCompaction(HoodieFlinkTable<?> table, long checkpointId) throws IOException {
    // the first instant takes the highest priority.
    Option<HoodieInstant> firstRequested = table.getActiveTimeline().filterPendingCompactionTimeline().filter(instant -> instant.getState() == HoodieInstant.State.REQUESTED).firstInstant();
    if (!firstRequested.isPresent()) {
        // do nothing.
        LOG.info("No compaction plan for checkpoint " + checkpointId);
        return;
    }
    String compactionInstantTime = firstRequested.get().getTimestamp();
    // generate compaction plan
    // should support configurable commit metadata
    HoodieCompactionPlan compactionPlan = CompactionUtils.getCompactionPlan(table.getMetaClient(), compactionInstantTime);
    if (compactionPlan == null || (compactionPlan.getOperations() == null) || (compactionPlan.getOperations().isEmpty())) {
        // do nothing.
        LOG.info("Empty compaction plan for instant " + compactionInstantTime);
    } else {
        HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
        // Mark instant as compaction inflight
        table.getActiveTimeline().transitionCompactionRequestedToInflight(instant);
        table.getMetaClient().reloadActiveTimeline();
        List<CompactionOperation> operations = compactionPlan.getOperations().stream().map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
        LOG.info("Execute compaction plan for instant {} as {} file groups", compactionInstantTime, operations.size());
        for (CompactionOperation operation : operations) {
            output.collect(new StreamRecord<>(new CompactionPlanEvent(compactionInstantTime, operation)));
        }
    }
}
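
For context, a hedged sketch of how this method might be driven: plan scheduling is typically triggered when a checkpoint completes, after first rolling back any timed-out inflight compaction. The table and conf fields below are assumed to be initialized in open():

// Hedged sketch: drive scheduleCompaction from checkpoint completion.
// The table/conf fields are assumed to be set up in open().
@Override
public void notifyCheckpointComplete(long checkpointId) {
    try {
        // Clean up a timed-out inflight compaction first, then schedule
        // the earliest requested plan for this checkpoint.
        CompactionUtil.rollbackEarliestCompaction(table, conf);
        scheduleCompaction(table, checkpointId);
    } catch (Throwable throwable) {
        // A failed scheduling attempt should not bring down the pipeline.
        LOG.error("Error while scheduling compaction plan for checkpoint: " + checkpointId, throwable);
    }
}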

Aggregations

HoodieFlinkTable (org.apache.hudi.table.HoodieFlinkTable): 8 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 5 usages
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 4 usages
IOException (java.io.IOException): 3 usages
Path (org.apache.hadoop.fs.Path): 3 usages
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 3 usages
ArrayList (java.util.ArrayList): 2 usages
List (java.util.List): 2 usages
Configuration (org.apache.flink.configuration.Configuration): 2 usages
HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord): 2 usages
HoodieKey (org.apache.hudi.common.model.HoodieKey): 2 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 2 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 2 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 2 usages
RawTripTestPayload (org.apache.hudi.common.testutils.RawTripTestPayload): 2 usages
Option (org.apache.hudi.common.util.Option): 2 usages
HoodieWriteMetadata (org.apache.hudi.table.action.HoodieWriteMetadata): 2 usages
HoodieFlinkWriteableTestTable (org.apache.hudi.testutils.HoodieFlinkWriteableTestTable): 2 usages
MethodSource (org.junit.jupiter.params.provider.MethodSource): 2 usages
Locale (java.util.Locale): 1 usage