Search in sources :

Example 1 with HoodieCreateHandle

use of org.apache.hudi.io.HoodieCreateHandle in project hudi by apache.

From the class TestCopyOnWriteActionExecutor, the method testMakeNewPath:

@Test
public void testMakeNewPath() {
    // Verifies that HoodieCreateHandle.makeNewPath builds the expected data-file path:
    // <basePath>/<partitionPath>/<dataFileName(instantTime, writeToken, fileName)>.
    String fileName = UUID.randomUUID().toString();
    String partitionPath = "2016/05/04";
    String instantTime = makeNewCommitTime();
    HoodieWriteConfig config = makeHoodieClientConfig();
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable table = HoodieSparkTable.create(config, context, metaClient);
    // Run inside a Spark task so TaskContext is populated; the write token is
    // derived from the task's partition/stage/attempt ids.
    Pair<Path, String> newPathWithWriteToken = jsc.parallelize(Arrays.asList(1)).map(x -> {
        HoodieRecord record = mock(HoodieRecord.class);
        when(record.getPartitionPath()).thenReturn(partitionPath);
        String writeToken = FSUtils.makeWriteToken(TaskContext.getPartitionId(), TaskContext.get().stageId(), TaskContext.get().taskAttemptId());
        // Wildcard type arguments instead of a raw HoodieCreateHandle reference.
        HoodieCreateHandle<?, ?, ?, ?> io = new HoodieCreateHandle(config, instantTime, table, partitionPath, fileName, supplier);
        return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken);
    }).collect().get(0);
    // JUnit convention: expected value first, actual second — otherwise failure
    // messages report the values swapped.
    assertEquals(
        Paths.get(this.basePath, partitionPath,
            FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName)).toString(),
        newPathWithWriteToken.getKey().toString());
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieCreateHandle(org.apache.hudi.io.HoodieCreateHandle) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 2 with HoodieCreateHandle

use of org.apache.hudi.io.HoodieCreateHandle in project hudi by apache.

From the class TestUpdateSchemaEvolution, the method prepareFirstRecordCommit:

/**
 * Writes {@code recordsStrs} with the old schema version via a {@link HoodieCreateHandle}
 * into file group "f1-0" at instant "100", then materializes the commit file so the
 * write is visible on the timeline.
 *
 * @param recordsStrs raw JSON record strings to insert
 * @return the {@link WriteStatus} produced by closing the create handle
 * @throws IOException if the commit file cannot be created
 */
private WriteStatus prepareFirstRecordCommit(List<String> recordsStrs) throws IOException {
    // Create a bunch of records with an old version of schema
    final HoodieWriteConfig config = makeHoodieClientConfig("/exampleSchema.avsc");
    final HoodieSparkTable table = HoodieSparkTable.create(config, context);
    final List<WriteStatus> statuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
        List<HoodieRecord> insertRecords = new ArrayList<>();
        for (String recordStr : recordsStrs) {
            RawTripTestPayload rowChange = new RawTripTestPayload(recordStr);
            insertRecords.add(new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange));
        }
        // Method reference instead of the equivalent lambda; record key -> record.
        Map<String, HoodieRecord> insertRecordMap = insertRecords.stream()
            .collect(Collectors.toMap(HoodieRecord::getRecordKey, Function.identity()));
        HoodieCreateHandle<?, ?, ?, ?> createHandle = new HoodieCreateHandle(config, "100", table, insertRecords.get(0).getPartitionPath(), "f1-0", insertRecordMap, supplier);
        createHandle.write();
        return createHandle.close().get(0);
    }).collect();
    final Path commitFile = new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100"));
    // FileSystem.create() returns an FSDataOutputStream that must be closed;
    // the original left it open, leaking the handle and (on some file systems)
    // delaying the commit file becoming visible.
    FSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile).close();
    return statuses.get(0);
}
Also used : Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) BaseFileUtils(org.apache.hudi.common.util.BaseFileUtils) ParquetDecodingException(org.apache.parquet.io.ParquetDecodingException) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) Option(org.apache.hudi.common.util.Option) Function(java.util.function.Function) HoodieClientTestHarness(org.apache.hudi.testutils.HoodieClientTestHarness) ArrayList(java.util.ArrayList) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HoodieMergeHandle(org.apache.hudi.io.HoodieMergeHandle) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) SchemaTestUtil.getSchemaFromResource(org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) RawTripTestPayload(org.apache.hudi.common.testutils.RawTripTestPayload) HoodieCreateHandle(org.apache.hudi.io.HoodieCreateHandle) InvalidRecordException(org.apache.parquet.io.InvalidRecordException) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) Executable(org.junit.jupiter.api.function.Executable) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) Assertions.assertDoesNotThrow(org.junit.jupiter.api.Assertions.assertDoesNotThrow) FSUtils(org.apache.hudi.common.fs.FSUtils) 
Path(org.apache.hadoop.fs.Path) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) RawTripTestPayload(org.apache.hudi.common.testutils.RawTripTestPayload) HoodieCreateHandle(org.apache.hudi.io.HoodieCreateHandle) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable)

Example 3 with HoodieCreateHandle

use of org.apache.hudi.io.HoodieCreateHandle in project hudi by apache.

From the class TestJavaCopyOnWriteActionExecutor, the method testMakeNewPath:

@Test
public void testMakeNewPath() {
    // Verifies that HoodieCreateHandle.makeNewPath builds the expected data-file path
    // for the Java (non-Spark) engine:
    // <basePath>/<partitionPath>/<dataFileName(instantTime, writeToken, fileName)>.
    String fileName = UUID.randomUUID().toString();
    String partitionPath = "2016/05/04";
    String instantTime = makeNewCommitTime();
    HoodieWriteConfig config = makeHoodieClientConfig();
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable table = HoodieJavaTable.create(config, context, metaClient);
    Pair<Path, String> newPathWithWriteToken = Arrays.asList(1).stream().map(x -> {
        HoodieRecord record = mock(HoodieRecord.class);
        when(record.getPartitionPath()).thenReturn(partitionPath);
        // Hoist the repeated context.getTaskContextSupplier() lookups to a local.
        TaskContextSupplier taskContextSupplier = context.getTaskContextSupplier();
        String writeToken = FSUtils.makeWriteToken(
            taskContextSupplier.getPartitionIdSupplier().get(),
            taskContextSupplier.getStageIdSupplier().get(),
            taskContextSupplier.getAttemptIdSupplier().get());
        // Wildcard type arguments instead of a raw HoodieCreateHandle reference.
        HoodieCreateHandle<?, ?, ?, ?> io = new HoodieCreateHandle(config, instantTime, table, partitionPath, fileName, taskContextSupplier);
        return Pair.of(io.makeNewPath(record.getPartitionPath()), writeToken);
    }).collect(Collectors.toList()).get(0);
    // JUnit convention: expected value first, actual second — otherwise failure
    // messages report the values swapped.
    assertEquals(
        Paths.get(this.basePath, partitionPath,
            FSUtils.makeDataFileName(instantTime, newPathWithWriteToken.getRight(), fileName)).toString(),
        newPathWithWriteToken.getKey().toString());
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieCreateHandle(org.apache.hudi.io.HoodieCreateHandle) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test)

Aggregations

Path (org.apache.hadoop.fs.Path)3 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)3 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)3 HoodieCreateHandle (org.apache.hudi.io.HoodieCreateHandle)3 Test (org.junit.jupiter.api.Test)3 HoodieTable (org.apache.hudi.table.HoodieTable)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 List (java.util.List)1 Map (java.util.Map)1 Function (java.util.function.Function)1 Collectors (java.util.stream.Collectors)1 Schema (org.apache.avro.Schema)1 GenericRecord (org.apache.avro.generic.GenericRecord)1 FSUtils (org.apache.hudi.common.fs.FSUtils)1 HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord)1 HoodieKey (org.apache.hudi.common.model.HoodieKey)1 HoodieRecordLocation (org.apache.hudi.common.model.HoodieRecordLocation)1 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)1