Search in sources:

Example 1 with HoodieInternalWriteStatus

use of org.apache.hudi.client.HoodieInternalWriteStatus in project hudi by apache.

the class BulkInsertWriterHelper method toWriteStatus.

/**
 * Tool to convert {@link HoodieInternalWriteStatus} into {@link WriteStatus}.
 */
private static WriteStatus toWriteStatus(HoodieInternalWriteStatus internalWriteStatus) {
    WriteStatus writeStatus = new WriteStatus(false, 0.1); // trackSuccessRecords = false, failureFraction = 0.1
    writeStatus.setStat(internalWriteStatus.getStat());
    writeStatus.setFileId(internalWriteStatus.getFileId());
    writeStatus.setGlobalError(internalWriteStatus.getGlobalError());
    writeStatus.setTotalRecords(internalWriteStatus.getTotalRecords());
    writeStatus.setTotalErrorRecords(internalWriteStatus.getTotalErrorRecords());
    return writeStatus;
}
Also used: WriteStatus(org.apache.hudi.client.WriteStatus) HoodieInternalWriteStatus(org.apache.hudi.client.HoodieInternalWriteStatus)
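
Since toWriteStatus is private, a caller inside BulkInsertWriterHelper would map a whole batch of statuses at once. A minimal sketch, assuming a List<HoodieInternalWriteStatus> collected during the write (the helper name and variable are illustrative, not part of Hudi's API; requires java.util.List and java.util.stream.Collectors):

private static List<WriteStatus> toWriteStatuses(List<HoodieInternalWriteStatus> internalStatuses) {
    // Hypothetical companion helper: convert every collected internal status
    // using the toWriteStatus method shown above.
    return internalStatuses.stream()
            .map(BulkInsertWriterHelper::toWriteStatus)
            .collect(Collectors.toList());
}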

Example 2 with HoodieInternalWriteStatus

use of org.apache.hudi.client.HoodieInternalWriteStatus in project hudi by apache.

the class HoodieBulkInsertInternalWriterTestBase method assertWriteStatuses.

protected void assertWriteStatuses(List<HoodieInternalWriteStatus> writeStatuses, int batches, int size, boolean areRecordsSorted, Option<List<String>> fileAbsPaths, Option<List<String>> fileNames) {
    if (areRecordsSorted) {
        assertEquals(batches, writeStatuses.size());
    } else {
        assertEquals(Math.min(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS.length, batches), writeStatuses.size());
    }
    Map<String, Long> sizeMap = new HashMap<>();
    if (!areRecordsSorted) {
        // accumulate the expected record count for each partition path
        for (int i = 0; i < batches; i++) {
            String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3];
            if (!sizeMap.containsKey(partitionPath)) {
                sizeMap.put(partitionPath, 0L);
            }
            sizeMap.put(partitionPath, sizeMap.get(partitionPath) + size);
        }
    }
    int counter = 0;
    for (HoodieInternalWriteStatus writeStatus : writeStatuses) {
        // verify write status
        assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3], writeStatus.getPartitionPath());
        if (areRecordsSorted) {
            assertEquals(size, writeStatus.getTotalRecords());
        } else {
            assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStatus.getTotalRecords());
        }
        assertNull(writeStatus.getGlobalError());
        assertEquals(0, writeStatus.getFailedRowsSize());
        assertEquals(0, writeStatus.getTotalErrorRecords());
        assertFalse(writeStatus.hasErrors());
        assertNotNull(writeStatus.getFileId());
        String fileId = writeStatus.getFileId();
        if (fileAbsPaths.isPresent()) {
            fileAbsPaths.get().add(basePath + "/" + writeStatus.getStat().getPath());
        }
        if (fileNames.isPresent()) {
            fileNames.get().add(writeStatus.getStat().getPath().substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1));
        }
        HoodieWriteStat writeStat = writeStatus.getStat();
        if (areRecordsSorted) {
            assertEquals(size, writeStat.getNumInserts());
            assertEquals(size, writeStat.getNumWrites());
        } else {
            assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumInserts());
            assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumWrites());
        }
        assertEquals(fileId, writeStat.getFileId());
        assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3], writeStat.getPartitionPath());
        assertEquals(0, writeStat.getNumDeletes());
        assertEquals(0, writeStat.getNumUpdateWrites());
        assertEquals(0, writeStat.getTotalWriteErrors());
    }
}
Also used: HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HashMap(java.util.HashMap) HoodieInternalWriteStatus(org.apache.hudi.client.HoodieInternalWriteStatus)
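
A hedged sketch of a call site for this helper: after running a bulk-insert writer, a test passes the collected statuses plus two empty lists that capture the written file paths and names for later row-level checks. The writeStatuses, numBatches, and batchSize variables are assumed to exist in the surrounding test:

// Illustrative invocation for a sorted bulk insert; Option is
// org.apache.hudi.common.util.Option, as in the signature above.
List<String> fileAbsPaths = new ArrayList<>();
List<String> fileNames = new ArrayList<>();
assertWriteStatuses(writeStatuses, numBatches, batchSize, true, Option.of(fileAbsPaths), Option.of(fileNames));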

Example 3 with HoodieInternalWriteStatus

use of org.apache.hudi.client.HoodieInternalWriteStatus in project hudi by apache.

the class TestHoodieRowCreateHandle method testGlobalFailure.

/**
 * Issues some corrupted or wrongly-schematized InternalRows after a few valid InternalRows so that a
 * global error is thrown: write batch 1 of valid records, then a few invalid records, then batch 2 of
 * valid records; a global error should be thrown.
 */
@Test
public void testGlobalFailure() throws Exception {
    // init config and table
    HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort).build();
    HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
    String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[0];
    // init some args
    String fileId = UUID.randomUUID().toString();
    String instantTime = "000";
    HoodieRowCreateHandle handle = new HoodieRowCreateHandle(table, cfg, partitionPath, fileId, instantTime, RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), SparkDatasetTestUtils.STRUCT_TYPE);
    int size = 10 + RANDOM.nextInt(1000);
    int totalFailures = 5;
    // Generate first batch of valid rows
    Dataset<Row> inputRows = SparkDatasetTestUtils.getRandomRows(sqlContext, size / 2, partitionPath, false);
    List<InternalRow> internalRows = SparkDatasetTestUtils.toInternalRows(inputRows, SparkDatasetTestUtils.ENCODER);
    // generate some failure rows to trigger the global error
    for (int i = 0; i < totalFailures; i++) {
        internalRows.add(SparkDatasetTestUtils.getInternalRowWithError(partitionPath));
    }
    // generate 2nd batch of valid rows
    Dataset<Row> inputRows2 = SparkDatasetTestUtils.getRandomRows(sqlContext, size / 2, partitionPath, false);
    internalRows.addAll(SparkDatasetTestUtils.toInternalRows(inputRows2, SparkDatasetTestUtils.ENCODER));
    // issue writes
    try {
        for (InternalRow internalRow : internalRows) {
            handle.write(internalRow);
        }
        fail("Should have failed");
    } catch (Throwable e) {
        // expected
    }
    // close the create handle
    HoodieInternalWriteStatus writeStatus = handle.close();
    List<String> fileNames = new ArrayList<>();
    fileNames.add(handle.getFileName());
    // verify write status
    assertNotNull(writeStatus.getGlobalError());
    assertTrue(writeStatus.getGlobalError().getMessage().contains("java.lang.String cannot be cast to org.apache.spark.unsafe.types.UTF8String"));
    assertEquals(fileId, writeStatus.getFileId());
    assertEquals(partitionPath, writeStatus.getPartitionPath());
    // verify rows
    Dataset<Row> result = sqlContext.read().parquet(basePath + "/" + partitionPath);
    // pass only the first batch of inputRows: the global error was thrown before the second batch could be written
    assertRows(inputRows, result, instantTime, fileNames);
}
Also used: ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieInternalWriteStatus(org.apache.hudi.client.HoodieInternalWriteStatus) HoodieTable(org.apache.hudi.table.HoodieTable) InternalRow(org.apache.spark.sql.catalyst.InternalRow) Row(org.apache.spark.sql.Row) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
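
The global error asserted above is Spark's usual contract violation: string columns of an InternalRow must hold UTF8String, not java.lang.String. A minimal sketch of such a corrupt row, assuming a single string field (SparkDatasetTestUtils.getInternalRowWithError presumably builds something along these lines; the field layout is illustrative):

// A raw java.lang.String in an InternalRow slot breaks the UTF8String
// contract, so the parquet writer throws a ClassCastException that the
// handle records as its global error. GenericInternalRow comes from
// org.apache.spark.sql.catalyst.expressions.
Object[] values = new Object[] { "not-a-UTF8String" };
InternalRow corruptRow = new GenericInternalRow(values);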

Example 4 with HoodieInternalWriteStatus

use of org.apache.hudi.client.HoodieInternalWriteStatus in project hudi by apache.

the class TestHoodieRowCreateHandle method testRowCreateHandle.

@Test
public void testRowCreateHandle() throws Exception {
    // init config and table
    HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort).build();
    HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
    List<String> fileNames = new ArrayList<>();
    List<String> fileAbsPaths = new ArrayList<>();
    Dataset<Row> totalInputRows = null;
    // one round per partition
    for (int i = 0; i < 5; i++) {
        String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3];
        // init some args
        String fileId = UUID.randomUUID().toString();
        String instantTime = "000";
        HoodieRowCreateHandle handle = new HoodieRowCreateHandle(table, cfg, partitionPath, fileId, instantTime, RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), SparkDatasetTestUtils.STRUCT_TYPE);
        int size = 10 + RANDOM.nextInt(1000);
        // Generate inputs
        Dataset<Row> inputRows = SparkDatasetTestUtils.getRandomRows(sqlContext, size, partitionPath, false);
        if (totalInputRows == null) {
            totalInputRows = inputRows;
        } else {
            totalInputRows = totalInputRows.union(inputRows);
        }
        // issue writes
        HoodieInternalWriteStatus writeStatus = writeAndGetWriteStatus(inputRows, handle);
        fileAbsPaths.add(basePath + "/" + writeStatus.getStat().getPath());
        fileNames.add(handle.getFileName());
        // verify output
        assertOutput(writeStatus, size, fileId, partitionPath, instantTime, totalInputRows, fileNames, fileAbsPaths);
    }
}
Also used: HoodieInternalWriteStatus(org.apache.hudi.client.HoodieInternalWriteStatus) HoodieTable(org.apache.hudi.table.HoodieTable) ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) InternalRow(org.apache.spark.sql.catalyst.InternalRow) Row(org.apache.spark.sql.Row) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
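
The writeAndGetWriteStatus helper is not shown on this page; a plausible reconstruction, assuming it mirrors the write loop from Example 3 (convert the Dataset to InternalRows, write each row, then close the handle to get the status):

private HoodieInternalWriteStatus writeAndGetWriteStatus(Dataset<Row> inputRows, HoodieRowCreateHandle handle) throws Exception {
    // Hypothetical reconstruction: feed every converted row into the handle,
    // then close it to obtain the resulting HoodieInternalWriteStatus.
    List<InternalRow> internalRows = SparkDatasetTestUtils.toInternalRows(inputRows, SparkDatasetTestUtils.ENCODER);
    for (InternalRow internalRow : internalRows) {
        handle.write(internalRow);
    }
    return handle.close();
}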

Aggregations

HoodieInternalWriteStatus (org.apache.hudi.client.HoodieInternalWriteStatus): 4 uses
ArrayList (java.util.ArrayList): 2 uses
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 2 uses
HoodieTable (org.apache.hudi.table.HoodieTable): 2 uses
Row (org.apache.spark.sql.Row): 2 uses
InternalRow (org.apache.spark.sql.catalyst.InternalRow): 2 uses
Test (org.junit.jupiter.api.Test): 2 uses
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 2 uses
HashMap (java.util.HashMap): 1 use
WriteStatus (org.apache.hudi.client.WriteStatus): 1 use
HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat): 1 use