Search in sources :

Example 11 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

the class TestHoodieRowCreateHandle method assertOutput.

/**
 * Asserts that {@code writeStatus} and its nested {@link HoodieWriteStat} describe an error-free,
 * insert-only write of {@code size} records, then reads the written parquet files back and
 * hands them to {@code assertRows} together with the input rows.
 *
 * @param writeStatus   write status under test
 * @param size          expected record count (total records, inserts and writes)
 * @param fileId        expected file id on both the status and the stat
 * @param partitionPath expected partition path on both the status and the stat
 * @param instantTime   forwarded to {@code assertRows}
 * @param inputRows     rows to compare the read-back data against
 * @param filenames     forwarded to {@code assertRows}
 * @param fileAbsPaths  paths of the parquet files to read back
 */
private void assertOutput(HoodieInternalWriteStatus writeStatus, int size, String fileId, String partitionPath, String instantTime, Dataset<Row> inputRows, List<String> filenames, List<String> fileAbsPaths) {
    // Expected value first, actual second, so a failure message labels the two sides correctly
    // and the status-level checks match the stat-level checks below.
    assertEquals(partitionPath, writeStatus.getPartitionPath());
    assertEquals(size, writeStatus.getTotalRecords());
    assertEquals(0, writeStatus.getFailedRowsSize());
    assertEquals(0, writeStatus.getTotalErrorRecords());
    assertFalse(writeStatus.hasErrors());
    assertNull(writeStatus.getGlobalError());
    assertEquals(fileId, writeStatus.getFileId());
    HoodieWriteStat writeStat = writeStatus.getStat();
    assertEquals(size, writeStat.getNumInserts());
    assertEquals(size, writeStat.getNumWrites());
    assertEquals(fileId, writeStat.getFileId());
    assertEquals(partitionPath, writeStat.getPartitionPath());
    assertEquals(0, writeStat.getNumDeletes());
    assertEquals(0, writeStat.getNumUpdateWrites());
    assertEquals(0, writeStat.getTotalWriteErrors());
    // verify rows
    Dataset<Row> result = sqlContext.read().parquet(fileAbsPaths.toArray(new String[0]));
    assertRows(inputRows, result, instantTime, filenames);
}
Also used : HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) InternalRow(org.apache.spark.sql.catalyst.InternalRow) Row(org.apache.spark.sql.Row)

Example 12 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

the class TestSparkHoodieHBaseIndex method getSampleWriteStatus.

/**
 * Creates a {@link WriteStatus} whose stat carries only the given insert and update-write counts.
 *
 * @param numInserts      value stored via {@code setNumInserts}
 * @param numUpdateWrites value stored via {@code setNumUpdateWrites}
 * @return a new write status with the populated stat attached
 */
private WriteStatus getSampleWriteStatus(final int numInserts, final int numUpdateWrites) {
    // Populate the stat first; every other stat field keeps its default.
    final HoodieWriteStat stat = new HoodieWriteStat();
    stat.setNumInserts(numInserts);
    stat.setNumUpdateWrites(numUpdateWrites);

    // NOTE(review): meaning of the (false, 0.1) constructor arguments is not visible here — confirm against WriteStatus.
    final WriteStatus sample = new WriteStatus(false, 0.1);
    sample.setStat(stat);
    return sample;
}
Also used : HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) WriteStatus(org.apache.hudi.client.WriteStatus)

Example 13 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

the class HoodieTestCommitMetadataGenerator method generateCommitMetadata.

/**
 * Builds a {@link HoodieCommitMetadata} holding the supplied extra metadata plus one
 * {@link HoodieWriteStat} per file path listed under each partition.
 *
 * @param partitionToFilePaths partition path mapped to its file paths; only the number of paths
 *                             per partition matters, the path values themselves are not read
 * @param writes               write count for every stat, falling back to {@code DEFAULT_NUM_WRITES}
 * @param updates              update-write count for every stat, falling back to {@code DEFAULT_NUM_UPDATE_WRITES}
 * @param extraMetadata        key/value pairs copied onto the commit metadata
 * @return the populated commit metadata
 */
private static HoodieCommitMetadata generateCommitMetadata(Map<String, List<String>> partitionToFilePaths, Option<Integer> writes, Option<Integer> updates, Map<String, String> extraMetadata) {
    HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
    extraMetadata.forEach((metaKey, metaValue) -> commitMetadata.addMetadata(metaKey, metaValue));

    for (Map.Entry<String, List<String>> partitionEntry : partitionToFilePaths.entrySet()) {
        String partition = partitionEntry.getKey();
        // One stat per listed file; each stat is filled from the DEFAULT_* constants.
        for (String ignoredFilePath : partitionEntry.getValue()) {
            HoodieWriteStat stat = new HoodieWriteStat();
            stat.setPartitionPath(partition);
            stat.setPath(DEFAULT_PATH);
            stat.setFileId(DEFAULT_FILEID);
            stat.setTotalWriteBytes(DEFAULT_TOTAL_WRITE_BYTES);
            stat.setPrevCommit(DEFAULT_PRE_COMMIT);
            stat.setNumWrites(writes.orElse(DEFAULT_NUM_WRITES));
            stat.setNumUpdateWrites(updates.orElse(DEFAULT_NUM_UPDATE_WRITES));
            stat.setTotalLogBlocks(DEFAULT_TOTAL_LOG_BLOCKS);
            stat.setTotalLogRecords(DEFAULT_TOTAL_LOG_RECORDS);
            commitMetadata.addWriteStat(partition, stat);
        }
    }
    return commitMetadata;
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Arrays(java.util.Arrays) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HashMap(java.util.HashMap) UUID(java.util.UUID) FileCreateUtils.baseFileName(org.apache.hudi.common.testutils.FileCreateUtils.baseFileName) FileCreateUtils(org.apache.hudi.common.testutils.FileCreateUtils) StandardCharsets(java.nio.charset.StandardCharsets) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) CollectionUtils.createImmutableList(org.apache.hudi.common.util.CollectionUtils.createImmutableList) Path(org.apache.hadoop.fs.Path) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HashMap(java.util.HashMap) Map(java.util.Map)

Example 14 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

the class CommitUtils method buildMetadataFromStats.

/**
 * Builds commit metadata from a list of write stats.
 *
 * <p>For the replace-commit action a {@link HoodieReplaceCommitMetadata} is created and given
 * {@code partitionToReplaceFileIds}; for any other action a plain {@link HoodieCommitMetadata}
 * is created. Every write stat is then registered under its own partition path.
 *
 * @param writeStats                stats to add to the metadata
 * @param partitionToReplaceFileIds partition path mapped to replaced file ids; stored only for
 *                                  replace commits but always read for the log message
 * @param commitActionType          commit action; compared against
 *                                  {@code HoodieTimeline.REPLACE_COMMIT_ACTION}
 * @param operationType             write operation, used only in the log message
 * @return the populated commit metadata
 */
private static HoodieCommitMetadata buildMetadataFromStats(List<HoodieWriteStat> writeStats, Map<String, List<String>> partitionToReplaceFileIds, String commitActionType, WriteOperationType operationType) {
    final HoodieCommitMetadata commitMetadata;
    // Compare by value, not by reference: `==` only matches when the caller passes the very same
    // String instance as the constant. Constant-first equals also tolerates a null action type.
    if (HoodieTimeline.REPLACE_COMMIT_ACTION.equals(commitActionType)) {
        HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
        replaceMetadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds);
        commitMetadata = replaceMetadata;
    } else {
        commitMetadata = new HoodieCommitMetadata();
    }
    for (HoodieWriteStat writeStat : writeStats) {
        String partition = writeStat.getPartitionPath();
        commitMetadata.addWriteStat(partition, writeStat);
    }
    // A separating space precedes "numReplaceFileIds:" so the write-stat count does not run into the label.
    LOG.info("Creating  metadata for " + operationType + " numWriteStats:" + writeStats.size() + " numReplaceFileIds:" + partitionToReplaceFileIds.values().stream().mapToInt(List::size).sum());
    return commitMetadata;
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Schema(org.apache.avro.Schema) HoodieException(org.apache.hudi.exception.HoodieException) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HashMap(java.util.HashMap) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Map(java.util.Map) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) LogManager(org.apache.log4j.LogManager) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata)

Example 15 with HoodieWriteStat

use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

the class TestCluster method createTestData.

/**
 * Generates five parquet data files under {@code partPath} and returns one write stat per file.
 *
 * @param partPath              directory path the data files are placed under
 * @param isParquetSchemaSimple forwarded to {@code generateParquetData}
 * @param commitTime            commit time used when building each data file name
 * @return write stats, each carrying the file id and full path of one generated file
 * @throws IOException        declared by the original signature
 * @throws URISyntaxException declared by the original signature
 */
private List<HoodieWriteStat> createTestData(Path partPath, boolean isParquetSchemaSimple, String commitTime) throws IOException, URISyntaxException {
    final int fileCount = 5;
    final List<HoodieWriteStat> stats = new ArrayList<>();
    for (int fileIdx = 0; fileIdx < fileCount; fileIdx++) {
        // Each file gets a fresh random id, which is also part of its file name.
        final String id = UUID.randomUUID().toString();
        final String dataFileName = FSUtils.makeDataFileName(commitTime, "1-0-1", id);
        final Path location = new Path(partPath.toString() + "/" + dataFileName);
        generateParquetData(location, isParquetSchemaSimple);

        final HoodieWriteStat stat = new HoodieWriteStat();
        stat.setFileId(id);
        stat.setPath(location.toString());
        stats.add(stat);
    }
    return stats;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) ArrayList(java.util.ArrayList)

Aggregations

HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat)74 HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)42 List (java.util.List)38 ArrayList (java.util.ArrayList)33 HashMap (java.util.HashMap)32 Map (java.util.Map)32 Path (org.apache.hadoop.fs.Path)28 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)24 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)23 IOException (java.io.IOException)22 Option (org.apache.hudi.common.util.Option)19 Collectors (java.util.stream.Collectors)18 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)18 WriteStatus (org.apache.hudi.client.WriteStatus)17 HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata)17 LogManager (org.apache.log4j.LogManager)16 Logger (org.apache.log4j.Logger)16 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)15 FileSlice (org.apache.hudi.common.model.FileSlice)14 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)14