Example 1 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

The class FlinkMergeAndReplaceHandle, method deleteInvalidDataFile:

/**
 * Flink checkpoints start in sequence and run asynchronously: when one write task finishes checkpoint (A)
 * (so the file system view sees the written data files, some of which may be invalid),
 * it immediately moves on to the write for the next checkpoint (B)
 * and may try to reuse the last small data bucket (small file) of an invalid data file.
 * Later, when the coordinator receives the success event for checkpoint (A),
 * the invalid data file is cleaned up,
 * and this handle gets a FileNotFoundException when it closes the write file handle.
 *
 * <p>To solve this, the invalid data file is deleted eagerly
 * so that its small-file bucket is never reused.
 *
 * @param lastAttemptId The last attempt ID
 */
private void deleteInvalidDataFile(long lastAttemptId) {
    final String lastWriteToken = FSUtils.makeWriteToken(getPartitionId(), getStageId(), lastAttemptId);
    final String lastDataFileName = FSUtils.makeDataFileName(instantTime, lastWriteToken, this.fileId, hoodieTable.getBaseFileExtension());
    final Path path = makeNewFilePath(partitionPath, lastDataFileName);
    try {
        if (fs.exists(path)) {
            LOG.info("Deleting invalid MERGE and REPLACE base file due to task retry: " + lastDataFileName);
            fs.delete(path, false);
        }
    } catch (IOException e) {
        throw new HoodieException("Error while deleting the MERGE and REPLACE base file due to task retry: " + lastDataFileName, e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieException (org.apache.hudi.exception.HoodieException), IOException (java.io.IOException), HoodieIOException (org.apache.hudi.exception.HoodieIOException)
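
In Hudi's Flink writer this cleanup runs when a task is retried. A minimal sketch of a call site, assuming the handle exposes a getAttemptId() accessor for the current task attempt (the exact wiring in Hudi may differ):

// Hypothetical call site: clean up the previous attempt's leftover file
// before this retry starts writing. getAttemptId() is an assumed accessor
// that returns the current task attempt id (0 on the first attempt).
long attemptId = getAttemptId();
if (attemptId > 0) {
    // The previous attempt embedded (attemptId - 1) in its write token; delete
    // its file eagerly so the small-file bucket is never offered for reuse.
    deleteInvalidDataFile(attemptId - 1);
}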

Example 2 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

The class FlinkCreateHandle, method makeNewPath:

@Override
public Path makeNewPath(String partitionPath) {
    Path path = super.makeNewPath(partitionPath);
    // Write to a new file which behaves like a different task write.
    try {
        int rollNumber = 0;
        while (fs.exists(path)) {
            Path existing = path;
            path = newFilePathWithRollover(rollNumber++);
            LOG.warn("Duplicate write for INSERT bucket with path: " + existing + ", rolls over to new path: " + path);
        }
        return path;
    } catch (IOException e) {
        throw new HoodieException("Checking existing path for create handle error: " + path, e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieException (org.apache.hudi.exception.HoodieException), IOException (java.io.IOException)
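
The rollover helper newFilePathWithRollover is not shown above. A plausible sketch, assuming the roll number is appended to the write token so the new name cannot collide with the existing file (the actual encoding in Hudi may differ):

// Hypothetical shape of the helper: fold the roll number into the write token,
// yielding names like fileId_2-0-1-3_20190528120000.parquet for roll 3.
// getAttemptId() is an assumed accessor for the current task attempt.
private Path newFilePathWithRollover(int rollNumber) {
    String token = FSUtils.makeWriteToken(getPartitionId(), getStageId(), getAttemptId()) + "-" + rollNumber;
    String dataFileName = FSUtils.makeDataFileName(instantTime, token, this.fileId, hoodieTable.getBaseFileExtension());
    return makeNewFilePath(partitionPath, dataFileName);
}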

Example 3 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

The class FlinkCreateHandle, method deleteInvalidDataFile:

/**
 * Flink checkpoints start in sequence and run asynchronously: when one write task finishes checkpoint (A)
 * (so the file system view sees the written data files, some of which may be invalid),
 * it immediately moves on to the write for the next checkpoint (B)
 * and may try to reuse the last small data bucket (small file) of an invalid data file.
 * Later, when the coordinator receives the success event for checkpoint (A),
 * the invalid data file is cleaned up,
 * and this handle gets a FileNotFoundException when it closes the write file handle.
 *
 * <p>To solve this, the invalid data file is deleted eagerly
 * so that its small-file bucket is never reused.
 *
 * @param lastAttemptId The last attempt ID
 */
private void deleteInvalidDataFile(long lastAttemptId) {
    final String lastWriteToken = FSUtils.makeWriteToken(getPartitionId(), getStageId(), lastAttemptId);
    final String lastDataFileName = FSUtils.makeDataFileName(instantTime, lastWriteToken, this.fileId, hoodieTable.getBaseFileExtension());
    final Path path = makeNewFilePath(partitionPath, lastDataFileName);
    try {
        if (fs.exists(path)) {
            LOG.info("Deleting invalid INSERT file due to task retry: " + lastDataFileName);
            fs.delete(path, false);
        }
    } catch (IOException e) {
        throw new HoodieException("Error while deleting the INSERT file due to task retry: " + lastDataFileName, e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieException (org.apache.hudi.exception.HoodieException), IOException (java.io.IOException)
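
For reference, the name reconstructed here follows the base-file convention visible in the Example 4 test data: &lt;fileId&gt;_&lt;writeToken&gt;_&lt;instantTime&gt;&lt;extension&gt;, with the write token composed as &lt;partitionId&gt;-&lt;stageId&gt;-&lt;attemptId&gt;. An illustration with made-up values:

// Illustration only; the literal arguments are invented.
String writeToken = FSUtils.makeWriteToken(2, 0, 1);
// Expected, assuming the convention above: "2-0-1"
String fileName = FSUtils.makeDataFileName("20190528120000", writeToken, "1", ".parquet");
// Expected, assuming the convention above: "1_2-0-1_20190528120000.parquet"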

Example 4 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

The class TestBootstrapUtils, method testAllLeafFoldersWithFiles:

@Test
public void testAllLeafFoldersWithFiles() throws IOException {
    // Partition directories to create up front.
    List<String> folders = Arrays.asList("2016/04/15", "2016/05/16", "2016/05/17");
    folders.forEach(f -> {
        try {
            metaClient.getFs().mkdirs(new Path(new Path(basePath), f));
        } catch (IOException e) {
            throw new HoodieException(e);
        }
    });
    // Files inside partition directories; 2016/04/17 has no explicit mkdirs and is created implicitly by create().
    List<String> files = Stream.of(
            "2016/04/15/1_1-0-1_20190528120000", "2016/04/15/2_1-0-1_20190528120000",
            "2016/05/16/3_1-0-1_20190528120000", "2016/05/16/4_1-0-1_20190528120000",
            "2016/04/17/5_1-0-1_20190528120000", "2016/04/17/6_1-0-1_20190528120000")
        .map(file -> file + metaClient.getTableConfig().getBaseFileFormat().getFileExtension())
        .collect(Collectors.toList());
    files.forEach(f -> {
        try {
            metaClient.getFs().create(new Path(new Path(basePath), f));
        } catch (IOException e) {
            throw new HoodieException(e);
        }
    });
    List<Pair<String, List<HoodieFileStatus>>> collected = BootstrapUtils.getAllLeafFoldersWithFiles(metaClient, metaClient.getFs(), basePath, context);
    assertEquals(3, collected.size());
    collected.forEach(k -> assertEquals(2, k.getRight().size()));
    // Simulate reading from un-partitioned dataset
    collected = BootstrapUtils.getAllLeafFoldersWithFiles(metaClient, metaClient.getFs(), basePath + "/" + folders.get(0), context);
    assertEquals(1, collected.size());
    collected.forEach(k -> assertEquals(2, k.getRight().size()));
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieClientTestBase (org.apache.hudi.testutils.HoodieClientTestBase), Test (org.junit.jupiter.api.Test), Arrays (java.util.Arrays), List (java.util.List), Stream (java.util.stream.Stream), HoodieFileStatus (org.apache.hudi.avro.model.HoodieFileStatus), HoodieException (org.apache.hudi.exception.HoodieException), IOException (java.io.IOException), Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals), Collectors (java.util.stream.Collectors), Pair (org.apache.hudi.common.util.collection.Pair)
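
Both forEach blocks above use the same idiom: Consumer#accept cannot declare checked exceptions, so the checked IOException is rethrown wrapped in the unchecked HoodieException, preserving the cause. A reusable sketch of that idiom; the Lambdas class, ThrowingConsumer interface, and unchecked helper are mine, not Hudi's:

import java.io.IOException;
import java.util.function.Consumer;
import org.apache.hudi.exception.HoodieException;

// Hypothetical utility, not part of Hudi: lets an IOException-throwing lambda
// be passed where a plain Consumer is expected, wrapping failures in the
// unchecked HoodieException so the cause survives into the test report.
final class Lambdas {
    @FunctionalInterface
    interface ThrowingConsumer<T> {
        void accept(T t) throws IOException;
    }

    static <T> Consumer<T> unchecked(ThrowingConsumer<T> body) {
        return t -> {
            try {
                body.accept(t);
            } catch (IOException e) {
                throw new HoodieException(e);
            }
        };
    }
}

// Usage, equivalent to the first forEach above:
// folders.forEach(Lambdas.unchecked(f -> metaClient.getFs().mkdirs(new Path(new Path(basePath), f))));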

Example 5 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

The class HoodieClientTestUtils, method getLatestBaseFiles:

public static List<HoodieBaseFile> getLatestBaseFiles(String basePath, FileSystem fs, String... paths) {
    List<HoodieBaseFile> latestFiles = new ArrayList<>();
    try {
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
            .setConf(fs.getConf())
            .setBasePath(basePath)
            .setLoadActiveTimelineOnLoad(true)
            .build();
        for (String path : paths) {
            BaseFileOnlyView fileSystemView = new HoodieTableFileSystemView(
                metaClient,
                metaClient.getCommitsTimeline().filterCompletedInstants(),
                fs.globStatus(new Path(path)));
            latestFiles.addAll(fileSystemView.getLatestBaseFiles().collect(Collectors.toList()));
        }
    } catch (Exception e) {
        throw new HoodieException("Error reading hoodie table as a dataframe", e);
    }
    return latestFiles;
}
Also used: HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), Path (org.apache.hadoop.fs.Path), HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile), ArrayList (java.util.ArrayList), HoodieException (org.apache.hudi.exception.HoodieException), BaseFileOnlyView (org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView), HoodieTableFileSystemView (org.apache.hudi.common.table.view.HoodieTableFileSystemView), IOException (java.io.IOException)
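
A hedged usage sketch; the table path and partition globs below are invented:

// Hypothetical invocation: collect the latest base file per file group
// under two partitions of a test table.
FileSystem fs = FileSystem.get(new Configuration());
List<HoodieBaseFile> latest = HoodieClientTestUtils.getLatestBaseFiles(
    "/tmp/hoodie/test_table", fs,
    "/tmp/hoodie/test_table/2016/04/15/*", "/tmp/hoodie/test_table/2016/05/16/*");
latest.forEach(f -> System.out.println(f.getPath()));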

Aggregations

HoodieException (org.apache.hudi.exception.HoodieException): 171
IOException (java.io.IOException): 87
Path (org.apache.hadoop.fs.Path): 45
Schema (org.apache.avro.Schema): 35
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 35
List (java.util.List): 30
ArrayList (java.util.ArrayList): 27
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 23
Collectors (java.util.stream.Collectors): 21
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19
Option (org.apache.hudi.common.util.Option): 19
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 18
Map (java.util.Map): 16
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 16
GenericRecord (org.apache.avro.generic.GenericRecord): 15
Arrays (java.util.Arrays): 14
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 14
Logger (org.apache.log4j.Logger): 14
FileStatus (org.apache.hadoop.fs.FileStatus): 13
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 13