
Example 1 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class HoodieRowDataCreateHandle, method makeNewPath:

private Path makeNewPath(String partitionPath) {
    Path path = FSUtils.getPartitionPath(writeConfig.getBasePath(), partitionPath);
    try {
        if (!fs.exists(path)) {
            // create a new partition as needed.
            fs.mkdirs(path);
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to make dir " + path, e);
    }
    HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
    return new Path(path.toString(), FSUtils.makeDataFileName(instantTime, getWriteToken(), fileId, tableConfig.getBaseFileFormat().getFileExtension()));
}
Also used: Path(org.apache.hadoop.fs.Path), HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig), HoodieIOException(org.apache.hudi.exception.HoodieIOException), IOException(java.io.IOException)
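
HoodieIOException is unchecked (it extends HoodieException, a RuntimeException), so the wrap-and-rethrow above frees callers from handling the checked IOException. A minimal sketch of the same pattern in isolation, assuming only the Hadoop FileSystem API; the class and helper names are invented for illustration:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.exception.HoodieIOException;

public class PartitionDirs {
    // Hypothetical helper: create the directory if it does not exist yet,
    // translating the checked IOException into Hudi's unchecked HoodieIOException.
    public static void ensureDir(FileSystem fs, Path path) {
        try {
            if (!fs.exists(path)) {
                fs.mkdirs(path);
            }
        } catch (IOException e) {
            throw new HoodieIOException("Failed to make dir " + path, e);
        }
    }
}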

Example 2 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class TestHoodieSparkMergeOnReadTableRollback, method testInsertsGeneratedIntoLogFilesRollback:

@ParameterizedTest
@ValueSource(booleans = { true, false })
void testInsertsGeneratedIntoLogFilesRollback(boolean rollbackUsingMarkers) throws Exception {
    Properties properties = new Properties();
    properties.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().toString());
    HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, properties);
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
    // insert 100 records
    // Setting IndexType to be InMemory to simulate Global Index nature
    HoodieWriteConfig config = getConfigBuilder(false, rollbackUsingMarkers, HoodieIndex.IndexType.INMEMORY).build();
    try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config)) {
        String newCommitTime = "100";
        writeClient.startCommitWithTime(newCommitTime);
        List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 100);
        JavaRDD<HoodieRecord> recordsRDD = jsc().parallelize(records, 1);
        // trigger an action
        List<WriteStatus> writeStatuses = ((JavaRDD<WriteStatus>) writeClient.insert(recordsRDD, newCommitTime)).collect();
        // Ensure that inserts are written to only log files
        assertEquals(0, writeStatuses.stream().filter(writeStatus -> !writeStatus.getStat().getPath().contains("log")).count());
        assertTrue(writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPath().contains("log")));
        // rollback a failed commit
        boolean rollback = writeClient.rollback(newCommitTime);
        assertTrue(rollback);
        // insert 100 records
        newCommitTime = "101";
        writeClient.startCommitWithTime(newCommitTime);
        records = dataGen.generateInserts(newCommitTime, 100);
        recordsRDD = jsc().parallelize(records, 1);
        writeClient.insert(recordsRDD, newCommitTime).collect();
        // Sleep for small interval (at least 1 second) to force a new rollback start time.
        Thread.sleep(1000);
        // We will test HUDI-204 here. We will simulate rollback happening twice by copying the commit file to local fs
        // and calling rollback twice
        final String lastCommitTime = newCommitTime;
        // Save the .commit file to local directory.
        // Rollback will be called twice to test the case where rollback failed first time and retried.
        // Before the fix for HUDI-204, this scenario threw a "BaseCommitTime cannot be null" exception.
        java.nio.file.Path tempFolder = Files.createTempDirectory(this.getClass().getCanonicalName());
        Map<String, String> fileNameMap = new HashMap<>();
        for (HoodieInstant.State state : Arrays.asList(HoodieInstant.State.REQUESTED, HoodieInstant.State.INFLIGHT)) {
            HoodieInstant toCopy = new HoodieInstant(state, HoodieTimeline.DELTA_COMMIT_ACTION, lastCommitTime);
            File file = Files.createTempFile(tempFolder, null, null).toFile();
            metaClient.getFs().copyToLocalFile(new Path(metaClient.getMetaPath(), toCopy.getFileName()), new Path(file.getAbsolutePath()));
            fileNameMap.put(file.getAbsolutePath(), toCopy.getFileName());
        }
        Path markerDir = new Path(Files.createTempDirectory(tempFolder, null).toAbsolutePath().toString());
        if (rollbackUsingMarkers) {
            metaClient.getFs().copyToLocalFile(new Path(metaClient.getMarkerFolderPath(lastCommitTime)), markerDir);
        }
        writeClient.rollback(newCommitTime);
        metaClient = HoodieTableMetaClient.reload(metaClient);
        HoodieTable table = HoodieSparkTable.create(config, context());
        TableFileSystemView.SliceView tableRTFileSystemView = table.getSliceView();
        long numLogFiles = 0;
        for (String partitionPath : dataGen.getPartitionPaths()) {
            assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getBaseFile().isPresent()));
            assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).noneMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
            numLogFiles += tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
        }
        assertEquals(0, numLogFiles);
        for (Map.Entry<String, String> entry : fileNameMap.entrySet()) {
            try {
                metaClient.getFs().copyFromLocalFile(new Path(entry.getKey()), new Path(metaClient.getMetaPath(), entry.getValue()));
            } catch (IOException e) {
                throw new HoodieIOException("Error copying state from local disk.", e);
            }
        }
        if (rollbackUsingMarkers) {
            metaClient.getFs().copyFromLocalFile(new Path(markerDir, lastCommitTime), new Path(metaClient.getMarkerFolderPath(lastCommitTime)));
        }
        Thread.sleep(1000);
        // Rollback again to pretend the first rollback failed partially. This should not error out
        writeClient.rollback(newCommitTime);
    }
}
Also used: HoodieTable(org.apache.hudi.table.HoodieTable), HoodieMergeOnReadTestUtils(org.apache.hudi.testutils.HoodieMergeOnReadTestUtils), Arrays(java.util.Arrays), HoodieFailedWritesCleaningPolicy(org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy), HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant), HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator), Assertions.assertNotEquals(org.junit.jupiter.api.Assertions.assertNotEquals), FileStatus(org.apache.hadoop.fs.FileStatus), HoodieTableType(org.apache.hudi.common.model.HoodieTableType), HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup), Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse), HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig), Map(java.util.Map), HoodieStorageConfig(org.apache.hudi.config.HoodieStorageConfig), Path(org.apache.hadoop.fs.Path), Assertions.assertAll(org.junit.jupiter.api.Assertions.assertAll), Tag(org.junit.jupiter.api.Tag), HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata), TRIP_EXAMPLE_SCHEMA(org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA), Collection(java.util.Collection), Collectors(java.util.stream.Collectors), HoodieIndex(org.apache.hudi.index.HoodieIndex), Test(org.junit.jupiter.api.Test), HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile), List(java.util.List), Stream(java.util.stream.Stream), FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig), HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat), Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue), TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView), FileSlice(org.apache.hudi.common.model.FileSlice), Option(org.apache.hudi.common.util.Option), HashMap(java.util.HashMap), ArrayList(java.util.ArrayList), MarkerType(org.apache.hudi.common.table.marker.MarkerType), HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable), HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient), Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals), HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline), JavaRDD(org.apache.spark.api.java.JavaRDD), SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView), ValueSource(org.junit.jupiter.params.provider.ValueSource), HoodieRecord(org.apache.hudi.common.model.HoodieRecord), GenericRecord(org.apache.avro.generic.GenericRecord), Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors), Properties(java.util.Properties), HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig), Files(java.nio.file.Files), HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable), HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata), IOException(java.io.IOException), File(java.io.File), HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView), HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig), WriteStatus(org.apache.hudi.client.WriteStatus), HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload), ParameterizedTest(org.junit.jupiter.params.ParameterizedTest), SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient), SparkClientFunctionalTestHarness(org.apache.hudi.testutils.SparkClientFunctionalTestHarness), HoodieIOException(org.apache.hudi.exception.HoodieIOException), Pair(org.apache.hudi.common.util.collection.Pair)
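
In the test above, copying the REQUESTED and INFLIGHT instant files to local disk (and back) is what lets the second rollback run against a timeline that looks partially rolled back, and the restore loop wraps any IOException in HoodieIOException. A hypothetical extraction of the backup half into a reusable helper, built only from the metaClient calls already shown; the class and method names are invented:

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.exception.HoodieIOException;

public class TimelineBackup {
    // Copy each instant's timeline file to a local temp directory and return a
    // map of localPath -> timelineFileName so the files can be restored later.
    public static Map<String, String> backup(HoodieTableMetaClient metaClient,
            java.nio.file.Path tempFolder, HoodieInstant... instants) {
        Map<String, String> fileNameMap = new HashMap<>();
        try {
            for (HoodieInstant instant : instants) {
                File local = Files.createTempFile(tempFolder, null, null).toFile();
                metaClient.getFs().copyToLocalFile(
                        new Path(metaClient.getMetaPath(), instant.getFileName()),
                        new Path(local.getAbsolutePath()));
                fileNameMap.put(local.getAbsolutePath(), instant.getFileName());
            }
        } catch (IOException e) {
            throw new HoodieIOException("Error saving timeline state to local disk.", e);
        }
        return fileNameMap;
    }
}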

Example 3 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class HoodieAvroUtils, method indexedRecordToBytes:

public static <T extends IndexedRecord> byte[] indexedRecordToBytes(T record) {
    GenericDatumWriter<T> writer = new GenericDatumWriter<>(record.getSchema());
    try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
        // Reuse the prior encoder instance (if any) so its internal buffer is recycled.
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, reuseEncoder.get());
        reuseEncoder.set(encoder);
        writer.write(record, encoder);
        encoder.flush();
        return out.toByteArray();
    } catch (IOException e) {
        throw new HoodieIOException("Cannot convert GenericRecord to bytes", e);
    }
}
Also used: HoodieIOException(org.apache.hudi.exception.HoodieIOException), BinaryEncoder(org.apache.avro.io.BinaryEncoder), GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter), ByteArrayOutputStream(java.io.ByteArrayOutputStream), IOException(java.io.IOException)
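
A natural companion to this serializer is the inverse operation. The sketch below is hypothetical (it is not part of the snippet above), assuming the caller supplies the schema the bytes were written with:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.hudi.exception.HoodieIOException;

public class AvroBytes {
    // Decode Avro binary bytes back into a GenericRecord, mirroring the
    // error-wrapping style of indexedRecordToBytes.
    public static GenericRecord bytesToRecord(byte[] bytes, Schema schema) {
        try {
            BinaryDecoder decoder =
                    DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(bytes), null);
            return new GenericDatumReader<GenericRecord>(schema).read(null, decoder);
        } catch (IOException e) {
            throw new HoodieIOException("Cannot convert bytes to GenericRecord", e);
        }
    }
}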

Example 4 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class HFileBootstrapIndex, method dropIndex:

@Override
public void dropIndex() {
    try {
        Path[] indexPaths = new Path[] { partitionIndexPath(metaClient), fileIdIndexPath(metaClient) };
        for (Path indexPath : indexPaths) {
            if (metaClient.getFs().exists(indexPath)) {
                LOG.info("Dropping bootstrap index. Deleting file : " + indexPath);
                metaClient.getFs().delete(indexPath);
            }
        }
    } catch (IOException ioe) {
        throw new HoodieIOException(ioe.getMessage(), ioe);
    }
}
Also used: Path(org.apache.hadoop.fs.Path), HoodieIOException(org.apache.hudi.exception.HoodieIOException), IOException(java.io.IOException)
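
Here the IOException's own message becomes the HoodieIOException message, with the original exception chained as the cause. A hedged sketch generalizing the existence-check-then-delete loop, using the non-deprecated two-argument FileSystem.delete; the class and helper names are invented:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.exception.HoodieIOException;

public class FsCleanup {
    // Delete each path that exists, wrapping the checked IOException in
    // Hudi's unchecked HoodieIOException.
    public static void deleteIfExists(FileSystem fs, Path... paths) {
        try {
            for (Path path : paths) {
                if (fs.exists(path)) {
                    // false = non-recursive, assuming each path is a single file
                    fs.delete(path, false);
                }
            }
        } catch (IOException ioe) {
            throw new HoodieIOException(ioe.getMessage(), ioe);
        }
    }
}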

Example 5 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

From the class HFileBootstrapIndex, method createReader:

/**
 * Helper method to create HFile Reader.
 *
 * @param hFilePath File Path
 * @param conf Configuration
 * @param fileSystem File System
 */
private static HFile.Reader createReader(String hFilePath, Configuration conf, FileSystem fileSystem) {
    try {
        LOG.info("Opening HFile for reading :" + hFilePath);
        HFile.Reader reader = HFile.createReader(fileSystem, new HFilePathForReader(hFilePath), new CacheConfig(conf), conf);
        return reader;
    } catch (IOException ioe) {
        throw new HoodieIOException(ioe.getMessage(), ioe);
    }
}
Also used: HoodieIOException(org.apache.hudi.exception.HoodieIOException), IOException(java.io.IOException), HFile(org.apache.hadoop.hbase.io.hfile.HFile), CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig)
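
HFile.Reader is Closeable, so the caller that receives it is responsible for releasing it. A hypothetical usage sketch, assuming the same four-argument HFile.createReader overload used above and a plain Path rather than Hudi's HFilePathForReader wrapper; the class and method names are invented:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hudi.exception.HoodieIOException;

public class HFileEntryCount {
    // Open the HFile, read its key/value entry count, and always close the reader.
    public static long entryCount(FileSystem fs, String hFilePath, Configuration conf) {
        try (HFile.Reader reader =
                HFile.createReader(fs, new Path(hFilePath), new CacheConfig(conf), conf)) {
            return reader.getEntries();
        } catch (IOException ioe) {
            throw new HoodieIOException(ioe.getMessage(), ioe);
        }
    }
}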

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException): 139
IOException (java.io.IOException): 127
Path (org.apache.hadoop.fs.Path): 45
List (java.util.List): 31
ArrayList (java.util.ArrayList): 30
Option (org.apache.hudi.common.util.Option): 27
Collectors (java.util.stream.Collectors): 26
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 26
Pair (org.apache.hudi.common.util.collection.Pair): 25
LogManager (org.apache.log4j.LogManager): 25
Logger (org.apache.log4j.Logger): 25
Map (java.util.Map): 21
FileSystem (org.apache.hadoop.fs.FileSystem): 20
GenericRecord (org.apache.avro.generic.GenericRecord): 19
HashSet (java.util.HashSet): 18
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 18
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18
Set (java.util.Set): 17
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 17
HoodieException (org.apache.hudi.exception.HoodieException): 17