Example 1 with HoodiePartitionMetadata

Use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by Apache.

From the class HoodieClientTestBase, method assertPartitionMetadata.

/**
 * Ensure presence of partition meta-data at known depth.
 *
 * @param partitionPaths Partition paths to check
 * @param fs File System
 * @throws IOException in case of error
 */
public void assertPartitionMetadata(String[] partitionPaths, FileSystem fs) throws IOException {
    for (String partitionPath : partitionPaths) {
        assertTrue(HoodiePartitionMetadata.hasPartitionMetadata(fs, new Path(basePath, partitionPath)));
        HoodiePartitionMetadata pmeta = new HoodiePartitionMetadata(fs, new Path(basePath, partitionPath));
        pmeta.readFromFS();
        assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_DEPTH, pmeta.getPartitionDepth());
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HoodiePartitionMetadata (org.apache.hudi.common.model.HoodiePartitionMetadata)
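
A minimal sketch of how a test might invoke this helper. The test name is hypothetical; it assumes the harness (HoodieClientTestBase) has initialized fs and basePath, and that records were written to the generator's default partitions (HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS) before the assertion runs:

@Test
public void testPartitionMetadataIsCreatedOnWrite() throws IOException {
    // Every partition touched by a write should carry a partition metafile
    // recording its depth relative to the table base path.
    assertPartitionMetadata(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, fs);
}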

Example 2 with HoodiePartitionMetadata

Use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by Apache.

From the class HoodieAppendHandle, method init.

private void init(HoodieRecord record) {
    if (doInit) {
        // extract some information from the first record
        SliceView rtView = hoodieTable.getSliceView();
        Option<FileSlice> fileSlice = rtView.getLatestFileSlice(partitionPath, fileId);
        // Set the base commit time as the current instantTime for new inserts into log files
        String baseInstantTime;
        String baseFile = "";
        List<String> logFiles = new ArrayList<>();
        if (fileSlice.isPresent()) {
            baseInstantTime = fileSlice.get().getBaseInstantTime();
            baseFile = fileSlice.get().getBaseFile().map(BaseFile::getFileName).orElse("");
            logFiles = fileSlice.get().getLogFiles().map(HoodieLogFile::getFileName).collect(Collectors.toList());
        } else {
            baseInstantTime = instantTime;
            // This means there is no base data file, start appending to a new log file
            fileSlice = Option.of(new FileSlice(partitionPath, baseInstantTime, this.fileId));
            LOG.info("New AppendHandle for partition :" + partitionPath);
        }
        // Prepare the first write status
        writeStatus.setStat(new HoodieDeltaWriteStat());
        writeStatus.setFileId(fileId);
        writeStatus.setPartitionPath(partitionPath);
        averageRecordSize = sizeEstimator.sizeEstimate(record);
        HoodieDeltaWriteStat deltaWriteStat = (HoodieDeltaWriteStat) writeStatus.getStat();
        deltaWriteStat.setPrevCommit(baseInstantTime);
        deltaWriteStat.setPartitionPath(partitionPath);
        deltaWriteStat.setFileId(fileId);
        deltaWriteStat.setBaseFile(baseFile);
        deltaWriteStat.setLogFiles(logFiles);
        try {
            // Save hoodie partition meta in the partition path
            HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, baseInstantTime, new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
            partitionMetadata.trySave(getPartitionId());
            // Since the actual log file written to can be different based on when rollover happens, we use the
            // base file to denote some log appends happened on a slice. writeToken will still fence concurrent
            // writers.
            // https://issues.apache.org/jira/browse/HUDI-1517
            createMarkerFile(partitionPath, FSUtils.makeDataFileName(baseInstantTime, writeToken, fileId, hoodieTable.getBaseFileExtension()));
            this.writer = createLogWriter(fileSlice, baseInstantTime);
        } catch (Exception e) {
            LOG.error("Error in update task at commit " + instantTime, e);
            writeStatus.setGlobalError(e);
            throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId + " on commit " + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePath() + "/" + partitionPath, e);
        }
        doInit = false;
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FileSlice (org.apache.hudi.common.model.FileSlice), ArrayList (java.util.ArrayList), HoodiePartitionMetadata (org.apache.hudi.common.model.HoodiePartitionMetadata), HoodieException (org.apache.hudi.exception.HoodieException), HoodieUpsertException (org.apache.hudi.exception.HoodieUpsertException), HoodieAppendException (org.apache.hudi.exception.HoodieAppendException), IOException (java.io.IOException), SliceView (org.apache.hudi.common.table.view.TableFileSystemView.SliceView), BaseFile (org.apache.hudi.common.model.BaseFile), HoodieDeltaWriteStat (org.apache.hudi.common.model.HoodieDeltaWriteStat), HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)
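
Taken out of the handle, the metafile write/read round trip uses only the calls already shown on this page. A minimal sketch, assuming fs, instantTime, a String basePath, and a relative partitionPath are in scope:

// Write the partition metafile if it is not there yet; trySave is expected to
// skip the write when the metafile already exists (hence the "try").
Path partition = FSUtils.getPartitionPath(basePath, partitionPath);
HoodiePartitionMetadata written = new HoodiePartitionMetadata(fs, instantTime, new Path(basePath), partition);
written.trySave(0); // 0 stands in for the writing task's partition id

// Read it back the way Example 1 does and inspect the recorded depth.
if (HoodiePartitionMetadata.hasPartitionMetadata(fs, partition)) {
    HoodiePartitionMetadata readBack = new HoodiePartitionMetadata(fs, partition);
    readBack.readFromFS();
    int depth = readBack.getPartitionDepth(); // levels between partition and base path
}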

Example 3 with HoodiePartitionMetadata

Use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by Apache.

From the class HoodieMergeHandle, method init.

/**
 * Extract old file path, initialize StorageWriter and WriteStatus.
 */
private void init(String fileId, String partitionPath, HoodieBaseFile baseFileToMerge) {
    LOG.info("partitionPath:" + partitionPath + ", fileId to be merged:" + fileId);
    this.baseFileToMerge = baseFileToMerge;
    this.writtenRecordKeys = new HashSet<>();
    writeStatus.setStat(new HoodieWriteStat());
    try {
        String latestValidFilePath = baseFileToMerge.getFileName();
        writeStatus.getStat().setPrevCommit(FSUtils.getCommitTime(latestValidFilePath));
        HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, instantTime, new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
        partitionMetadata.trySave(getPartitionId());
        String newFileName = FSUtils.makeDataFileName(instantTime, writeToken, fileId, hoodieTable.getBaseFileExtension());
        makeOldAndNewFilePaths(partitionPath, latestValidFilePath, newFileName);
        LOG.info(String.format("Merging new data into oldPath %s, as newPath %s", oldFilePath.toString(), newFilePath.toString()));
        // file name is same for all records, in this bunch
        writeStatus.setFileId(fileId);
        writeStatus.setPartitionPath(partitionPath);
        writeStatus.getStat().setPartitionPath(partitionPath);
        writeStatus.getStat().setFileId(fileId);
        setWriteStatusPath();
        // Create Marker file
        createMarkerFile(partitionPath, newFileName);
        // Create the writer for writing the new version file
        fileWriter = createNewFileWriter(instantTime, newFilePath, hoodieTable, config, writeSchemaWithMetaFields, taskContextSupplier);
    } catch (IOException io) {
        LOG.error("Error in update task at commit " + instantTime, io);
        writeStatus.setGlobalError(io);
        throw new HoodieUpsertException("Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit " + instantTime + " on path " + hoodieTable.getMetaClient().getBasePath(), io);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat), HoodieUpsertException (org.apache.hudi.exception.HoodieUpsertException), HoodiePartitionMetadata (org.apache.hudi.common.model.HoodiePartitionMetadata), IOException (java.io.IOException), HoodieIOException (org.apache.hudi.exception.HoodieIOException)
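
Note that this merge path and the append path in Example 2 persist the metafile with the same two-line pattern before any data file is created; since trySave first checks for an existing metafile, repeating it across handles writing to the same partition should be safe. A condensed sketch of the shared pattern, assuming config, fs, instantTime, partitionPath, and the handle's getPartitionId() are in scope:

// Idempotently materialize the partition metafile before the first write.
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(
    fs, instantTime, new Path(config.getBasePath()),
    FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
partitionMetadata.trySave(getPartitionId());
partitionMetadata.trySave(getPartitionId()); // second call finds the metafile and skips the write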

Example 4 with HoodiePartitionMetadata

Use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by Apache.

From the class TestTablePathUtils, method setup.

@BeforeAll
static void setup() throws IOException {
    URI tablePathURI = Paths.get(tempDir.getAbsolutePath(), "test_table").toUri();
    tablePath = new Path(tablePathURI);
    fs = tablePath.getFileSystem(new Configuration());
    // Create bootstrap index folder
    assertTrue(new File(Paths.get(tablePathURI.getPath(), HoodieTableMetaClient.BOOTSTRAP_INDEX_ROOT_FOLDER_PATH).toUri()).mkdirs());
    // Create partition folders
    URI partitionPathURI1 = Paths.get(tablePathURI.getPath(), "key1=abc/key2=def").toUri();
    partitionPath1 = new Path(partitionPathURI1);
    URI partitionPathURI2 = Paths.get(tablePathURI.getPath(), "key1=xyz/key2=def").toUri();
    partitionPath2 = new Path(partitionPathURI2);
    assertTrue(new File(partitionPathURI1).mkdirs());
    assertTrue(new File(partitionPathURI2).mkdirs());
    HoodiePartitionMetadata partitionMetadata1 = new HoodiePartitionMetadata(fs, Instant.now().toString(), tablePath, partitionPath1);
    partitionMetadata1.trySave(1);
    HoodiePartitionMetadata partitionMetadata2 = new HoodiePartitionMetadata(fs, Instant.now().toString(), tablePath, partitionPath2);
    partitionMetadata2.trySave(2);
    // Create files
    URI filePathURI1 = Paths.get(partitionPathURI1.getPath(), "data1" + BASE_FILE_EXTENSION).toUri();
    filePath1 = new Path(filePathURI1);
    URI filePathURI2 = Paths.get(partitionPathURI2.getPath(), "data2" + BASE_FILE_EXTENSION).toUri();
    filePath2 = new Path(filePathURI2);
    assertTrue(new File(filePathURI1).createNewFile());
    assertTrue(new File(filePathURI2).createNewFile());
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), HoodiePartitionMetadata (org.apache.hudi.common.model.HoodiePartitionMetadata), URI (java.net.URI), File (java.io.File), BeforeAll (org.junit.jupiter.api.BeforeAll)
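
With this fixture in place, a typical test resolves the table path from a nested file. The test body below is hypothetical; it assumes TablePathUtils.getTablePath(fs, path), the utility this class exercises, climbs upward using the depth stored by trySave above:

@Test
void getTablePathFromFilePath() throws IOException {
    // filePath1 sits two partition levels below tablePath; the stored depth
    // lets the utility climb from the file back to the table root.
    Option<Path> inferredPath = TablePathUtils.getTablePath(fs, filePath1);
    assertTrue(inferredPath.isPresent());
    assertEquals(tablePath, inferredPath.get());
}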

Example 5 with HoodiePartitionMetadata

Use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by Apache.

From the class HoodieInputFormatUtils, method getTableMetaClientForBasePathUnchecked.

/**
 * Extract HoodieTableMetaClient from a partition path (not base path)
 */
public static HoodieTableMetaClient getTableMetaClientForBasePathUnchecked(Configuration conf, Path partitionPath) throws IOException {
    FileSystem fs = partitionPath.getFileSystem(conf);
    int levels = HoodieHiveUtils.DEFAULT_LEVELS_TO_BASEPATH;
    if (HoodiePartitionMetadata.hasPartitionMetadata(fs, partitionPath)) {
        HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, partitionPath);
        metadata.readFromFS();
        levels = metadata.getPartitionDepth();
    }
    Path baseDir = HoodieHiveUtils.getNthParent(partitionPath, levels);
    LOG.info("Reading hoodie metadata from path " + baseDir.toString());
    return HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(baseDir.toString()).build();
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), HoodiePartitionMetadata (org.apache.hudi.common.model.HoodiePartitionMetadata)
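
To make the depth arithmetic concrete: for a two-level Hive-style partition such as key1=abc/key2=def, the metafile records a depth of 2, so two parent hops recover the base path; without a metafile the code falls back to DEFAULT_LEVELS_TO_BASEPATH. A minimal sketch under those assumptions (the warehouse path is illustrative):

Path partitionPath = new Path("/warehouse/test_table/key1=abc/key2=def");
// With partition metadata present, getPartitionDepth() returns 2 here,
// so climbing two levels yields the table base path:
Path baseDir = HoodieHiveUtils.getNthParent(partitionPath, 2); // -> /warehouse/test_table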

Aggregations

Path (org.apache.hadoop.fs.Path): 9 usages
HoodiePartitionMetadata (org.apache.hudi.common.model.HoodiePartitionMetadata): 9 usages
IOException (java.io.IOException): 3 usages
HoodieException (org.apache.hudi.exception.HoodieException): 3 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 2 usages
HoodieUpsertException (org.apache.hudi.exception.HoodieUpsertException): 2 usages
File (java.io.File): 1 usage
URI (java.net.URI): 1 usage
ArrayList (java.util.ArrayList): 1 usage
Configuration (org.apache.hadoop.conf.Configuration): 1 usage
FileSystem (org.apache.hadoop.fs.FileSystem): 1 usage
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem): 1 usage
RawLocalFileSystem (org.apache.hadoop.fs.RawLocalFileSystem): 1 usage
HoodieLocalEngineContext (org.apache.hudi.common.engine.HoodieLocalEngineContext): 1 usage
BaseFile (org.apache.hudi.common.model.BaseFile): 1 usage
FileSlice (org.apache.hudi.common.model.FileSlice): 1 usage
HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile): 1 usage
HoodieDeltaWriteStat (org.apache.hudi.common.model.HoodieDeltaWriteStat): 1 usage
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 1 usage
HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat): 1 usage