Search in sources :

Example 41 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The following example is from the class FSUtils, method getFs.

/**
 * Resolves the {@link FileSystem} instance that serves the given path.
 *
 * @param path Path whose file system should be resolved.
 * @param conf Hadoop configuration; augmented via {@code prepareHadoopConf} before use.
 * @return The {@link FileSystem} instance for {@code path}.
 * @throws HoodieIOException If the file system cannot be instantiated.
 */
public static FileSystem getFs(String path, Configuration conf) {
    FileSystem fs;
    prepareHadoopConf(conf);
    try {
        fs = new Path(path).getFileSystem(conf);
    } catch (IOException e) {
        // Include the offending path so the failure is diagnosable from the message alone.
        throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName()
            + " for path " + path, e);
    }
    return fs;
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Example 42 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The following example is from the class FSUtils, method getFileStatusAtLevel.

/**
 * Lists the {@link FileStatus} of every file found exactly {@code expectLevel}
 * directory levels below {@code rootPath}.
 * <p>
 * For example, with rootPath "/tmp/hoodie_table" and expectLevel 3, the result
 * covers all files under "/tmp/hoodie_table/[*]/[*]/[*]/".
 *
 * @param hoodieEngineContext {@link HoodieEngineContext} used to parallelize the listing.
 * @param fs                  {@link FileSystem} instance.
 * @param rootPath            Root path to start the listing from.
 * @param expectLevel         Directory depth at which files are collected.
 * @param parallelism         Parallelism for each per-level listing pass.
 * @return File statuses of all files at the requested level.
 */
public static List<FileStatus> getFileStatusAtLevel(HoodieEngineContext hoodieEngineContext, FileSystem fs, Path rootPath, int expectLevel, int parallelism) {
    // Breadth-first descent: each pass lists the children of every path collected
    // in the previous pass, starting from the root itself.
    List<String> currentLevelPaths = new ArrayList<>();
    currentLevelPaths.add(rootPath.toString());
    List<FileStatus> listed = new ArrayList<>();
    for (int level = 0; level <= expectLevel; level++) {
        listed = FSUtils.parallelizeFilesProcess(hoodieEngineContext, fs, parallelism, subPathAndConf -> {
            Path subPath = new Path(subPathAndConf.getKey());
            try {
                FileSystem fileSystem = subPath.getFileSystem(subPathAndConf.getValue().get());
                return Arrays.stream(fileSystem.listStatus(subPath)).collect(Collectors.toList());
            } catch (IOException e) {
                throw new HoodieIOException("Failed to list " + subPath, e);
            }
        }, currentLevelPaths).values().stream().flatMap(statuses -> statuses.stream()).collect(Collectors.toList());
        // Descend into directories only while there are more levels left to visit.
        if (level < expectLevel) {
            currentLevelPaths = listed.stream()
                .filter(FileStatus::isDirectory)
                .map(status -> status.getPath().toString())
                .collect(Collectors.toList());
        }
    }
    return listed;
}
Also used : ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Arrays(java.util.Arrays) InvalidHoodiePathException(org.apache.hudi.exception.InvalidHoodiePathException) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieException(org.apache.hudi.exception.HoodieException) PathFilter(org.apache.hadoop.fs.PathFilter) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) FileStatus(org.apache.hadoop.fs.FileStatus) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) Matcher(java.util.regex.Matcher) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) HoodieTableMetadata(org.apache.hudi.metadata.HoodieTableMetadata) Predicate(java.util.function.Predicate) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) Set(java.util.Set) IOException(java.io.IOException) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) Objects(java.util.Objects) HoodieFileFormat(org.apache.hudi.common.model.HoodieFileFormat) List(java.util.List) Stream(java.util.stream.Stream) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) HoodiePartitionMetadata(org.apache.hudi.common.model.HoodiePartitionMetadata) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) Entry(java.util.Map.Entry) HoodieIOException(org.apache.hudi.exception.HoodieIOException) 
LogManager(org.apache.log4j.LogManager) Pattern(java.util.regex.Pattern) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) Pair(org.apache.hudi.common.util.collection.Pair) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HoodieIOException(org.apache.hudi.exception.HoodieIOException) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) ArrayList(java.util.ArrayList) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Example 43 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The following example is from the class HoodieMetadataMetrics, method getStats.

/**
 * Builds a map of metadata stats for the table behind the given meta client.
 * Reloads the active timeline first so the stats reflect the latest state.
 *
 * @param detailed   Whether to include detailed stats.
 * @param metaClient Meta client of the table to inspect.
 * @param metadata   Table metadata handle.
 * @return Stat name to stat value.
 * @throws HoodieIOException If computing the stats fails with an I/O error.
 */
public Map<String, String> getStats(boolean detailed, HoodieTableMetaClient metaClient, HoodieTableMetadata metadata) {
    try {
        metaClient.reloadActiveTimeline();
        final HoodieTableFileSystemView fileSystemView =
            new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline());
        return getStats(fileSystemView, detailed, metadata);
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to get metadata stats.", ioe);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView)

Example 44 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The following example is from the class HoodieHFileReader, method close.

/**
 * Closes the HFile reader and its backing input stream and releases the key scanner.
 * <p>
 * References are nulled and the input stream is closed in a {@code finally} block so
 * that a failure in {@code reader.close()} cannot leak the underlying stream — in the
 * previous version an exception there skipped the stream close entirely.
 *
 * @throws HoodieIOException If closing the reader or the input stream fails.
 */
@Override
public synchronized void close() {
    try {
        try {
            reader.close();
        } finally {
            // Run cleanup unconditionally: drop references and close the stream
            // even when reader.close() threw.
            reader = null;
            keyScanner = null;
            if (fsDataInputStream != null) {
                fsDataInputStream.close();
            }
        }
    } catch (IOException e) {
        throw new HoodieIOException("Error closing the hfile reader", e);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Example 45 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The following example is from the class HoodieHFileReader, method getRecordIterator.

/**
 * Returns an iterator over the records of the underlying HFile, deserialized with the
 * given reader schema.
 * <p>
 * Fixes applied: the schema-key validation now checks {@code keyFieldSchema.isPresent()}
 * ({@code Option.ofNullable} never returns {@code null}, so the old reference check was a
 * no-op), and the error message in {@code next()} no longer refers to a "parquet file"
 * inside an HFile reader.
 *
 * @param readerSchema Schema used to deserialize the records; must contain the key field.
 * @return Iterator over the records in the file.
 * @throws IOException If opening the scanner fails.
 */
@Override
public Iterator getRecordIterator(Schema readerSchema) throws IOException {
    final HFileScanner scanner = reader.getScanner(false, false);
    final Option<Schema.Field> keyFieldSchema = Option.ofNullable(readerSchema.getField(KEY_FIELD_NAME));
    // Check presence of the field inside the Option; the Option reference itself is never null.
    ValidationUtils.checkState(keyFieldSchema.isPresent(), "Missing key field '" + KEY_FIELD_NAME + "' in the schema!");
    return new Iterator<R>() {

        // Pre-fetched record, or null when nothing is buffered yet / iteration is done.
        private R next = null;

        // Set once the scanner is exhausted so hasNext() stops trying to seek.
        private boolean eof = false;

        @Override
        public boolean hasNext() {
            try {
                // To handle when hasNext() is called multiple times for idempotency and/or the first time
                if (this.next == null && !this.eof) {
                    if (!scanner.isSeeked() && scanner.seekTo()) {
                        final Pair<String, R> keyAndRecordPair = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema, keyFieldSchema);
                        this.next = keyAndRecordPair.getSecond();
                    }
                }
                return this.next != null;
            } catch (IOException io) {
                throw new HoodieIOException("unable to read next record from hfile ", io);
            }
        }

        @Override
        public R next() {
            try {
                // To handle case when next() is called before hasNext()
                if (this.next == null) {
                    if (!hasNext()) {
                        throw new HoodieIOException("No more records left to read from hfile");
                    }
                }
                R retVal = this.next;
                // Advance the scanner and buffer the following record for the next call.
                if (scanner.next()) {
                    final Pair<String, R> keyAndRecordPair = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema, keyFieldSchema);
                    this.next = keyAndRecordPair.getSecond();
                } else {
                    this.next = null;
                    this.eof = true;
                }
                return retVal;
            } catch (IOException io) {
                // Was "parquet file" — copy-paste error; this reader iterates an hfile.
                throw new HoodieIOException("unable to read next record from hfile ", io);
            }
        }
    };
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) ClosableIterator(org.apache.hudi.common.util.ClosableIterator) Iterator(java.util.Iterator) HFileScanner(org.apache.hadoop.hbase.io.hfile.HFileScanner) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException)139 IOException (java.io.IOException)127 Path (org.apache.hadoop.fs.Path)45 List (java.util.List)31 ArrayList (java.util.ArrayList)30 Option (org.apache.hudi.common.util.Option)27 Collectors (java.util.stream.Collectors)26 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)26 Pair (org.apache.hudi.common.util.collection.Pair)25 LogManager (org.apache.log4j.LogManager)25 Logger (org.apache.log4j.Logger)25 Map (java.util.Map)21 FileSystem (org.apache.hadoop.fs.FileSystem)20 GenericRecord (org.apache.avro.generic.GenericRecord)19 HashSet (java.util.HashSet)18 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)18 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)18 Set (java.util.Set)17 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)17 HoodieException (org.apache.hudi.exception.HoodieException)17