Search in sources :

Example 66 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class BufferedConnectWriter method flushRecords.

/**
 * Writes all buffered records through the write client and returns the resulting write statuses.
 *
 * <p>Records are upserted when the configured table type is MERGE_ON_READ and bulk-inserted
 * otherwise. When the buffer is empty nothing is written and an empty list is returned. The
 * record buffer is closed once the write has completed.
 *
 * @return the write statuses reported by the write client; empty if no records were buffered.
 * @throws HoodieIOException if anything fails while writing the records or closing the buffer.
 */
@Override
public List<WriteStatus> flushRecords() {
    try {
        // Each statistic is separated by ", " so a value does not run into the next label.
        LOG.info("Number of entries in MemoryBasedMap => " + bufferedRecords.getInMemoryMapNumEntries()
            + ", Total size in bytes of MemoryBasedMap => " + bufferedRecords.getCurrentInMemoryMapSize()
            + ", Number of entries in BitCaskDiskMap => " + bufferedRecords.getDiskBasedMapNumEntries()
            + ", Size of file spilled to disk => " + bufferedRecords.getSizeOfFileOnDiskInBytes());
        List<WriteStatus> writeStatuses = new ArrayList<>();
        // A missing table type setting is treated as "not MERGE_ON_READ".
        boolean isMorTable = Option.ofNullable(connectConfigs.getString(HoodieTableConfig.TYPE)).map(t -> t.equals(HoodieTableType.MERGE_ON_READ.name())).orElse(false);
        // Write out all records if non-empty
        if (!bufferedRecords.isEmpty()) {
            if (isMorTable) {
                writeStatuses = writeClient.upsertPreppedRecords(new LinkedList<>(bufferedRecords.values()), instantTime);
            } else {
                writeStatuses = writeClient.bulkInsertPreppedRecords(new LinkedList<>(bufferedRecords.values()), instantTime, Option.empty());
            }
        }
        // NOTE(review): the buffer is only closed on the success path; if the write above throws,
        // close() is skipped. Confirm whether callers retry with the same buffer before changing this.
        bufferedRecords.close();
        LOG.info("Flushed hudi records and got writeStatuses: " + writeStatuses);
        return writeStatuses;
    } catch (Exception e) {
        // HoodieIOException takes an IOException cause, so the failure is wrapped to keep it attached.
        throw new HoodieIOException("Write records failed", new IOException(e));
    }
}
Also used : HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Schema(org.apache.avro.Schema) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieRecordSizeEstimator(org.apache.hudi.common.util.HoodieRecordSizeEstimator) HoodieJavaWriteClient(org.apache.hudi.client.HoodieJavaWriteClient) ArrayList(java.util.ArrayList) WriteStatus(org.apache.hudi.client.WriteStatus) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) KeyGenerator(org.apache.hudi.keygen.KeyGenerator) List(java.util.List) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) HoodieIOException(org.apache.hudi.exception.HoodieIOException) SchemaProvider(org.apache.hudi.schema.SchemaProvider) ExternalSpillableMap(org.apache.hudi.common.util.collection.ExternalSpillableMap) IOUtils(org.apache.hudi.io.IOUtils) LogManager(org.apache.log4j.LogManager) LinkedList(java.util.LinkedList) HoodieIOException(org.apache.hudi.exception.HoodieIOException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) WriteStatus(org.apache.hudi.client.WriteStatus) LinkedList(java.util.LinkedList) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Example 67 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class BulkInsertDataInternalWriterHelper method getKeyGenerator.

/**
 * Instantiate {@link BuiltinKeyGenerator}.
 *
 * <p>No key generator is created when the configured key generator class is
 * {@link NonpartitionedKeyGenerator}. In every other case, including when no key generator
 * class is configured at all, the key generator factory is asked to build one from the
 * given properties.
 *
 * @param properties properties map.
 * @return the key generator thus instantiated, or an empty option when the configured class is
 *         {@link NonpartitionedKeyGenerator}.
 * @throws HoodieIOException if the created key generator is not a {@link BuiltinKeyGenerator},
 *         or if the factory fails with an {@link IOException}.
 */
private Option<BuiltinKeyGenerator> getKeyGenerator(Properties properties) {
    TypedProperties typedProperties = new TypedProperties();
    typedProperties.putAll(properties);
    Object configuredKeyGenClass = properties.get(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key());
    // Compare from the constant side: Properties#get returns null for an absent key, and calling
    // equals on that result would raise a NullPointerException.
    if (NonpartitionedKeyGenerator.class.getName().equals(configuredKeyGenClass)) {
        // Do not instantiate NonPartitionKeyGen
        return Option.empty();
    }
    try {
        return Option.of((BuiltinKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(typedProperties));
    } catch (ClassCastException cce) {
        // Wrap the cast failure so the offending class stays visible in the stack trace.
        throw new HoodieIOException("Only those key generators implementing BuiltInKeyGenerator interface is supported with virtual keys", new IOException(cce));
    } catch (IOException e) {
        throw new HoodieIOException("Key generator instantiation failed ", e);
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) TypedProperties(org.apache.hudi.common.config.TypedProperties) NonpartitionedKeyGenerator(org.apache.hudi.keygen.NonpartitionedKeyGenerator)

Example 68 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class SparkFullBootstrapDataProviderBase method generateInputRecords.

/**
 * Loads every source file listed for the given partitions and converts each loaded row into a
 * {@link HoodieRecord}.
 *
 * <p>The files are read with the format returned by {@code getFormat()}, turned into Avro
 * generic records named after the table, and keyed with the key generator built from
 * {@code props}. The {@code sourceBasePath} argument is not read by this method.
 *
 * @param tableName table name, used to derive the Avro struct name and namespace.
 * @param sourceBasePath base path of the source data.
 * @param partitionPathsWithFiles pairs of partition path and the files belonging to it.
 * @return an RDD with one record per input row.
 * @throws HoodieIOException if the key generator or a record cannot be created.
 */
@Override
public JavaRDD<HoodieRecord> generateInputRecords(String tableName, String sourceBasePath, List<Pair<String, List<HoodieFileStatus>>> partitionPathsWithFiles) {
    // Flatten the per-partition file listings into one array of path strings.
    String[] sourceFiles = partitionPathsWithFiles.stream()
        .flatMap(partition -> partition.getValue().stream())
        .map(status -> FileStatusUtils.toPath(status.getPath()).toString())
        .toArray(String[]::new);
    Dataset sourceRows = sparkSession.read().format(getFormat()).load(sourceFiles);
    try {
        KeyGenerator recordKeyGen = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props);
        RDD<GenericRecord> avroRecords = HoodieSparkUtils.createRdd(sourceRows, tableName + "_record", "hoodie." + tableName, false, Option.empty());
        return avroRecords.toJavaRDD().map(avroRecord -> {
            String precombineField = props.getString("hoodie.datasource.write.precombine.field");
            boolean consistentLogicalTimestamp = props.getBoolean(
                KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(),
                Boolean.parseBoolean(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue()));
            String precombineValue = HoodieAvroUtils.getNestedFieldValAsString(avroRecord, precombineField, false, consistentLogicalTimestamp);
            try {
                return DataSourceUtils.createHoodieRecord(avroRecord, precombineValue, recordKeyGen.getKey(avroRecord), props.getString("hoodie.datasource.write.payload.class"));
            } catch (IOException recordFailure) {
                throw new HoodieIOException(recordFailure.getMessage(), recordFailure);
            }
        });
    } catch (IOException setupFailure) {
        throw new HoodieIOException(setupFailure.getMessage(), setupFailure);
    }
}
Also used : HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) TypedProperties(org.apache.hudi.common.config.TypedProperties) Dataset(org.apache.spark.sql.Dataset) KeyGeneratorOptions(org.apache.hudi.keygen.constant.KeyGeneratorOptions) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) DataSourceUtils(org.apache.hudi.DataSourceUtils) KeyGenerator(org.apache.hudi.keygen.KeyGenerator) List(java.util.List) HoodieSparkUtils(org.apache.hudi.HoodieSparkUtils) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) FileStatusUtils(org.apache.hudi.common.bootstrap.FileStatusUtils) HoodieIOException(org.apache.hudi.exception.HoodieIOException) RDD(org.apache.spark.rdd.RDD) HoodieSparkKeyGeneratorFactory(org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory) JavaRDD(org.apache.spark.api.java.JavaRDD) FullRecordBootstrapDataProvider(org.apache.hudi.client.bootstrap.FullRecordBootstrapDataProvider) Pair(org.apache.hudi.common.util.collection.Pair) SparkSession(org.apache.spark.sql.SparkSession) HoodieIOException(org.apache.hudi.exception.HoodieIOException) Dataset(org.apache.spark.sql.Dataset) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) GenericRecord(org.apache.avro.generic.GenericRecord) KeyGenerator(org.apache.hudi.keygen.KeyGenerator) Pair(org.apache.hudi.common.util.collection.Pair)

Example 69 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class RestorePlanActionExecutor method execute.

/**
 * Builds a restore plan covering every instant newer than {@code restoreInstantTime} and saves
 * it on the active timeline as a requested restore.
 *
 * <p>Pending clustering instants are listed ahead of all other write instants, each group in
 * reverse timeline order.
 *
 * @return the restore plan that was saved.
 * @throws HoodieIOException if an {@link IOException} occurs while building or saving the plan.
 */
@Override
public Option<HoodieRestorePlan> execute() {
    final HoodieInstant requestedRestore = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.RESTORE_ACTION, instantTime);
    try {
        // Pending clustering instants after the restore point are rolled back first (See HUDI-3362).
        List<HoodieInstant> pendingClustering = table.getActiveTimeline()
            .filterPendingReplaceTimeline()
            .filter(candidate -> ClusteringUtils.isPendingClusteringInstant(table.getMetaClient(), candidate))
            .getReverseOrderedInstants()
            .filter(candidate -> HoodieActiveTimeline.GREATER_THAN.test(candidate.getTimestamp(), restoreInstantTime))
            .collect(Collectors.toList());

        // Every other write instant after the restore point, skipping those already collected above.
        List<HoodieInstant> remainingCommits = table.getActiveTimeline()
            .getWriteTimeline()
            .getReverseOrderedInstants()
            .filter(candidate -> HoodieActiveTimeline.GREATER_THAN.test(candidate.getTimestamp(), restoreInstantTime))
            .filter(candidate -> !pendingClustering.contains(candidate))
            .collect(Collectors.toList());

        // Final order: pending clustering instants, then the remaining commits.
        List<HoodieInstantInfo> rollbackTargets = Stream.concat(pendingClustering.stream(), remainingCommits.stream())
            .map(target -> new HoodieInstantInfo(target.getTimestamp(), target.getAction()))
            .collect(Collectors.toList());

        HoodieRestorePlan plan = new HoodieRestorePlan(rollbackTargets, LATEST_RESTORE_PLAN_VERSION);
        table.getActiveTimeline().saveToRestoreRequested(requestedRestore, TimelineMetadataUtils.serializeRestorePlan(plan));
        table.getMetaClient().reloadActiveTimeline();
        LOG.info("Requesting Restore with instant time " + requestedRestore);
        return Option.of(plan);
    } catch (IOException ioe) {
        LOG.error("Got exception when saving restore requested file", ioe);
        throw new HoodieIOException(ioe.getMessage(), ioe);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) BaseActionExecutor(org.apache.hudi.table.action.BaseActionExecutor) HoodieRestorePlan(org.apache.hudi.avro.model.HoodieRestorePlan) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) Collectors(java.util.stream.Collectors) HoodieInstantInfo(org.apache.hudi.avro.model.HoodieInstantInfo) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) Logger(org.apache.log4j.Logger) List(java.util.List) Stream(java.util.stream.Stream) ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieInstantInfo(org.apache.hudi.avro.model.HoodieInstantInfo) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieRestorePlan(org.apache.hudi.avro.model.HoodieRestorePlan)

Example 70 with HoodieIOException

use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

the class BaseRollbackActionExecutor method doRollbackAndGetStats.

/**
 * Validates the instant being rolled back, executes the given rollback plan and returns the
 * statistics it produced.
 *
 * <p>Savepoint validation always runs. Commit-sequence validation is skipped when the instant
 * is a pending compaction or a pending clustering. The index rollback is skipped for a pending
 * compaction.
 *
 * @param hoodieRollbackPlan the rollback plan to execute.
 * @return the rollback statistics returned by {@code executeRollback}.
 * @throws HoodieIOException if an {@link IOException} occurs while rolling back.
 */
public List<HoodieRollbackStat> doRollbackAndGetStats(HoodieRollbackPlan hoodieRollbackPlan) {
    final String rollbackTimestamp = instantToRollback.getTimestamp();
    // An instant counts as "pending" when its action matches and it has not completed.
    final boolean pendingCompaction =
        Objects.equals(HoodieTimeline.COMPACTION_ACTION, instantToRollback.getAction())
            && !instantToRollback.isCompleted();
    // A replace commit is a pending clustering only if a clustering plan exists for it.
    final boolean pendingClustering =
        Objects.equals(HoodieTimeline.REPLACE_COMMIT_ACTION, instantToRollback.getAction())
            && !instantToRollback.isCompleted()
            && ClusteringUtils.getClusteringPlan(table.getMetaClient(), instantToRollback).isPresent();

    validateSavepointRollbacks();
    if (!(pendingCompaction || pendingClustering)) {
        validateRollbackCommitSequence();
    }

    try {
        List<HoodieRollbackStat> rollbackStats = executeRollback(hoodieRollbackPlan);
        LOG.info("Rolled back inflight instant " + rollbackTimestamp);
        if (!pendingCompaction) {
            rollBackIndex();
        }
        return rollbackStats;
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to execute rollback ", ioe);
    }
}
Also used : HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException)139 IOException (java.io.IOException)127 Path (org.apache.hadoop.fs.Path)45 List (java.util.List)31 ArrayList (java.util.ArrayList)30 Option (org.apache.hudi.common.util.Option)27 Collectors (java.util.stream.Collectors)26 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)26 Pair (org.apache.hudi.common.util.collection.Pair)25 LogManager (org.apache.log4j.LogManager)25 Logger (org.apache.log4j.Logger)25 Map (java.util.Map)21 FileSystem (org.apache.hadoop.fs.FileSystem)20 GenericRecord (org.apache.avro.generic.GenericRecord)19 HashSet (java.util.HashSet)18 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)18 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)18 Set (java.util.Set)17 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)17 HoodieException (org.apache.hudi.exception.HoodieException)17