
Example 76 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class BaseMergeHelper, the method transformRecordBasedOnNewSchema:

protected GenericRecord transformRecordBasedOnNewSchema(GenericDatumReader<GenericRecord> gReader, GenericDatumWriter<GenericRecord> gWriter, ThreadLocal<BinaryEncoder> encoderCache, ThreadLocal<BinaryDecoder> decoderCache, GenericRecord gRec) {
    ByteArrayOutputStream byteStream = null;
    try {
        byteStream = new ByteArrayOutputStream();
        // Serialize the record with the writer (old) schema, reusing the
        // thread-local encoder to avoid per-record allocations.
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(byteStream, encoderCache.get());
        encoderCache.set(encoder);
        gWriter.write(gRec, encoder);
        encoder.flush();
        // Deserialize those bytes with the reader, whose (writer, reader)
        // schema pair resolves the record into the new schema.
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(byteStream.toByteArray(), decoderCache.get());
        decoderCache.set(decoder);
        return gReader.read(null, decoder);
    } catch (IOException e) {
        throw new HoodieException(e);
    } finally {
        try {
            byteStream.close();
        } catch (IOException ioe) {
            throw new HoodieException(ioe.getMessage(), ioe);
        }
    }
}
Also used: BinaryEncoder(org.apache.avro.io.BinaryEncoder) HoodieException(org.apache.hudi.exception.HoodieException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord) BinaryDecoder(org.apache.avro.io.BinaryDecoder)
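
The helper above round-trips a record through Avro binary encoding so that the reader's (writer schema, reader schema) pair rewrites the record into the new shape. Below is a minimal, self-contained sketch of that round trip; the schemas, field names, and class name are illustrative, not from Hudi:

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class AvroTransformSketch {
    public static void main(String[] args) throws IOException {
        // Writer schema: the shape the record currently has.
        Schema writerSchema = SchemaBuilder.record("Rec").fields()
            .requiredString("id").endRecord();
        // Reader schema: adds a defaulted field, so old bytes stay readable.
        Schema readerSchema = SchemaBuilder.record("Rec").fields()
            .requiredString("id")
            .name("note").type().stringType().stringDefault("n/a").endRecord();

        GenericRecord rec = new GenericData.Record(writerSchema);
        rec.put("id", "r1");

        // Serialize with the writer schema ...
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(writerSchema).write(rec, encoder);
        encoder.flush();

        // ... then deserialize with the (writer, reader) schema pair so Avro
        // resolves the bytes into the new schema, as the helper above does.
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
        GenericRecord transformed =
            new GenericDatumReader<GenericRecord>(writerSchema, readerSchema).read(null, decoder);
        System.out.println(transformed); // {"id": "r1", "note": "n/a"}
    }
}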

Example 77 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class DirectWriteMarkers, the method create:

private Option<Path> create(Path markerPath, boolean checkIfExists) {
    HoodieTimer timer = new HoodieTimer().startTimer();
    Path dirPath = markerPath.getParent();
    try {
        if (!fs.exists(dirPath)) {
            // create a new partition as needed.
            fs.mkdirs(dirPath);
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to make dir " + dirPath, e);
    }
    try {
        if (checkIfExists && fs.exists(markerPath)) {
            LOG.warn("Marker Path=" + markerPath + " already exists, cancel creation");
            return Option.empty();
        }
        LOG.info("Creating Marker Path=" + markerPath);
        fs.create(markerPath, false).close();
    } catch (IOException e) {
        throw new HoodieException("Failed to create marker file " + markerPath, e);
    }
    LOG.info("[direct] Created marker file " + markerPath.toString() + " in " + timer.endTimer() + " ms");
    return Option.of(markerPath);
}
Also used: Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException)
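
The create path above relies on fs.create(markerPath, false) refusing to overwrite an existing file, which keeps the operation safe to retry. A minimal sketch of the same create-if-absent pattern on a Hadoop FileSystem; the marker path and its naming are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MarkerCreateSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Hypothetical marker path, one per data file being written.
        Path marker = new Path("/tmp/hoodie/.temp/001/part/f1.parquet.marker.CREATE");

        // Ensure the parent directory exists, then create an empty file,
        // with overwrite = false so a concurrent creator wins cleanly.
        fs.mkdirs(marker.getParent());
        if (fs.exists(marker)) {
            System.out.println("Marker already exists, skipping: " + marker);
        } else {
            fs.create(marker, false).close();
            System.out.println("Created marker: " + marker);
        }
    }
}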

Example 78 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class MarkerBasedRollbackUtils, the method getAllMarkerPaths:

/**
 * Gets all marker paths.
 *
 * @param table       instance of {@code HoodieTable} to use
 * @param context     instance of {@code HoodieEngineContext} to use
 * @param instant     instant of interest to rollback
 * @param parallelism parallelism to use
 * @return a list of all markers
 * @throws IOException
 */
public static List<String> getAllMarkerPaths(HoodieTable table, HoodieEngineContext context, String instant, int parallelism) throws IOException {
    String markerDir = table.getMetaClient().getMarkerFolderPath(instant);
    FileSystem fileSystem = table.getMetaClient().getFs();
    Option<MarkerType> markerTypeOption = MarkerUtils.readMarkerType(fileSystem, markerDir);
    // If there is no marker type file "MARKERS.type", we assume "DIRECT" markers are used
    if (!markerTypeOption.isPresent()) {
        WriteMarkers writeMarkers = WriteMarkersFactory.get(MarkerType.DIRECT, table, instant);
        return new ArrayList<>(writeMarkers.allMarkerFilePaths());
    }
    switch (markerTypeOption.get()) {
        case TIMELINE_SERVER_BASED:
            // Reads all markers written by the timeline server
            Map<String, Set<String>> markersMap = MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(markerDir, fileSystem, context, parallelism);
            return markersMap.values().stream().flatMap(Collection::stream).collect(Collectors.toCollection(ArrayList::new));
        default:
            throw new HoodieException("The marker type \"" + markerTypeOption.get().name() + "\" is not supported.");
    }
}
Also used: Set(java.util.Set) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) MarkerType(org.apache.hudi.common.table.marker.MarkerType) HoodieException(org.apache.hudi.exception.HoodieException)
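
The timeline-server-based branch collapses a Map<String, Set<String>> of marker entries (keyed by shard file) into one flat list. A small, self-contained sketch of that flatten step, with illustrative shard names and marker entries:

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class FlattenMarkersSketch {
    public static void main(String[] args) {
        Map<String, Set<String>> markersMap = Map.of(
            "MARKERS0", Set.of("a.parquet.marker.CREATE"),
            "MARKERS1", Set.of("b.parquet.marker.MERGE", "c.parquet.marker.CREATE"));

        // Stream each shard's set of markers and collect them into one list.
        List<String> allMarkers = markersMap.values().stream()
            .flatMap(Collection::stream)
            .collect(Collectors.toCollection(ArrayList::new));
        System.out.println(allMarkers.size()); // 3
    }
}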

Example 79 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class HoodieMergeHelper, the method runMerge:

@Override
public void runMerge(HoodieTable<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>> table, HoodieMergeHandle<T, HoodieData<HoodieRecord<T>>, HoodieData<HoodieKey>, HoodieData<WriteStatus>> mergeHandle) throws IOException {
    final boolean externalSchemaTransformation = table.getConfig().shouldUseExternalSchemaTransformation();
    Configuration cfgForHoodieFile = new Configuration(table.getHadoopConf());
    HoodieBaseFile baseFile = mergeHandle.baseFileForMerge();
    final GenericDatumWriter<GenericRecord> gWriter;
    final GenericDatumReader<GenericRecord> gReader;
    Schema readSchema;
    // When schema transformation is needed (or a bootstrap base file is present),
    // read records with the old file's schema and project them into the writer
    // schema with meta fields via gReader/gWriter.
    if (externalSchemaTransformation || baseFile.getBootstrapBaseFile().isPresent()) {
        readSchema = HoodieFileReaderFactory.getFileReader(table.getHadoopConf(), mergeHandle.getOldFilePath()).getSchema();
        gWriter = new GenericDatumWriter<>(readSchema);
        gReader = new GenericDatumReader<>(readSchema, mergeHandle.getWriterSchemaWithMetaFields());
    } else {
        gReader = null;
        gWriter = null;
        readSchema = mergeHandle.getWriterSchemaWithMetaFields();
    }
    BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
    HoodieFileReader<GenericRecord> reader = HoodieFileReaderFactory.getFileReader(cfgForHoodieFile, mergeHandle.getOldFilePath());
    try {
        final Iterator<GenericRecord> readerIterator;
        if (baseFile.getBootstrapBaseFile().isPresent()) {
            readerIterator = getMergingIterator(table, mergeHandle, baseFile, reader, readSchema, externalSchemaTransformation);
        } else {
            readerIterator = reader.getRecordIterator(readSchema);
        }
        // Per-thread encoder/decoder caches let Avro reuse buffers across records.
        ThreadLocal<BinaryEncoder> encoderCache = new ThreadLocal<>();
        ThreadLocal<BinaryDecoder> decoderCache = new ThreadLocal<>();
        wrapper = new BoundedInMemoryExecutor(table.getConfig().getWriteBufferLimitBytes(), readerIterator, new UpdateHandler(mergeHandle), record -> {
            if (!externalSchemaTransformation) {
                return record;
            }
            return transformRecordBasedOnNewSchema(gReader, gWriter, encoderCache, decoderCache, (GenericRecord) record);
        }, table.getPreExecuteRunnable());
        wrapper.execute();
    } catch (Exception e) {
        throw new HoodieException(e);
    } finally {
        if (reader != null) {
            reader.close();
        }
        mergeHandle.close();
        if (null != wrapper) {
            wrapper.shutdownNow();
        }
    }
}
Also used: HoodieTable(org.apache.hudi.table.HoodieTable) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieData(org.apache.hudi.common.data.HoodieData) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) Schema(org.apache.avro.Schema) Iterator(java.util.Iterator) HoodieException(org.apache.hudi.exception.HoodieException) BinaryDecoder(org.apache.avro.io.BinaryDecoder) IOException(java.io.IOException) BinaryEncoder(org.apache.avro.io.BinaryEncoder) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieMergeHandle(org.apache.hudi.io.HoodieMergeHandle) HoodieFileReaderFactory(org.apache.hudi.io.storage.HoodieFileReaderFactory) Configuration(org.apache.hadoop.conf.Configuration) HoodieKey(org.apache.hudi.common.model.HoodieKey) BoundedInMemoryExecutor(org.apache.hudi.common.util.queue.BoundedInMemoryExecutor) GenericDatumReader(org.apache.avro.generic.GenericDatumReader)
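
The ThreadLocal caches handed to transformRecordBasedOnNewSchema exist because Avro's EncoderFactory.binaryEncoder and DecoderFactory.binaryDecoder take a previous instance whose buffers they reuse. A minimal sketch of that caching pattern; the class and helper names are hypothetical:

import java.io.ByteArrayOutputStream;

import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class CodecCacheSketch {
    private static final ThreadLocal<BinaryEncoder> ENCODER_CACHE = new ThreadLocal<>();
    private static final ThreadLocal<BinaryDecoder> DECODER_CACHE = new ThreadLocal<>();

    static BinaryEncoder encoderFor(ByteArrayOutputStream out) {
        // Passing the cached instance lets Avro reuse its buffers instead of
        // allocating a fresh encoder per record; null on first use is fine.
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, ENCODER_CACHE.get());
        ENCODER_CACHE.set(encoder);
        return encoder;
    }

    static BinaryDecoder decoderFor(byte[] bytes) {
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, DECODER_CACHE.get());
        DECODER_CACHE.set(decoder);
        return decoder;
    }
}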

Example 80 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

From the class TwoToOneDowngradeHandler, the method convertToDirectMarkers:

/**
 * Converts the markers in the new format (timeline server based) to the old format
 * of direct markers, i.e., one marker file per data file, without the MARKERS.type file.
 * This needs to be idempotent.
 * 1. read all markers from timeline server based marker files
 * 2. create direct style markers
 * 3. delete marker type file
 * 4. delete timeline server based marker files
 *
 * @param commitInstantTime instant of interest for marker conversion.
 * @param table             instance of {@link HoodieTable} to use
 * @param context           instance of {@link HoodieEngineContext} to use
 * @param parallelism       parallelism to use
 */
private void convertToDirectMarkers(final String commitInstantTime, HoodieTable table, HoodieEngineContext context, int parallelism) throws IOException {
    String markerDir = table.getMetaClient().getMarkerFolderPath(commitInstantTime);
    FileSystem fileSystem = FSUtils.getFs(markerDir, context.getHadoopConf().newCopy());
    Option<MarkerType> markerTypeOption = MarkerUtils.readMarkerType(fileSystem, markerDir);
    if (markerTypeOption.isPresent()) {
        switch (markerTypeOption.get()) {
            case TIMELINE_SERVER_BASED:
                // Reads all markers written by the timeline server
                Map<String, Set<String>> markersMap = MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(markerDir, fileSystem, context, parallelism);
                DirectWriteMarkers directWriteMarkers = new DirectWriteMarkers(table, commitInstantTime);
                // Recreates the markers in the direct format
                markersMap.values().stream().flatMap(Collection::stream).forEach(directWriteMarkers::create);
                // Deletes marker type file
                MarkerUtils.deleteMarkerTypeFile(fileSystem, markerDir);
                // Deletes timeline server based markers
                deleteTimelineBasedMarkerFiles(context, markerDir, fileSystem, parallelism);
                break;
            default:
                throw new HoodieException("The marker type \"" + markerTypeOption.get().name() + "\" is not supported for rollback.");
        }
    } else {
        if (fileSystem.exists(new Path(markerDir))) {
            // In case of partial failures during downgrade, the marker type file may have been
            // deleted while the timeline-server-based marker files were left behind, so delete them if present.
            deleteTimelineBasedMarkerFiles(context, markerDir, fileSystem, parallelism);
        }
    }
}
Also used: DirectWriteMarkers(org.apache.hudi.table.marker.DirectWriteMarkers) Path(org.apache.hadoop.fs.Path) Set(java.util.Set) FileSystem(org.apache.hadoop.fs.FileSystem) MarkerType(org.apache.hudi.common.table.marker.MarkerType) HoodieException(org.apache.hudi.exception.HoodieException)
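
Step 4 of the conversion deletes the shard files the timeline server wrote under the marker directory. A hedged sketch of that cleanup using plain Hadoop FileSystem calls; the directory and the name-matching rule are illustrative assumptions, not Hudi's exact implementation:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteServerMarkersSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path markerDir = new Path("/tmp/hoodie/.temp/001"); // hypothetical

        for (FileStatus status : fs.listStatus(markerDir)) {
            String name = status.getPath().getName();
            // Keep direct markers; remove only server-written shard files
            // (assumed here to be named MARKERS0, MARKERS1, ...).
            if (name.startsWith("MARKERS") && !name.equals("MARKERS.type")) {
                fs.delete(status.getPath(), false);
            }
        }
    }
}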

Aggregations

HoodieException (org.apache.hudi.exception.HoodieException): 171
IOException (java.io.IOException): 87
Path (org.apache.hadoop.fs.Path): 45
Schema (org.apache.avro.Schema): 35
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 35
List (java.util.List): 30
ArrayList (java.util.ArrayList): 27
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 23
Collectors (java.util.stream.Collectors): 21
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19
Option (org.apache.hudi.common.util.Option): 19
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 18
Map (java.util.Map): 16
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 16
GenericRecord (org.apache.avro.generic.GenericRecord): 15
Arrays (java.util.Arrays): 14
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 14
Logger (org.apache.log4j.Logger): 14
FileStatus (org.apache.hadoop.fs.FileStatus): 13
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 13