
Example 11 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The class ITTestCompactionCommand, method testRepairCompaction.

/**
 * This function mainly tests the workflow of 'compaction repair' command.
 * The real test of {@link org.apache.hudi.client.CompactionAdminClient#repairCompaction}
 * is {@link TestCompactionAdminClient#testRepairCompactionPlan}.
 */
@Test
public void testRepairCompaction() throws Exception {
    int numEntriesPerInstant = 10;
    String compactionInstant = "001";
    CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
    metaClient.reloadActiveTimeline();
    CompactionAdminClient client = new CompactionAdminClient(new HoodieSparkEngineContext(jsc), metaClient.getBasePath());
    List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles = client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1, Option.empty(), false);
    renameFiles.forEach(lfPair -> {
        try {
            metaClient.getFs().rename(lfPair.getLeft().getPath(), lfPair.getRight().getPath());
        } catch (IOException e) {
            throw new HoodieIOException(e.getMessage(), e);
        }
    });
    client.unscheduleCompactionPlan(compactionInstant, false, 1, false);
    CommandResult cr = getShell().executeCommand(String.format("compaction repair --instant %s --sparkMaster %s", compactionInstant, "local"));
    // All renames succeeded: the result contains "true" and no "false"
    // Expected:
    // ║ File Id │ Source File Path │ Destination File Path │ Rename Executed? │ Rename Succeeded? │ Error ║
    // ║ *       │     *            │        *              │    true          │     true          │       ║
    assertAll("Command run failed", () -> assertTrue(cr.isSuccess()), () -> assertTrue(removeNonWordAndStripSpace(cr.getResult().toString()).contains("true")), () -> assertFalse(removeNonWordAndStripSpace(cr.getResult().toString()).contains("false")));
}
Also used : HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) TestCompactionAdminClient(org.apache.hudi.client.TestCompactionAdminClient) CompactionAdminClient(org.apache.hudi.client.CompactionAdminClient) HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException) Pair(org.apache.hudi.common.util.collection.Pair) CommandResult(org.springframework.shell.core.CommandResult) AbstractShellIntegrationTest(org.apache.hudi.cli.testutils.AbstractShellIntegrationTest) Test(org.junit.jupiter.api.Test)
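
The forEach lambda in this test cannot throw the checked IOException from FileSystem.rename, so the exception is rethrown as Hudi's unchecked HoodieIOException. A minimal standalone sketch of that idiom; the helper class and method names are illustrative, not part of Hudi:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.exception.HoodieIOException;

public class RenameHelper {

    // Rename a file and rethrow the checked IOException as the unchecked
    // HoodieIOException, so the call is usable inside forEach/map lambdas.
    public static void renameOrThrow(FileSystem fs, Path src, Path dst) {
        try {
            if (!fs.rename(src, dst)) {
                throw new HoodieIOException("Rename returned false: " + src + " -> " + dst);
            }
        } catch (IOException e) {
            throw new HoodieIOException(e.getMessage(), e);
        }
    }
}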

Example 12 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The class AsyncCompactService, method startService.

/**
 * Start Compaction Service.
 */
@Override
protected Pair<CompletableFuture, ExecutorService> startService() {
    ExecutorService executor = Executors.newFixedThreadPool(maxConcurrentCompaction, r -> {
        Thread t = new Thread(r, "async_compact_thread");
        t.setDaemon(isRunInDaemonMode());
        return t;
    });
    return Pair.of(CompletableFuture.allOf(IntStream.range(0, maxConcurrentCompaction).mapToObj(i -> CompletableFuture.supplyAsync(() -> {
        try {
            // Set Compactor Pool Name for allowing users to prioritize compaction
            LOG.info("Setting pool name for compaction to " + COMPACT_POOL_NAME);
            context.setProperty(EngineProperty.COMPACTION_POOL_NAME, COMPACT_POOL_NAME);
            while (!isShutdownRequested()) {
                final HoodieInstant instant = fetchNextAsyncServiceInstant();
                if (null != instant) {
                    LOG.info("Starting Compaction for instant " + instant);
                    compactor.compact(instant);
                    LOG.info("Finished Compaction for instant " + instant);
                }
            }
            LOG.info("Compactor shutting down properly!!");
        } catch (InterruptedException ie) {
            hasError = true;
            LOG.warn("Compactor executor thread got interrupted exception. Stopping", ie);
        } catch (IOException e) {
            hasError = true;
            LOG.error("Compactor executor failed due to IOException", e);
            throw new HoodieIOException(e.getMessage(), e);
        } catch (Exception e) {
            hasError = true;
            LOG.error("Compactor executor failed", e);
            throw e;
        }
        return true;
    }, executor)).toArray(CompletableFuture[]::new)), executor);
}
Also used : IntStream(java.util.stream.IntStream) BaseHoodieWriteClient(org.apache.hudi.client.BaseHoodieWriteClient) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) IOException(java.io.IOException) CompletableFuture(java.util.concurrent.CompletableFuture) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) BaseCompactor(org.apache.hudi.client.BaseCompactor) Executors(java.util.concurrent.Executors) Logger(org.apache.log4j.Logger) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) EngineProperty(org.apache.hudi.common.engine.EngineProperty) ExecutorService(java.util.concurrent.ExecutorService) Pair(org.apache.hudi.common.util.collection.Pair)
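
startService combines three ingredients: a named daemon thread factory, a shutdown flag polled in a loop, and CompletableFuture.allOf to join the workers. The following stripped-down sketch shows the same structure in isolation; all class and method names here are illustrative and not Hudi API, and executor shutdown handling is omitted:

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.IntStream;

public class AsyncWorkerPoolSketch {

    private final AtomicBoolean shutdownRequested = new AtomicBoolean(false);

    // Start N daemon workers that poll for work until shutdown is requested;
    // the returned future completes once every worker loop has exited.
    public CompletableFuture<Void> start(int numWorkers) {
        ExecutorService executor = Executors.newFixedThreadPool(numWorkers, r -> {
            Thread t = new Thread(r, "async_worker_thread");
            t.setDaemon(true);
            return t;
        });
        return CompletableFuture.allOf(IntStream.range(0, numWorkers)
            .mapToObj(i -> CompletableFuture.runAsync(() -> {
                while (!shutdownRequested.get()) {
                    // fetch and process the next unit of work here
                }
            }, executor))
            .toArray(CompletableFuture[]::new));
    }

    public void shutdown() {
        shutdownRequested.set(true);
    }
}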

Example 13 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The class HoodieFileSliceReader, method getFileSliceReader.

public static HoodieFileSliceReader getFileSliceReader(Option<HoodieFileReader> baseFileReader, HoodieMergedLogRecordScanner scanner, Schema schema, String payloadClass, String preCombineField, Option<Pair<String, String>> simpleKeyGenFieldsOpt) throws IOException {
    if (baseFileReader.isPresent()) {
        Iterator baseIterator = baseFileReader.get().getRecordIterator(schema);
        while (baseIterator.hasNext()) {
            GenericRecord record = (GenericRecord) baseIterator.next();
            HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = transform(record, scanner, payloadClass, preCombineField, simpleKeyGenFieldsOpt);
            scanner.processNextRecord(hoodieRecord);
        }
        return new HoodieFileSliceReader(scanner.iterator());
    } else {
        Iterable<HoodieRecord<? extends HoodieRecordPayload>> iterable = () -> scanner.iterator();
        HoodiePayloadConfig payloadConfig = HoodiePayloadConfig.newBuilder().withPayloadOrderingField(preCombineField).build();
        return new HoodieFileSliceReader(StreamSupport.stream(iterable.spliterator(), false).map(e -> {
            try {
                GenericRecord record = (GenericRecord) e.getData().getInsertValue(schema, payloadConfig.getProps()).get();
                return transform(record, scanner, payloadClass, preCombineField, simpleKeyGenFieldsOpt);
            } catch (IOException io) {
                throw new HoodieIOException("Error while creating reader for file slice with no base file.", io);
            }
        }).iterator());
    }
}
Also used : HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Iterator(java.util.Iterator) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) HoodiePayloadConfig(org.apache.hudi.config.HoodiePayloadConfig)
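
The log-only branch above adapts the scanner's one-shot Iterator into an Iterable via a lambda so that StreamSupport can turn it into a Stream. A self-contained sketch of that idiom, with plain strings standing in for Hudi records:

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

public class IteratorToStreamExample {

    public static void main(String[] args) {
        Iterator<String> records = Arrays.asList("rec1", "rec2", "rec3").iterator();
        // Iterable has a single abstract iterator() method, so a lambda can wrap
        // an existing Iterator; the resulting stream is single-use, just like the
        // scanner iterator in getFileSliceReader.
        Iterable<String> iterable = () -> records;
        List<String> upper = StreamSupport.stream(iterable.spliterator(), false)
            .map(String::toUpperCase)
            .collect(Collectors.toList());
        System.out.println(upper); // [REC1, REC2, REC3]
    }
}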

Example 14 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The class AvroOrcUtils, method readFromVector.

/**
 * Read the Column vector at a given position conforming to a given ORC schema.
 *
 * @param type        ORC schema of the object to read.
 * @param colVector   The column vector to read.
 * @param avroSchema  Avro schema of the object to read.
 *                    Only used to check logical types for timestamp unit conversion.
 * @param vectorPos   The position in the vector where the value to read is stored at.
 * @return            The object being read.
 */
public static Object readFromVector(TypeDescription type, ColumnVector colVector, Schema avroSchema, int vectorPos) {
    if (colVector.isRepeating) {
        vectorPos = 0;
    }
    if (colVector.isNull[vectorPos]) {
        return null;
    }
    if (avroSchema.getType().equals(Schema.Type.UNION)) {
        avroSchema = getActualSchemaType(avroSchema);
    }
    LogicalType logicalType = avroSchema != null ? avroSchema.getLogicalType() : null;
    switch(type.getCategory()) {
        case BOOLEAN:
            return ((LongColumnVector) colVector).vector[vectorPos] != 0;
        case BYTE:
            return (byte) ((LongColumnVector) colVector).vector[vectorPos];
        case SHORT:
            return (short) ((LongColumnVector) colVector).vector[vectorPos];
        case INT:
            return (int) ((LongColumnVector) colVector).vector[vectorPos];
        case LONG:
            return ((LongColumnVector) colVector).vector[vectorPos];
        case FLOAT:
            return (float) ((DoubleColumnVector) colVector).vector[vectorPos];
        case DOUBLE:
            return ((DoubleColumnVector) colVector).vector[vectorPos];
        case VARCHAR:
        case CHAR:
            int maxLength = type.getMaxLength();
            String result = ((BytesColumnVector) colVector).toString(vectorPos);
            if (result.length() <= maxLength) {
                return result;
            } else {
                throw new HoodieIOException("CHAR/VARCHAR has length " + result.length() + " greater than Max Length allowed");
            }
        case STRING:
            String stringType = avroSchema.getProp(GenericData.STRING_PROP);
            if (stringType == null || !stringType.equals(StringType.String)) {
                int stringLength = ((BytesColumnVector) colVector).length[vectorPos];
                int stringOffset = ((BytesColumnVector) colVector).start[vectorPos];
                byte[] stringBytes = new byte[stringLength];
                System.arraycopy(((BytesColumnVector) colVector).vector[vectorPos], stringOffset, stringBytes, 0, stringLength);
                return new Utf8(stringBytes);
            } else {
                return ((BytesColumnVector) colVector).toString(vectorPos);
            }
        case DATE:
            // convert to daysSinceEpoch for LogicalType.Date
            return (int) ((LongColumnVector) colVector).vector[vectorPos];
        case TIMESTAMP:
            // The unit of time in ORC is millis. Convert (time,nanos) to the desired unit per logicalType
            long time = ((TimestampColumnVector) colVector).time[vectorPos];
            int nanos = ((TimestampColumnVector) colVector).nanos[vectorPos];
            if (logicalType instanceof LogicalTypes.TimestampMillis) {
                return time;
            } else if (logicalType instanceof LogicalTypes.TimestampMicros) {
                return time * MICROS_PER_MILLI + nanos / NANOS_PER_MICRO;
            } else {
                return ((TimestampColumnVector) colVector).getTimestampAsLong(vectorPos);
            }
        case BINARY:
            int binaryLength = ((BytesColumnVector) colVector).length[vectorPos];
            int binaryOffset = ((BytesColumnVector) colVector).start[vectorPos];
            byte[] binaryBytes = new byte[binaryLength];
            System.arraycopy(((BytesColumnVector) colVector).vector[vectorPos], binaryOffset, binaryBytes, 0, binaryLength);
            // return a ByteBuffer to be consistent with AvroRecordConverter
            return ByteBuffer.wrap(binaryBytes);
        case DECIMAL:
            // HiveDecimal always ignores trailing zeros, thus modifies the scale implicitly,
            // therefore, the scale must be enforced here.
            BigDecimal bigDecimal = ((DecimalColumnVector) colVector).vector[vectorPos].getHiveDecimal().bigDecimalValue().setScale(((LogicalTypes.Decimal) logicalType).getScale());
            Schema.Type baseType = avroSchema.getType();
            if (baseType.equals(Schema.Type.FIXED)) {
                return new Conversions.DecimalConversion().toFixed(bigDecimal, avroSchema, logicalType);
            } else if (baseType.equals(Schema.Type.BYTES)) {
                return bigDecimal.unscaledValue().toByteArray();
            } else {
                throw new HoodieIOException(baseType.getName() + " is not a valid type for LogicalTypes.DECIMAL.");
            }
        case LIST:
            ArrayList<Object> list = new ArrayList<>();
            ListColumnVector listVector = (ListColumnVector) colVector;
            int listLength = (int) listVector.lengths[vectorPos];
            int listOffset = (int) listVector.offsets[vectorPos];
            list.ensureCapacity(listLength);
            TypeDescription childType = type.getChildren().get(0);
            for (int i = 0; i < listLength; i++) {
                list.add(readFromVector(childType, listVector.child, avroSchema.getElementType(), listOffset + i));
            }
            return list;
        case MAP:
            Map<String, Object> map = new HashMap<String, Object>();
            MapColumnVector mapVector = (MapColumnVector) colVector;
            int mapLength = (int) mapVector.lengths[vectorPos];
            int mapOffset = (int) mapVector.offsets[vectorPos];
            // keys are always strings for maps in Avro
            Schema keySchema = Schema.create(Schema.Type.STRING);
            for (int i = 0; i < mapLength; i++) {
                map.put(readFromVector(type.getChildren().get(0), mapVector.keys, keySchema, i + mapOffset).toString(), readFromVector(type.getChildren().get(1), mapVector.values, avroSchema.getValueType(), i + mapOffset));
            }
            return map;
        case STRUCT:
            StructColumnVector structVector = (StructColumnVector) colVector;
            List<TypeDescription> children = type.getChildren();
            GenericData.Record record = new GenericData.Record(avroSchema);
            for (int i = 0; i < children.size(); i++) {
                record.put(i, readFromVector(children.get(i), structVector.fields[i], avroSchema.getFields().get(i).schema(), vectorPos));
            }
            return record;
        case UNION:
            UnionColumnVector unionVector = (UnionColumnVector) colVector;
            int tag = unionVector.tags[vectorPos];
            ColumnVector fieldVector = unionVector.fields[tag];
            return readFromVector(type.getChildren().get(tag), fieldVector, avroSchema.getTypes().get(tag), vectorPos);
        default:
            throw new HoodieIOException("Unrecognized TypeDescription " + type.toString());
    }
}
Also used : DecimalColumnVector(org.apache.orc.storage.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.orc.storage.ql.exec.vector.DoubleColumnVector) MapColumnVector(org.apache.orc.storage.ql.exec.vector.MapColumnVector) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) UnionColumnVector(org.apache.orc.storage.ql.exec.vector.UnionColumnVector) LogicalType(org.apache.avro.LogicalType) ListColumnVector(org.apache.orc.storage.ql.exec.vector.ListColumnVector) StructColumnVector(org.apache.orc.storage.ql.exec.vector.StructColumnVector) BytesColumnVector(org.apache.orc.storage.ql.exec.vector.BytesColumnVector) TypeDescription(org.apache.orc.TypeDescription) LongColumnVector(org.apache.orc.storage.ql.exec.vector.LongColumnVector) LogicalTypes(org.apache.avro.LogicalTypes) GenericData(org.apache.avro.generic.GenericData) BigDecimal(java.math.BigDecimal) TimestampColumnVector(org.apache.orc.storage.ql.exec.vector.TimestampColumnVector) ColumnVector(org.apache.orc.storage.ql.exec.vector.ColumnVector) HoodieIOException(org.apache.hudi.exception.HoodieIOException) Conversions(org.apache.avro.Conversions) Utf8(org.apache.avro.util.Utf8)
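
The isRepeating and isNull handling at the top of readFromVector applies to every branch of the switch. A minimal sketch of that handling for the simplest case, reading a LONG value out of a LongColumnVector; everything apart from the ORC vector type is an illustrative name:

import org.apache.orc.storage.ql.exec.vector.LongColumnVector;

public class LongVectorReadExample {

    // Mirror the null/repeating checks from readFromVector for a LONG column.
    public static Long readLong(LongColumnVector colVector, int vectorPos) {
        if (colVector.isRepeating) {
            // A repeating vector stores its single value at position 0.
            vectorPos = 0;
        }
        if (colVector.isNull[vectorPos]) {
            return null;
        }
        return colVector.vector[vectorPos];
    }

    public static void main(String[] args) {
        LongColumnVector vector = new LongColumnVector(4);
        vector.vector[2] = 42L;
        System.out.println(readLong(vector, 2)); // 42
    }
}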

Example 15 with HoodieIOException

Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.

The class HoodieActiveTimeline, method revertCompleteToInflight.

private void revertCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) {
    ValidationUtils.checkArgument(completed.getTimestamp().equals(inflight.getTimestamp()));
    Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), inflight.getFileName());
    Path commitFilePath = new Path(metaClient.getMetaPath(), completed.getFileName());
    try {
        if (metaClient.getTimelineLayoutVersion().isNullVersion()) {
            if (!metaClient.getFs().exists(inFlightCommitFilePath)) {
                boolean success = metaClient.getFs().rename(commitFilePath, inFlightCommitFilePath);
                if (!success) {
                    throw new HoodieIOException("Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
                }
            }
        } else {
            Path requestedInstantFilePath = new Path(metaClient.getMetaPath(), new HoodieInstant(State.REQUESTED, inflight.getAction(), inflight.getTimestamp()).getFileName());
            // If inflight and requested files do not exist, create one
            if (!metaClient.getFs().exists(requestedInstantFilePath)) {
                metaClient.getFs().create(requestedInstantFilePath, false).close();
            }
            if (!metaClient.getFs().exists(inFlightCommitFilePath)) {
                metaClient.getFs().create(inFlightCommitFilePath, false).close();
            }
            boolean success = metaClient.getFs().delete(commitFilePath, false);
            ValidationUtils.checkArgument(success, "State Reverting failed");
        }
    } catch (IOException e) {
        throw new HoodieIOException("Could not complete revert " + completed, e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) IOException(java.io.IOException)
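
The requested and inflight instant files above are created with fs.create(path, false), which refuses to overwrite an existing file, and the checked IOException is again wrapped in HoodieIOException. A small sketch of that create-if-absent idiom; the helper class and method names are illustrative:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.exception.HoodieIOException;

public class MetaFileHelper {

    // Create an empty marker file unless it already exists; create(path, false)
    // throws if the file is present, so the existence check avoids overwriting.
    public static void createIfAbsent(FileSystem fs, Path path) {
        try {
            if (!fs.exists(path)) {
                fs.create(path, false).close();
            }
        } catch (IOException e) {
            throw new HoodieIOException("Could not create " + path, e);
        }
    }
}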

Aggregations

HoodieIOException (org.apache.hudi.exception.HoodieIOException): 139
IOException (java.io.IOException): 127
Path (org.apache.hadoop.fs.Path): 45
List (java.util.List): 31
ArrayList (java.util.ArrayList): 30
Option (org.apache.hudi.common.util.Option): 27
Collectors (java.util.stream.Collectors): 26
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 26
Pair (org.apache.hudi.common.util.collection.Pair): 25
LogManager (org.apache.log4j.LogManager): 25
Logger (org.apache.log4j.Logger): 25
Map (java.util.Map): 21
FileSystem (org.apache.hadoop.fs.FileSystem): 20
GenericRecord (org.apache.avro.generic.GenericRecord): 19
HashSet (java.util.HashSet): 18
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 18
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18
Set (java.util.Set): 17
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 17
HoodieException (org.apache.hudi.exception.HoodieException): 17