Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class ITTestCompactionCommand, method testRepairCompaction.
/**
 * This function mainly tests the workflow of the 'compaction repair' command.
 * The real test of {@link org.apache.hudi.client.CompactionAdminClient#repairCompaction}
 * is {@link TestCompactionAdminClient#testRepairCompactionPlan}.
 */
@Test
public void testRepairCompaction() throws Exception {
  int numEntriesPerInstant = 10;
  String compactionInstant = "001";
  CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant,
      numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
  metaClient.reloadActiveTimeline();
  CompactionAdminClient client = new CompactionAdminClient(new HoodieSparkEngineContext(jsc), metaClient.getBasePath());
  List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles =
      client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1, Option.empty(), false);
  // Perform the log-file renames suggested for unscheduling the compaction plan.
  renameFiles.forEach(lfPair -> {
    try {
      metaClient.getFs().rename(lfPair.getLeft().getPath(), lfPair.getRight().getPath());
    } catch (IOException e) {
      // Wrap the checked IOException so it can escape the lambda.
      throw new HoodieIOException(e.getMessage(), e);
    }
  });
  client.unscheduleCompactionPlan(compactionInstant, false, 1, false);
  CommandResult cr = getShell().executeCommand(
      String.format("compaction repair --instant %s --sparkMaster %s", compactionInstant, "local"));
  // All renames executed and succeeded: the result contains "true" and no "false".
  // Expected:
  // ║ File Id │ Source File Path │ Destination File Path │ Rename Executed? │ Rename Succeeded? │ Error ║
  // ║ *       │ *                │ *                     │ true             │ true              │       ║
  assertAll("Command run failed",
      () -> assertTrue(cr.isSuccess()),
      () -> assertTrue(removeNonWordAndStripSpace(cr.getResult().toString()).contains("true")),
      () -> assertFalse(removeNonWordAndStripSpace(cr.getResult().toString()).contains("false")));
}
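Both this test and the examples below share one idiom: a checked java.io.IOException raised inside a lambda is rethrown as the unchecked HoodieIOException so it can cross the functional-interface boundary. A minimal, standalone sketch of that pattern follows; the file paths and the surrounding class are made up for illustration and are not part of Hudi.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.apache.hudi.exception.HoodieIOException;

public class WrapIoExceptionSketch {
  public static void main(String[] args) {
    List<Path> paths = List.of(Path.of("/tmp/a.log"), Path.of("/tmp/b.log")); // hypothetical inputs
    paths.forEach(p -> {
      try {
        // Files.size throws a checked IOException, which forEach's Consumer cannot declare.
        System.out.println(p + " -> " + Files.size(p) + " bytes");
      } catch (IOException e) {
        // Rethrow unchecked, preserving the original cause for callers to inspect via getCause().
        throw new HoodieIOException(e.getMessage(), e);
      }
    });
  }
}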
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class AsyncCompactService, method startService.
/**
 * Start Compaction Service.
 */
@Override
protected Pair<CompletableFuture, ExecutorService> startService() {
  ExecutorService executor = Executors.newFixedThreadPool(maxConcurrentCompaction, r -> {
    Thread t = new Thread(r, "async_compact_thread");
    t.setDaemon(isRunInDaemonMode());
    return t;
  });
  return Pair.of(CompletableFuture.allOf(IntStream.range(0, maxConcurrentCompaction)
      .mapToObj(i -> CompletableFuture.supplyAsync(() -> {
        try {
          // Set Compactor Pool Name for allowing users to prioritize compaction
          LOG.info("Setting pool name for compaction to " + COMPACT_POOL_NAME);
          context.setProperty(EngineProperty.COMPACTION_POOL_NAME, COMPACT_POOL_NAME);
          while (!isShutdownRequested()) {
            final HoodieInstant instant = fetchNextAsyncServiceInstant();
            if (null != instant) {
              LOG.info("Starting Compaction for instant " + instant);
              compactor.compact(instant);
              LOG.info("Finished Compaction for instant " + instant);
            }
          }
          LOG.info("Compactor shutting down properly!!");
        } catch (InterruptedException ie) {
          hasError = true;
          LOG.warn("Compactor executor thread got interrupted exception. Stopping", ie);
        } catch (IOException e) {
          hasError = true;
          LOG.error("Compactor executor failed due to IOException", e);
          throw new HoodieIOException(e.getMessage(), e);
        } catch (Exception e) {
          hasError = true;
          LOG.error("Compactor executor failed", e);
          throw e;
        }
        return true;
      }, executor)).toArray(CompletableFuture[]::new)), executor);
}
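A HoodieIOException thrown from inside supplyAsync completes the corresponding future exceptionally, so it reaches whoever waits on the combined future wrapped in a java.util.concurrent.CompletionException. A hedged sketch of how a caller inside the service might observe that; the surrounding driver logic is illustrative, not the actual HoodieAsyncService API:

// Hypothetical driver fragment (assumed to run inside the owning service class):
Pair<CompletableFuture, ExecutorService> handle = startService();
try {
  handle.getLeft().join(); // completes when every worker loop returns, or throws if one failed
} catch (CompletionException ce) {
  if (ce.getCause() instanceof HoodieIOException) {
    LOG.error("Compaction worker failed with an I/O error", ce.getCause());
  }
  throw ce;
} finally {
  handle.getRight().shutdown(); // stop accepting work; daemon threads exit once their loops end
}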
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class HoodieFileSliceReader, method getFileSliceReader.
public static HoodieFileSliceReader getFileSliceReader(
    Option<HoodieFileReader> baseFileReader, HoodieMergedLogRecordScanner scanner, Schema schema,
    String payloadClass, String preCombineField, Option<Pair<String, String>> simpleKeyGenFieldsOpt) throws IOException {
  if (baseFileReader.isPresent()) {
    Iterator baseIterator = baseFileReader.get().getRecordIterator(schema);
    while (baseIterator.hasNext()) {
      GenericRecord record = (GenericRecord) baseIterator.next();
      HoodieRecord<? extends HoodieRecordPayload> hoodieRecord =
          transform(record, scanner, payloadClass, preCombineField, simpleKeyGenFieldsOpt);
      scanner.processNextRecord(hoodieRecord);
    }
    return new HoodieFileSliceReader(scanner.iterator());
  } else {
    Iterable<HoodieRecord<? extends HoodieRecordPayload>> iterable = () -> scanner.iterator();
    HoodiePayloadConfig payloadConfig = HoodiePayloadConfig.newBuilder().withPayloadOrderingField(preCombineField).build();
    return new HoodieFileSliceReader(StreamSupport.stream(iterable.spliterator(), false).map(e -> {
      try {
        GenericRecord record = (GenericRecord) e.getData().getInsertValue(schema, payloadConfig.getProps()).get();
        return transform(record, scanner, payloadClass, preCombineField, simpleKeyGenFieldsOpt);
      } catch (IOException io) {
        throw new HoodieIOException("Error while creating reader for file slice with no base file.", io);
      }
    }).iterator());
  }
}
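A hedged usage sketch, assuming the caller has already opened an optional base-file reader and a HoodieMergedLogRecordScanner scoped to the slice's log files, and that the returned reader exposes the merged records through the standard Iterator methods; the payload class and preCombine field shown are placeholders:

// Hypothetical caller: merge base-file and log records for one file slice and walk the result.
HoodieFileSliceReader sliceReader = HoodieFileSliceReader.getFileSliceReader(
    baseFileReader,   // Option<HoodieFileReader>; Option.empty() for a log-only slice
    scanner,          // HoodieMergedLogRecordScanner already built for this slice
    schema,           // Avro schema to project records into
    "org.apache.hudi.common.model.OverwriteWithLatestAvroPayload",
    "ts",             // preCombine field (placeholder)
    Option.empty());  // no simple key-generator fields
while (sliceReader.hasNext()) {
  HoodieRecord<? extends HoodieRecordPayload> record = sliceReader.next();
  // consume the merged record, e.g. record.getKey() and record.getData()
}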
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class AvroOrcUtils, method readFromVector.
/**
 * Read the column vector at a given position conforming to a given ORC schema.
 *
 * @param type       ORC schema of the object to read.
 * @param colVector  The column vector to read.
 * @param avroSchema Avro schema of the object to read.
 *                   Only used to check logical types for timestamp unit conversion.
 * @param vectorPos  The position in the vector where the value to read is stored.
 * @return The object being read.
 */
public static Object readFromVector(TypeDescription type, ColumnVector colVector, Schema avroSchema, int vectorPos) {
  if (colVector.isRepeating) {
    vectorPos = 0;
  }
  if (colVector.isNull[vectorPos]) {
    return null;
  }
  if (avroSchema.getType().equals(Schema.Type.UNION)) {
    avroSchema = getActualSchemaType(avroSchema);
  }
  LogicalType logicalType = avroSchema != null ? avroSchema.getLogicalType() : null;
  switch (type.getCategory()) {
    case BOOLEAN:
      return ((LongColumnVector) colVector).vector[vectorPos] != 0;
    case BYTE:
      return (byte) ((LongColumnVector) colVector).vector[vectorPos];
    case SHORT:
      return (short) ((LongColumnVector) colVector).vector[vectorPos];
    case INT:
      return (int) ((LongColumnVector) colVector).vector[vectorPos];
    case LONG:
      return ((LongColumnVector) colVector).vector[vectorPos];
    case FLOAT:
      return (float) ((DoubleColumnVector) colVector).vector[vectorPos];
    case DOUBLE:
      return ((DoubleColumnVector) colVector).vector[vectorPos];
    case VARCHAR:
    case CHAR:
      int maxLength = type.getMaxLength();
      String result = ((BytesColumnVector) colVector).toString(vectorPos);
      if (result.length() <= maxLength) {
        return result;
      } else {
        throw new HoodieIOException("CHAR/VARCHAR has length " + result.length() + " greater than Max Length allowed");
      }
    case STRING:
      String stringType = avroSchema.getProp(GenericData.STRING_PROP);
      if (stringType == null || !stringType.equals(StringType.String)) {
        int stringLength = ((BytesColumnVector) colVector).length[vectorPos];
        int stringOffset = ((BytesColumnVector) colVector).start[vectorPos];
        byte[] stringBytes = new byte[stringLength];
        System.arraycopy(((BytesColumnVector) colVector).vector[vectorPos], stringOffset, stringBytes, 0, stringLength);
        return new Utf8(stringBytes);
      } else {
        return ((BytesColumnVector) colVector).toString(vectorPos);
      }
    case DATE:
      // convert to daysSinceEpoch for LogicalType.Date
      return (int) ((LongColumnVector) colVector).vector[vectorPos];
    case TIMESTAMP:
      // The unit of time in ORC is millis. Convert (time, nanos) to the desired unit per logicalType.
      long time = ((TimestampColumnVector) colVector).time[vectorPos];
      int nanos = ((TimestampColumnVector) colVector).nanos[vectorPos];
      if (logicalType instanceof LogicalTypes.TimestampMillis) {
        return time;
      } else if (logicalType instanceof LogicalTypes.TimestampMicros) {
        return time * MICROS_PER_MILLI + nanos / NANOS_PER_MICRO;
      } else {
        return ((TimestampColumnVector) colVector).getTimestampAsLong(vectorPos);
      }
    case BINARY:
      int binaryLength = ((BytesColumnVector) colVector).length[vectorPos];
      int binaryOffset = ((BytesColumnVector) colVector).start[vectorPos];
      byte[] binaryBytes = new byte[binaryLength];
      System.arraycopy(((BytesColumnVector) colVector).vector[vectorPos], binaryOffset, binaryBytes, 0, binaryLength);
      // return a ByteBuffer to be consistent with AvroRecordConverter
      return ByteBuffer.wrap(binaryBytes);
    case DECIMAL:
      // HiveDecimal always ignores trailing zeros, thus modifying the scale implicitly;
      // therefore, the scale must be enforced here.
      BigDecimal bigDecimal = ((DecimalColumnVector) colVector).vector[vectorPos].getHiveDecimal().bigDecimalValue()
          .setScale(((LogicalTypes.Decimal) logicalType).getScale());
      Schema.Type baseType = avroSchema.getType();
      if (baseType.equals(Schema.Type.FIXED)) {
        return new Conversions.DecimalConversion().toFixed(bigDecimal, avroSchema, logicalType);
      } else if (baseType.equals(Schema.Type.BYTES)) {
        return bigDecimal.unscaledValue().toByteArray();
      } else {
        throw new HoodieIOException(baseType.getName() + " is not a valid type for LogicalTypes.DECIMAL.");
      }
    case LIST:
      ArrayList<Object> list = new ArrayList<>();
      ListColumnVector listVector = (ListColumnVector) colVector;
      int listLength = (int) listVector.lengths[vectorPos];
      int listOffset = (int) listVector.offsets[vectorPos];
      list.ensureCapacity(listLength);
      TypeDescription childType = type.getChildren().get(0);
      for (int i = 0; i < listLength; i++) {
        list.add(readFromVector(childType, listVector.child, avroSchema.getElementType(), listOffset + i));
      }
      return list;
    case MAP:
      Map<String, Object> map = new HashMap<String, Object>();
      MapColumnVector mapVector = (MapColumnVector) colVector;
      int mapLength = (int) mapVector.lengths[vectorPos];
      int mapOffset = (int) mapVector.offsets[vectorPos];
      // keys are always strings for maps in Avro
      Schema keySchema = Schema.create(Schema.Type.STRING);
      for (int i = 0; i < mapLength; i++) {
        map.put(
            readFromVector(type.getChildren().get(0), mapVector.keys, keySchema, i + mapOffset).toString(),
            readFromVector(type.getChildren().get(1), mapVector.values, avroSchema.getValueType(), i + mapOffset));
      }
      return map;
    case STRUCT:
      StructColumnVector structVector = (StructColumnVector) colVector;
      List<TypeDescription> children = type.getChildren();
      GenericData.Record record = new GenericData.Record(avroSchema);
      for (int i = 0; i < children.size(); i++) {
        record.put(i, readFromVector(children.get(i), structVector.fields[i], avroSchema.getFields().get(i).schema(), vectorPos));
      }
      return record;
    case UNION:
      UnionColumnVector unionVector = (UnionColumnVector) colVector;
      int tag = unionVector.tags[vectorPos];
      ColumnVector fieldVector = unionVector.fields[tag];
      return readFromVector(type.getChildren().get(tag), fieldVector, avroSchema.getTypes().get(tag), vectorPos);
    default:
      throw new HoodieIOException("Unrecognized TypeDescription " + type.toString());
  }
}
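A hedged sketch of driving readFromVector over a vectorized ORC read; the file path and the two-column schema are invented for illustration, while the reader calls (OrcFile.createReader, Reader.rows, createRowBatch, RecordReader.nextBatch) are the standard org.apache.orc API:

// Hypothetical driver: decode every value of struct<id:bigint,name:string> through readFromVector.
TypeDescription orcSchema = TypeDescription.fromString("struct<id:bigint,name:string>");
Schema idSchema = Schema.create(Schema.Type.LONG);
Schema nameSchema = Schema.create(Schema.Type.STRING);
Reader reader = OrcFile.createReader(new org.apache.hadoop.fs.Path("/tmp/example.orc"), // path is illustrative
    OrcFile.readerOptions(new Configuration()));
RecordReader rows = reader.rows();
VectorizedRowBatch batch = orcSchema.createRowBatch();
while (rows.nextBatch(batch)) {
  for (int row = 0; row < batch.size; row++) {
    Object id = AvroOrcUtils.readFromVector(orcSchema.getChildren().get(0), batch.cols[0], idSchema, row);
    Object name = AvroOrcUtils.readFromVector(orcSchema.getChildren().get(1), batch.cols[1], nameSchema, row);
    System.out.println(id + " -> " + name);
  }
}
rows.close();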
Use of org.apache.hudi.exception.HoodieIOException in project hudi by apache.
The class HoodieActiveTimeline, method revertCompleteToInflight.
private void revertCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) {
  ValidationUtils.checkArgument(completed.getTimestamp().equals(inflight.getTimestamp()));
  Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), inflight.getFileName());
  Path commitFilePath = new Path(metaClient.getMetaPath(), completed.getFileName());
  try {
    if (metaClient.getTimelineLayoutVersion().isNullVersion()) {
      if (!metaClient.getFs().exists(inFlightCommitFilePath)) {
        boolean success = metaClient.getFs().rename(commitFilePath, inFlightCommitFilePath);
        if (!success) {
          throw new HoodieIOException("Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
        }
      }
    } else {
      Path requestedInstantFilePath = new Path(metaClient.getMetaPath(),
          new HoodieInstant(State.REQUESTED, inflight.getAction(), inflight.getTimestamp()).getFileName());
      // If the requested and inflight files do not exist, create them
      if (!metaClient.getFs().exists(requestedInstantFilePath)) {
        metaClient.getFs().create(requestedInstantFilePath, false).close();
      }
      if (!metaClient.getFs().exists(inFlightCommitFilePath)) {
        metaClient.getFs().create(inFlightCommitFilePath, false).close();
      }
      boolean success = metaClient.getFs().delete(commitFilePath, false);
      ValidationUtils.checkArgument(success, "State Reverting failed");
    }
  } catch (IOException e) {
    throw new HoodieIOException("Could not complete revert " + completed, e);
  }
}
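Because HoodieIOException is unchecked, the method above can surface filesystem failures without a throws clause. A hedged sketch of how a caller might react; revertInstantToInflight is a hypothetical entry point standing in for whatever public method eventually reaches this private one:

// Hypothetical caller: catch the unchecked wrapper and recover the underlying IOException via getCause().
try {
  revertInstantToInflight(completedInstant); // hypothetical; ends up in revertCompleteToInflight
} catch (HoodieIOException e) {
  if (e.getCause() instanceof IOException) {
    LOG.warn("Filesystem error while reverting " + completedInstant + "; timeline left as-is", e.getCause());
  }
  throw e;
}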