
Example 81 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

In class TestBoundedInMemoryQueue, method testCompositeProducerRecordReading:

/**
 * Test to ensure that we are reading all records from queue iterator when we have multiple producers.
 */
@SuppressWarnings("unchecked")
@Test
@Timeout(value = 60)
public void testCompositeProducerRecordReading() throws Exception {
    final int numRecords = 1000;
    final int numProducers = 40;
    final List<List<HoodieRecord>> recs = new ArrayList<>();
    final BoundedInMemoryQueue<HoodieRecord, HoodieLazyInsertIterable.HoodieInsertValueGenResult> queue = new BoundedInMemoryQueue(FileIOUtils.KB, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));
    // Record Key to <Producer Index, Rec Index within a producer>
    Map<String, Tuple2<Integer, Integer>> keyToProducerAndIndexMap = new HashMap<>();
    for (int i = 0; i < numProducers; i++) {
        List<HoodieRecord> pRecs = dataGen.generateInserts(instantTime, numRecords);
        int j = 0;
        for (HoodieRecord r : pRecs) {
            assertFalse(keyToProducerAndIndexMap.containsKey(r.getRecordKey()));
            keyToProducerAndIndexMap.put(r.getRecordKey(), new Tuple2<>(i, j));
            j++;
        }
        recs.add(pRecs);
    }
    List<BoundedInMemoryQueueProducer<HoodieRecord>> producers = new ArrayList<>();
    for (int i = 0; i < recs.size(); i++) {
        final List<HoodieRecord> r = recs.get(i);
        // Alternate between pull and push based iterators
        if (i % 2 == 0) {
            producers.add(new IteratorBasedQueueProducer<>(r.iterator()));
        } else {
            producers.add(new FunctionBasedQueueProducer<>((buf) -> {
                Iterator<HoodieRecord> itr = r.iterator();
                while (itr.hasNext()) {
                    try {
                        buf.insertRecord(itr.next());
                    } catch (Exception e) {
                        throw new HoodieException(e);
                    }
                }
                return true;
            }));
        }
    }
    final List<Future<Boolean>> futureList = producers.stream().map(producer -> {
        return executorService.submit(() -> {
            producer.produce(queue);
            return true;
        });
    }).collect(Collectors.toList());
    // Close queue
    Future<Boolean> closeFuture = executorService.submit(() -> {
        try {
            for (Future f : futureList) {
                f.get();
            }
            queue.close();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return true;
    });
    // Used to ensure that consumer sees the records generated by a single producer in FIFO order
    Map<Integer, Integer> lastSeenMap = IntStream.range(0, numProducers).boxed().collect(Collectors.toMap(Function.identity(), x -> -1));
    Map<Integer, Integer> countMap = IntStream.range(0, numProducers).boxed().collect(Collectors.toMap(Function.identity(), x -> 0));
    // Read recs and ensure we have covered all producer recs.
    while (queue.iterator().hasNext()) {
        final HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = queue.iterator().next();
        final HoodieRecord rec = payload.record;
        Tuple2<Integer, Integer> producerPos = keyToProducerAndIndexMap.get(rec.getRecordKey());
        Integer lastSeenPos = lastSeenMap.get(producerPos._1());
        countMap.put(producerPos._1(), countMap.get(producerPos._1()) + 1);
        lastSeenMap.put(producerPos._1(), lastSeenPos + 1);
        // Ensure we are seeing the next record generated
        assertEquals(lastSeenPos + 1, producerPos._2().intValue());
    }
    for (int i = 0; i < numProducers; i++) {
        // Ensure we have seen all the records for each producers
        assertEquals(Integer.valueOf(numRecords), countMap.get(i));
    }
    // Ensure Close future is done
    closeFuture.get();
}
Also used : IntStream(java.util.stream.IntStream) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) BeforeEach(org.junit.jupiter.api.BeforeEach) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) HoodieLazyInsertIterable.getTransformFunction(org.apache.hudi.execution.HoodieLazyInsertIterable.getTransformFunction) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) Function(java.util.function.Function) HoodieClientTestHarness(org.apache.hudi.testutils.HoodieClientTestHarness) BoundedInMemoryQueueProducer(org.apache.hudi.common.util.queue.BoundedInMemoryQueueProducer) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) FunctionBasedQueueProducer(org.apache.hudi.common.util.queue.FunctionBasedQueueProducer) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Iterator(java.util.Iterator) Semaphore(java.util.concurrent.Semaphore) IteratorBasedQueueProducer(org.apache.hudi.common.util.queue.IteratorBasedQueueProducer) Mockito.when(org.mockito.Mockito.when) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) Test(org.junit.jupiter.api.Test) ExecutionException(java.util.concurrent.ExecutionException) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) SizeEstimator(org.apache.hudi.common.util.SizeEstimator) BoundedInMemoryQueue(org.apache.hudi.common.util.queue.BoundedInMemoryQueue) Timeout(org.junit.jupiter.api.Timeout) Mockito.mock(org.mockito.Mockito.mock)
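For readers who want the producer pattern above in isolation, here is a minimal, hypothetical sketch (not part of the Hudi test) of a function-based producer that pushes a list of records into the queue and rethrows any checked failure as an unchecked HoodieException. It assumes only the producer and queue types already shown in Example 81.

import java.util.List;

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.queue.FunctionBasedQueueProducer;
import org.apache.hudi.exception.HoodieException;

public class QueueProducerSketch {

    // Builds a push-based producer: the lambda receives the queue, inserts every
    // record, and wraps checked exceptions so they can escape the lambda unchecked.
    static FunctionBasedQueueProducer<HoodieRecord> producerFor(List<HoodieRecord> records) {
        return new FunctionBasedQueueProducer<>(queue -> {
            for (HoodieRecord record : records) {
                try {
                    queue.insertRecord(record);
                } catch (Exception e) {
                    // HoodieException extends RuntimeException, so no throws clause is needed.
                    throw new HoodieException(e);
                }
            }
            return true;
        });
    }
}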

Example 82 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

In class OrcBootstrapMetadataHandler, method executeBootstrap:

@Override
void executeBootstrap(HoodieBootstrapHandle<?, ?, ?, ?> bootstrapHandle, Path sourceFilePath, KeyGeneratorInterface keyGenerator, String partitionPath, Schema avroSchema) throws Exception {
    BoundedInMemoryExecutor<GenericRecord, HoodieRecord, Void> wrapper = null;
    Reader orcReader = OrcFile.createReader(sourceFilePath, OrcFile.readerOptions(table.getHadoopConf()));
    TypeDescription orcSchema = orcReader.getSchema();
    try (RecordReader reader = orcReader.rows(new Reader.Options(table.getHadoopConf()).schema(orcSchema))) {
        wrapper = new BoundedInMemoryExecutor<GenericRecord, HoodieRecord, Void>(config.getWriteBufferLimitBytes(), new OrcReaderIterator(reader, avroSchema, orcSchema), new BootstrapRecordConsumer(bootstrapHandle), inp -> {
            String recKey = keyGenerator.getKey(inp).getRecordKey();
            GenericRecord gr = new GenericData.Record(HoodieAvroUtils.RECORD_KEY_SCHEMA);
            gr.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recKey);
            BootstrapRecordPayload payload = new BootstrapRecordPayload(gr);
            HoodieRecord rec = new HoodieAvroRecord(new HoodieKey(recKey, partitionPath), payload);
            return rec;
        }, table.getPreExecuteRunnable());
        wrapper.execute();
    } catch (Exception e) {
        throw new HoodieException(e);
    } finally {
        bootstrapHandle.close();
        if (null != wrapper) {
            wrapper.shutdownNow();
        }
    }
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) AvroOrcUtils(org.apache.hudi.common.util.AvroOrcUtils) OrcReaderIterator(org.apache.hudi.common.util.OrcReaderIterator) HoodieException(org.apache.hudi.exception.HoodieException) KeyGeneratorInterface(org.apache.hudi.keygen.KeyGeneratorInterface) OrcFile(org.apache.orc.OrcFile) GenericData(org.apache.avro.generic.GenericData) HoodieBootstrapHandle(org.apache.hudi.io.HoodieBootstrapHandle) Logger(org.apache.log4j.Logger) Reader(org.apache.orc.Reader) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Path(org.apache.hadoop.fs.Path) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) RecordReader(org.apache.orc.RecordReader) TypeDescription(org.apache.orc.TypeDescription) IOException(java.io.IOException) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) BootstrapRecordPayload(org.apache.hudi.client.bootstrap.BootstrapRecordPayload) HoodieKey(org.apache.hudi.common.model.HoodieKey) LogManager(org.apache.log4j.LogManager) BoundedInMemoryExecutor(org.apache.hudi.common.util.queue.BoundedInMemoryExecutor)

Example 83 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

In class ParquetBootstrapMetadataHandler, method executeBootstrap:

@Override
void executeBootstrap(HoodieBootstrapHandle<?, ?, ?, ?> bootstrapHandle, Path sourceFilePath, KeyGeneratorInterface keyGenerator, String partitionPath, Schema avroSchema) throws Exception {
    BoundedInMemoryExecutor<GenericRecord, HoodieRecord, Void> wrapper = null;
    try {
        ParquetReader<IndexedRecord> reader = AvroParquetReader.<IndexedRecord>builder(sourceFilePath).withConf(table.getHadoopConf()).build();
        wrapper = new BoundedInMemoryExecutor<GenericRecord, HoodieRecord, Void>(config.getWriteBufferLimitBytes(), new ParquetReaderIterator(reader), new BootstrapRecordConsumer(bootstrapHandle), inp -> {
            String recKey = keyGenerator.getKey(inp).getRecordKey();
            GenericRecord gr = new GenericData.Record(HoodieAvroUtils.RECORD_KEY_SCHEMA);
            gr.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recKey);
            BootstrapRecordPayload payload = new BootstrapRecordPayload(gr);
            HoodieRecord rec = new HoodieAvroRecord(new HoodieKey(recKey, partitionPath), payload);
            return rec;
        }, table.getPreExecuteRunnable());
        wrapper.execute();
    } catch (Exception e) {
        throw new HoodieException(e);
    } finally {
        bootstrapHandle.close();
        if (null != wrapper) {
            wrapper.shutdownNow();
        }
    }
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) AvroSchemaConverter(org.apache.parquet.avro.AvroSchemaConverter) HoodieAvroUtils(org.apache.hudi.avro.HoodieAvroUtils) HoodieException(org.apache.hudi.exception.HoodieException) ParquetMetadataConverter(org.apache.parquet.format.converter.ParquetMetadataConverter) KeyGeneratorInterface(org.apache.hudi.keygen.KeyGeneratorInterface) GenericData(org.apache.avro.generic.GenericData) HoodieBootstrapHandle(org.apache.hudi.io.HoodieBootstrapHandle) Logger(org.apache.log4j.Logger) ParquetReaderIterator(org.apache.hudi.common.util.ParquetReaderIterator) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Path(org.apache.hadoop.fs.Path) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ParquetReader(org.apache.parquet.hadoop.ParquetReader) IOException(java.io.IOException) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) ParquetFileReader(org.apache.parquet.hadoop.ParquetFileReader) MessageType(org.apache.parquet.schema.MessageType) BootstrapRecordPayload(org.apache.hudi.client.bootstrap.BootstrapRecordPayload) AvroParquetReader(org.apache.parquet.avro.AvroParquetReader) HoodieKey(org.apache.hudi.common.model.HoodieKey) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) LogManager(org.apache.log4j.LogManager) BoundedInMemoryExecutor(org.apache.hudi.common.util.queue.BoundedInMemoryExecutor)
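Examples 82 and 83 differ only in the source reader (ORC vs. Parquet); the transform they pass to BoundedInMemoryExecutor is identical. As an illustration only (this helper class does not exist in Hudi), the shared logic can be read as a standalone method that builds a key-only "skeleton" record for the bootstrap consumer:

import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.client.bootstrap.BootstrapRecordPayload;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.keygen.KeyGeneratorInterface;

public class BootstrapTransformSketch {

    // Mirrors the lambda in both handlers: extract the record key from the source
    // row, build an Avro record carrying only that key, and wrap it in a
    // HoodieAvroRecord so the bootstrap consumer can write the skeleton file.
    static HoodieRecord toSkeletonRecord(KeyGeneratorInterface keyGenerator,
                                         GenericRecord sourceRecord,
                                         String partitionPath) {
        String recKey = keyGenerator.getKey(sourceRecord).getRecordKey();
        GenericRecord keyOnly = new GenericData.Record(HoodieAvroUtils.RECORD_KEY_SCHEMA);
        keyOnly.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recKey);
        BootstrapRecordPayload payload = new BootstrapRecordPayload(keyOnly);
        return new HoodieAvroRecord(new HoodieKey(recKey, partitionPath), payload);
    }
}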

Example 84 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

In class HoodieRowCreateHandleWithoutMetaFields, method write:

/**
 * Write the incoming InternalRow as is.
 *
 * @param record instance of {@link InternalRow} that needs to be written to the fileWriter.
 * @throws IOException
 */
@Override
public void write(InternalRow record) throws IOException {
    try {
        fileWriter.writeRow(record);
        writeStatus.markSuccess();
    } catch (Throwable ge) {
        writeStatus.setGlobalError(ge);
        throw new HoodieException("Exception thrown while writing spark InternalRows to file ", ge);
    }
}
Also used : HoodieException(org.apache.hudi.exception.HoodieException)
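Because write(...) rethrows any failure as an unchecked HoodieException with the original Throwable attached as the cause, callers can recover the root error without declaring a throws clause. A small, hypothetical caller-side sketch (the helper class and the Runnable stand-in are placeholders, not Hudi APIs):

import org.apache.hudi.exception.HoodieException;

public class WriteErrorSketch {

    // "writeAttempt" stands in for any call whose failures are wrapped in
    // HoodieException, like the write(...) override above.
    static void runAndReport(Runnable writeAttempt) {
        try {
            writeAttempt.run();
        } catch (HoodieException e) {
            // The same Throwable recorded via writeStatus.setGlobalError is the cause here.
            System.err.println("Write failed: " + e.getMessage() + ", cause: " + e.getCause());
        }
    }
}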

Example 85 with HoodieException

Use of org.apache.hudi.exception.HoodieException in project hudi by apache.

In class TestDataSourceUtils, method testDoWriteOperationWithNonExistUserDefinedBulkInsertPartitioner:

@Test
public void testDoWriteOperationWithNonExistUserDefinedBulkInsertPartitioner() throws HoodieException {
    setAndVerifyHoodieWriteClientWith("NonExistClassName");
    Exception exception = assertThrows(HoodieException.class, () -> {
        DataSourceUtils.doWriteOperation(hoodieWriteClient, hoodieRecords, "test-time", WriteOperationType.BULK_INSERT);
    });
    assertThat(exception.getMessage(), containsString("Could not create UserDefinedBulkInsertPartitioner"));
}
Also used : HoodieException(org.apache.hudi.exception.HoodieException) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

HoodieException (org.apache.hudi.exception.HoodieException): 171 usages
IOException (java.io.IOException): 87 usages
Path (org.apache.hadoop.fs.Path): 45 usages
Schema (org.apache.avro.Schema): 35 usages
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 35 usages
List (java.util.List): 30 usages
ArrayList (java.util.ArrayList): 27 usages
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 23 usages
Collectors (java.util.stream.Collectors): 21 usages
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 19 usages
Option (org.apache.hudi.common.util.Option): 19 usages
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 18 usages
Map (java.util.Map): 16 usages
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 16 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 15 usages
Arrays (java.util.Arrays): 14 usages
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 14 usages
Logger (org.apache.log4j.Logger): 14 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 13 usages
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 13 usages