Search in sources :

Example 1 with FunctionBasedQueueProducer

Use of org.apache.hudi.common.util.queue.FunctionBasedQueueProducer in the Apache Hudi project.

From the class TestBoundedInMemoryQueue, method testCompositeProducerRecordReading.

/**
 * Verifies that draining the queue iterator yields every record when several producers
 * (alternating iterator-based and function-based) feed the same queue concurrently, and that
 * the records of each individual producer are observed in the order they were generated.
 */
@SuppressWarnings("unchecked")
@Test
@Timeout(value = 60)
public void testCompositeProducerRecordReading() throws Exception {
    final int numRecords = 1000;
    final int numProducers = 40;
    final List<List<HoodieRecord>> recordsPerProducer = new ArrayList<>();
    final BoundedInMemoryQueue<HoodieRecord, HoodieLazyInsertIterable.HoodieInsertValueGenResult> queue = new BoundedInMemoryQueue(FileIOUtils.KB, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));

    // Record key -> (producer index, index of the record within that producer's batch).
    Map<String, Tuple2<Integer, Integer>> positionByKey = new HashMap<>();
    for (int producerIdx = 0; producerIdx < numProducers; producerIdx++) {
        List<HoodieRecord> generated = dataGen.generateInserts(instantTime, numRecords);
        for (int recIdx = 0; recIdx < generated.size(); recIdx++) {
            String key = generated.get(recIdx).getRecordKey();
            // Every generated key must be unique across all producers.
            assertFalse(positionByKey.containsKey(key));
            positionByKey.put(key, new Tuple2<>(producerIdx, recIdx));
        }
        recordsPerProducer.add(generated);
    }

    // Build one producer per batch: even slots pull from an iterator, odd slots push via a function.
    List<BoundedInMemoryQueueProducer<HoodieRecord>> producers = new ArrayList<>();
    int slot = 0;
    for (final List<HoodieRecord> batch : recordsPerProducer) {
        final boolean pushBased = slot % 2 != 0;
        slot++;
        if (pushBased) {
            producers.add(new FunctionBasedQueueProducer<>((buf) -> {
                for (HoodieRecord pending : batch) {
                    try {
                        buf.insertRecord(pending);
                    } catch (Exception e) {
                        throw new HoodieException(e);
                    }
                }
                return true;
            }));
        } else {
            producers.add(new IteratorBasedQueueProducer<>(batch.iterator()));
        }
    }

    // Start every producer on the executor, keeping the futures in submission order.
    final List<Future<Boolean>> producerFutures = new ArrayList<>();
    for (BoundedInMemoryQueueProducer<HoodieRecord> producer : producers) {
        producerFutures.add(executorService.submit(() -> {
            producer.produce(queue);
            return true;
        }));
    }

    // Close the queue only once all producers have finished.
    Future<Boolean> closeFuture = executorService.submit(() -> {
        try {
            for (Future<Boolean> pending : producerFutures) {
                pending.get();
            }
            queue.close();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return true;
    });

    // Per-producer bookkeeping: index of the last record seen (starts at -1) and records seen so far.
    Map<Integer, Integer> lastIndexSeen = new HashMap<>();
    Map<Integer, Integer> recordsSeen = new HashMap<>();
    for (int producerIdx = 0; producerIdx < numProducers; producerIdx++) {
        lastIndexSeen.put(producerIdx, -1);
        recordsSeen.put(producerIdx, 0);
    }

    // Drain the queue, checking that each producer's records show up in FIFO order.
    while (queue.iterator().hasNext()) {
        final HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = queue.iterator().next();
        final HoodieRecord consumed = payload.record;
        Tuple2<Integer, Integer> origin = positionByKey.get(consumed.getRecordKey());
        Integer producerIdx = origin._1();
        int expectedIdx = lastIndexSeen.get(producerIdx) + 1;
        recordsSeen.put(producerIdx, recordsSeen.get(producerIdx) + 1);
        lastIndexSeen.put(producerIdx, expectedIdx);
        // The record just read must be the next one that producer generated.
        assertEquals(expectedIdx, origin._2().intValue());
    }

    // Every producer must have contributed its full batch.
    final Integer expectedCount = Integer.valueOf(numRecords);
    for (int producerIdx = 0; producerIdx < numProducers; producerIdx++) {
        assertEquals(expectedCount, recordsSeen.get(producerIdx));
    }

    // Surface any failure raised while waiting on producers or closing the queue.
    closeFuture.get();
}
Also used : IntStream(java.util.stream.IntStream) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) BeforeEach(org.junit.jupiter.api.BeforeEach) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) HoodieLazyInsertIterable.getTransformFunction(org.apache.hudi.execution.HoodieLazyInsertIterable.getTransformFunction) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) Function(java.util.function.Function) HoodieClientTestHarness(org.apache.hudi.testutils.HoodieClientTestHarness) BoundedInMemoryQueueProducer(org.apache.hudi.common.util.queue.BoundedInMemoryQueueProducer) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) FunctionBasedQueueProducer(org.apache.hudi.common.util.queue.FunctionBasedQueueProducer) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Iterator(java.util.Iterator) Semaphore(java.util.concurrent.Semaphore) IteratorBasedQueueProducer(org.apache.hudi.common.util.queue.IteratorBasedQueueProducer) Mockito.when(org.mockito.Mockito.when) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) Test(org.junit.jupiter.api.Test) ExecutionException(java.util.concurrent.ExecutionException) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) SizeEstimator(org.apache.hudi.common.util.SizeEstimator) BoundedInMemoryQueue(org.apache.hudi.common.util.queue.BoundedInMemoryQueue) Timeout(org.junit.jupiter.api.Timeout) 
Mockito.mock(org.mockito.Mockito.mock) HashMap(java.util.HashMap) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ArrayList(java.util.ArrayList) HoodieException(org.apache.hudi.exception.HoodieException) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) HoodieException(org.apache.hudi.exception.HoodieException) ExecutionException(java.util.concurrent.ExecutionException) BoundedInMemoryQueueProducer(org.apache.hudi.common.util.queue.BoundedInMemoryQueueProducer) Tuple2(scala.Tuple2) BoundedInMemoryQueue(org.apache.hudi.common.util.queue.BoundedInMemoryQueue) Future(java.util.concurrent.Future) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Aggregations

ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1 ExecutionException (java.util.concurrent.ExecutionException)1 Future (java.util.concurrent.Future)1 Semaphore (java.util.concurrent.Semaphore)1 Function (java.util.function.Function)1 Collectors (java.util.stream.Collectors)1 IntStream (java.util.stream.IntStream)1 IndexedRecord (org.apache.avro.generic.IndexedRecord)1 HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord)1 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)1 HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline)1 HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator)1 DefaultSizeEstimator (org.apache.hudi.common.util.DefaultSizeEstimator)1 FileIOUtils (org.apache.hudi.common.util.FileIOUtils)1 Option (org.apache.hudi.common.util.Option)1 SizeEstimator (org.apache.hudi.common.util.SizeEstimator)1