Search in sources :

Example 1 with BoundedInMemoryQueue

Use of org.apache.hudi.common.util.queue.BoundedInMemoryQueue in the Apache Hudi project.

From the class TestBoundedInMemoryQueue, method testCompositeProducerRecordReading.

/**
 * Verifies that the queue iterator hands out every record when several producers feed one queue,
 * and that the records of any single producer are observed in the order that producer emitted them.
 *
 * @throws Exception if record generation, a producer task, or the close task fails
 */
@SuppressWarnings("unchecked")
@Test
@Timeout(value = 60)
public void testCompositeProducerRecordReading() throws Exception {
    final int numRecords = 1000;
    final int numProducers = 40;
    final List<List<HoodieRecord>> recordsPerProducer = new ArrayList<>();
    final BoundedInMemoryQueue<HoodieRecord, HoodieLazyInsertIterable.HoodieInsertValueGenResult> queue = new BoundedInMemoryQueue(FileIOUtils.KB, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));

    // Maps each record key to (producer index, position of the record within that producer).
    final Map<String, Tuple2<Integer, Integer>> positionByKey = new HashMap<>();
    for (int producerIdx = 0; producerIdx < numProducers; producerIdx++) {
        final List<HoodieRecord> generated = dataGen.generateInserts(instantTime, numRecords);
        int recIdx = 0;
        for (Iterator<HoodieRecord> it = generated.iterator(); it.hasNext(); recIdx++) {
            final String key = it.next().getRecordKey();
            // Every key must be unique across all producers, otherwise the lookup below is ambiguous.
            assertFalse(positionByKey.containsKey(key));
            positionByKey.put(key, new Tuple2<>(producerIdx, recIdx));
        }
        recordsPerProducer.add(generated);
    }

    // Build one producer per record list: odd positions push into the queue through a function,
    // even positions let the queue pull from an iterator.
    final List<BoundedInMemoryQueueProducer<HoodieRecord>> producers = new ArrayList<>();
    int producerNo = 0;
    for (final List<HoodieRecord> producerRecs : recordsPerProducer) {
        if (producerNo % 2 != 0) {
            producers.add(new FunctionBasedQueueProducer<>((buf) -> {
                for (Iterator<HoodieRecord> it = producerRecs.iterator(); it.hasNext();) {
                    try {
                        buf.insertRecord(it.next());
                    } catch (Exception e) {
                        throw new HoodieException(e);
                    }
                }
                return true;
            }));
        } else {
            producers.add(new IteratorBasedQueueProducer<>(producerRecs.iterator()));
        }
        producerNo++;
    }

    // Start every producer on the executor, keeping the futures in submission order.
    final List<Future<Boolean>> futureList = new ArrayList<>();
    for (final BoundedInMemoryQueueProducer<HoodieRecord> producer : producers) {
        futureList.add(executorService.submit(() -> {
            producer.produce(queue);
            return true;
        }));
    }

    // A separate task waits for all producers to finish and only then closes the queue.
    final Future<Boolean> closeFuture = executorService.submit(() -> {
        try {
            for (Future<Boolean> producerFuture : futureList) {
                producerFuture.get();
            }
            queue.close();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return true;
    });

    // Per producer: position of the last record seen (starts at -1) and number of records seen.
    final Map<Integer, Integer> lastSeenMap = new HashMap<>();
    final Map<Integer, Integer> countMap = new HashMap<>();
    for (int producerIdx = 0; producerIdx < numProducers; producerIdx++) {
        lastSeenMap.put(producerIdx, -1);
        countMap.put(producerIdx, 0);
    }

    // Drain the queue, checking that each producer's records show up in FIFO order.
    while (queue.iterator().hasNext()) {
        final HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = queue.iterator().next();
        final HoodieRecord rec = payload.record;
        final Tuple2<Integer, Integer> origin = positionByKey.get(rec.getRecordKey());
        final Integer producerId = origin._1();
        final Integer previousPos = lastSeenMap.get(producerId);
        countMap.merge(producerId, 1, Integer::sum);
        final int expectedPos = previousPos + 1;
        lastSeenMap.put(producerId, expectedPos);
        // The record just read must be the one immediately following the last one seen.
        assertEquals(expectedPos, origin._2().intValue());
    }

    // Every producer must have contributed exactly numRecords records.
    for (int producerIdx = 0; producerIdx < numProducers; producerIdx++) {
        assertEquals(Integer.valueOf(numRecords), countMap.get(producerIdx));
    }

    // The close task must have completed without an exception.
    closeFuture.get();
}
Also used : IntStream(java.util.stream.IntStream) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) BeforeEach(org.junit.jupiter.api.BeforeEach) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) HoodieLazyInsertIterable.getTransformFunction(org.apache.hudi.execution.HoodieLazyInsertIterable.getTransformFunction) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) Function(java.util.function.Function) HoodieClientTestHarness(org.apache.hudi.testutils.HoodieClientTestHarness) BoundedInMemoryQueueProducer(org.apache.hudi.common.util.queue.BoundedInMemoryQueueProducer) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) FunctionBasedQueueProducer(org.apache.hudi.common.util.queue.FunctionBasedQueueProducer) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Iterator(java.util.Iterator) Semaphore(java.util.concurrent.Semaphore) IteratorBasedQueueProducer(org.apache.hudi.common.util.queue.IteratorBasedQueueProducer) Mockito.when(org.mockito.Mockito.when) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) Test(org.junit.jupiter.api.Test) ExecutionException(java.util.concurrent.ExecutionException) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) SizeEstimator(org.apache.hudi.common.util.SizeEstimator) BoundedInMemoryQueue(org.apache.hudi.common.util.queue.BoundedInMemoryQueue) Timeout(org.junit.jupiter.api.Timeout) 
Mockito.mock(org.mockito.Mockito.mock) HashMap(java.util.HashMap) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ArrayList(java.util.ArrayList) HoodieException(org.apache.hudi.exception.HoodieException) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) HoodieException(org.apache.hudi.exception.HoodieException) ExecutionException(java.util.concurrent.ExecutionException) BoundedInMemoryQueueProducer(org.apache.hudi.common.util.queue.BoundedInMemoryQueueProducer) Tuple2(scala.Tuple2) BoundedInMemoryQueue(org.apache.hudi.common.util.queue.BoundedInMemoryQueue) Future(java.util.concurrent.Future) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Example 2 with BoundedInMemoryQueue

Use of org.apache.hudi.common.util.queue.BoundedInMemoryQueue in the Apache Hudi project.

From the class TestBoundedInMemoryQueue, method testException.

// Verifies that a failure raised on either side of the queue (the queueing thread or the
// thread reading from the queue iterator) is propagated to the other side.
@SuppressWarnings("unchecked")
@Test
@Timeout(value = 60)
public void testException() throws Exception {
    final int numRecords = 256;
    final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
    final SizeEstimator<Tuple2<HoodieRecord, Option<IndexedRecord>>> sizeEstimator = new DefaultSizeEstimator<>();

    // The queue memory limit is four times the estimated size of one transformed record.
    HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA).apply((HoodieAvroRecord) hoodieRecords.get(0));
    final long memoryLimitInBytes = 4 * sizeEstimator.sizeEstimate(new Tuple2<>(payload.record, payload.insertValue));

    // Scenario 1: the failure is signalled from the reader side; the queueing thread must stop
    // and surface that failure to whoever waits on its future.
    BoundedInMemoryQueue<HoodieRecord, Tuple2<HoodieRecord, Option<IndexedRecord>>> queue1 = new BoundedInMemoryQueue(memoryLimitInBytes, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));
    final Future<Boolean> resFuture = executorService.submit(() -> {
        final IteratorBasedQueueProducer<HoodieRecord> producer = new IteratorBasedQueueProducer<>(hoodieRecords.iterator());
        producer.produce(queue1);
        return true;
    });

    // Poll until the rate limiter reports the queue as full, i.e. the producer is held back.
    while (true) {
        if (isQueueFull(queue1.rateLimiter)) {
            break;
        }
        Thread.sleep(10);
    }

    // Mark the queue as failed and check that the producer task terminates with that failure.
    final Exception e = new Exception("Failing it :)");
    queue1.markAsFailed(e);
    final Throwable thrown1 = assertThrows(ExecutionException.class, () -> resFuture.get(), "exception is expected");
    final Throwable producerFailure = thrown1.getCause();
    assertEquals(HoodieException.class, producerFailure.getClass());
    assertEquals(e, producerFailure.getCause());

    // Scenario 2: the failure happens while records are being queued; it must reach the thread
    // reading from the queue iterator.
    final RuntimeException expectedException = new RuntimeException("failing record reading");
    final Iterator<HoodieRecord> mockHoodieRecordsIterator = mock(Iterator.class);
    when(mockHoodieRecordsIterator.hasNext()).thenReturn(true);
    when(mockHoodieRecordsIterator.next()).thenThrow(expectedException);
    BoundedInMemoryQueue<HoodieRecord, Tuple2<HoodieRecord, Option<IndexedRecord>>> queue2 = new BoundedInMemoryQueue(memoryLimitInBytes, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));
    final Future<Boolean> res = executorService.submit(() -> {
        final IteratorBasedQueueProducer<HoodieRecord> failingProducer = new IteratorBasedQueueProducer<>(mockHoodieRecordsIterator);
        try {
            failingProducer.produce(queue2);
        } catch (Exception ex) {
            // Hand the failure to the queue before rethrowing it from the task.
            queue2.markAsFailed(ex);
            throw ex;
        }
        return true;
    });

    // The reader must see the producer's exception as the cause of its own failure.
    final Throwable thrown2 = assertThrows(Exception.class, () -> queue2.iterator().hasNext(), "exception is expected");
    assertEquals(expectedException, thrown2.getCause());

    // The queueing task must have ended as well, failing with the same exception.
    final Throwable thrown3 = assertThrows(ExecutionException.class, () -> res.get(), "exception is expected");
    assertEquals(expectedException, thrown3.getCause());
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieException(org.apache.hudi.exception.HoodieException) ExecutionException(java.util.concurrent.ExecutionException) Tuple2(scala.Tuple2) BoundedInMemoryQueue(org.apache.hudi.common.util.queue.BoundedInMemoryQueue) DefaultSizeEstimator(org.apache.hudi.common.util.DefaultSizeEstimator) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Example 3 with BoundedInMemoryQueue

Use of org.apache.hudi.common.util.queue.BoundedInMemoryQueue in the Apache Hudi project.

From the class TestBoundedInMemoryQueue, method testRecordReading.

// Verifies that a single producer's records come out of the queue iterator in their original
// order, with matching insert values, and that neither side raises an exception.
@SuppressWarnings("unchecked")
@Test
@Timeout(value = 60)
public void testRecordReading() throws Exception {
    final int numRecords = 128;
    final List<HoodieRecord> hoodieRecords = dataGen.generateInserts(instantTime, numRecords);
    final BoundedInMemoryQueue<HoodieRecord, HoodieLazyInsertIterable.HoodieInsertValueGenResult> queue = new BoundedInMemoryQueue(FileIOUtils.KB, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA));

    // Producer task: push every generated record into the queue, then close it.
    final Future<Boolean> resFuture = executorService.submit(() -> {
        final IteratorBasedQueueProducer<HoodieRecord> producer = new IteratorBasedQueueProducer<>(hoodieRecords.iterator());
        producer.produce(queue);
        queue.close();
        return true;
    });

    // Walk the source list in lockstep with the queue and compare element by element.
    final Iterator<HoodieRecord> originalRecordIterator = hoodieRecords.iterator();
    int recordsRead = 0;
    for (; queue.iterator().hasNext(); recordsRead++) {
        final HoodieAvroRecord expectedRecord = (HoodieAvroRecord) originalRecordIterator.next();
        final Option<IndexedRecord> expectedInsertValue = expectedRecord.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA);
        final HoodieLazyInsertIterable.HoodieInsertValueGenResult<HoodieRecord> payload = queue.iterator().next();
        // The queue must preserve the order in which records were produced.
        assertEquals(expectedRecord, payload.record);
        // The insert value derived from the queued record must equal the one derived from the source record.
        final HoodieAvroRecord queuedRecord = (HoodieAvroRecord) payload.record;
        assertEquals(expectedInsertValue, queuedRecord.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA));
    }

    // Both the queue and the source list must be exhausted at this point.
    assertFalse(queue.iterator().hasNext());
    assertFalse(originalRecordIterator.hasNext());
    // Every generated record must have been read.
    assertEquals(numRecords, recordsRead);
    // The producer task must have finished without throwing.
    resFuture.get();
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) BoundedInMemoryQueue(org.apache.hudi.common.util.queue.BoundedInMemoryQueue) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Aggregations

IndexedRecord (org.apache.avro.generic.IndexedRecord)3 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)3 BoundedInMemoryQueue (org.apache.hudi.common.util.queue.BoundedInMemoryQueue)3 Test (org.junit.jupiter.api.Test)3 Timeout (org.junit.jupiter.api.Timeout)3 ExecutionException (java.util.concurrent.ExecutionException)2 HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord)2 DefaultSizeEstimator (org.apache.hudi.common.util.DefaultSizeEstimator)2 HoodieException (org.apache.hudi.exception.HoodieException)2 Tuple2 (scala.Tuple2)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1 Future (java.util.concurrent.Future)1 Semaphore (java.util.concurrent.Semaphore)1 Function (java.util.function.Function)1 Collectors (java.util.stream.Collectors)1 IntStream (java.util.stream.IntStream)1