Search in sources :

Example 6 with OffsetRange

use of org.apache.spark.streaming.kafka010.OffsetRange in project hudi by apache.

the class TestKafkaOffsetGen method testGetNextOffsetRangesFromGroup.

/**
 * Exercises {@code getNextOffsetRanges} with the "group" consumer config on a two-partition
 * topic: first after a checkpoint has been committed to Kafka, then with a newly constructed
 * generator.
 */
@Test
public void testGetNextOffsetRangesFromGroup() {
    HoodieTestDataGenerator recordGen = new HoodieTestDataGenerator();
    testUtils.createTopic(TEST_TOPIC_NAME, 2);
    testUtils.sendMessages(TEST_TOPIC_NAME, Helpers.jsonifyRecords(recordGen.generateInserts("000", 1000)));

    // Commit offsets 250 (partition 0) and 249 (partition 1) through the generator itself.
    KafkaOffsetGen offsetGen = new KafkaOffsetGen(getConsumerConfigs("group", "string"));
    String committedCheckpoint = TEST_TOPIC_NAME + ",0:250,1:249";
    offsetGen.commitOffsetToKafka(committedCheckpoint);

    // No checkpoint is handed in here: the ranges must start from the group committed offsets.
    OffsetRange[] ranges = offsetGen.getNextOffsetRanges(Option.empty(), 300, metrics);
    OffsetRange partitionZero = ranges[0];
    assertEquals(250, partitionZero.fromOffset());
    assertEquals(400, partitionZero.untilOffset());
    OffsetRange partitionOne = ranges[1];
    assertEquals(249, partitionOne.fromOffset());
    assertEquals(399, partitionOne.untilOffset());

    // committed offsets are not present for the consumer group
    offsetGen = new KafkaOffsetGen(getConsumerConfigs("group", "string"));
    ranges = offsetGen.getNextOffsetRanges(Option.empty(), 300, metrics);
    partitionZero = ranges[0];
    assertEquals(500, partitionZero.fromOffset());
    assertEquals(500, partitionZero.untilOffset());
    partitionOne = ranges[1];
    assertEquals(500, partitionOne.fromOffset());
    assertEquals(500, partitionOne.untilOffset());
}
Also used : OffsetRange(org.apache.spark.streaming.kafka010.OffsetRange) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Test(org.junit.jupiter.api.Test)

Example 7 with OffsetRange

use of org.apache.spark.streaming.kafka010.OffsetRange in project hudi by apache.

the class TestKafkaOffsetGen method testGetNextOffsetRangesFromLatest.

/**
 * Exercises {@code getNextOffsetRanges} with the "latest" consumer config and no checkpoint,
 * on a single-partition topic holding 1000 messages.
 */
@Test
public void testGetNextOffsetRangesFromLatest() {
    HoodieTestDataGenerator recordGen = new HoodieTestDataGenerator();
    testUtils.createTopic(TEST_TOPIC_NAME, 1);
    testUtils.sendMessages(TEST_TOPIC_NAME, Helpers.jsonifyRecords(recordGen.generateInserts("000", 1000)));

    KafkaOffsetGen offsetGen = new KafkaOffsetGen(getConsumerConfigs("latest", "string"));
    OffsetRange[] ranges = offsetGen.getNextOffsetRanges(Option.empty(), 500, metrics);

    // Exactly one range is expected, and it is empty: from == until == 1000.
    assertEquals(1, ranges.length);
    OffsetRange onlyRange = ranges[0];
    assertEquals(1000, onlyRange.fromOffset());
    assertEquals(1000, onlyRange.untilOffset());
}
Also used : OffsetRange(org.apache.spark.streaming.kafka010.OffsetRange) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Test(org.junit.jupiter.api.Test)

Example 8 with OffsetRange

use of org.apache.spark.streaming.kafka010.OffsetRange in project hudi by apache.

the class TestKafkaOffsetGen method testGetNextOffsetRangesFromCheckpoint.

/**
 * Exercises {@code getNextOffsetRanges} when an explicit checkpoint string
 * ({@code <topic>,0:250}) is supplied, on a single-partition topic holding 1000 messages.
 */
@Test
public void testGetNextOffsetRangesFromCheckpoint() {
    HoodieTestDataGenerator recordGen = new HoodieTestDataGenerator();
    testUtils.createTopic(TEST_TOPIC_NAME, 1);
    testUtils.sendMessages(TEST_TOPIC_NAME, Helpers.jsonifyRecords(recordGen.generateInserts("000", 1000)));

    KafkaOffsetGen offsetGen = new KafkaOffsetGen(getConsumerConfigs("latest", "string"));
    // Checkpoint placing partition 0 at offset 250.
    String checkpoint = TEST_TOPIC_NAME + ",0:250";
    OffsetRange[] ranges = offsetGen.getNextOffsetRanges(Option.of(checkpoint), 500, metrics);

    // One range is expected, starting at the checkpointed offset: [250, 750).
    assertEquals(1, ranges.length);
    OffsetRange onlyRange = ranges[0];
    assertEquals(250, onlyRange.fromOffset());
    assertEquals(750, onlyRange.untilOffset());
}
Also used : OffsetRange(org.apache.spark.streaming.kafka010.OffsetRange) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Test(org.junit.jupiter.api.Test)

Example 9 with OffsetRange

use of org.apache.spark.streaming.kafka010.OffsetRange in project hudi by apache.

the class TestKafkaOffsetGen method testGetNextOffsetRangesFromEarliest.

/**
 * Exercises {@code getNextOffsetRanges} with the "earliest" consumer config and no checkpoint,
 * on a single-partition topic holding 1000 messages, using two different limit arguments
 * (500 and 5000).
 */
@Test
public void testGetNextOffsetRangesFromEarliest() {
    HoodieTestDataGenerator recordGen = new HoodieTestDataGenerator();
    testUtils.createTopic(TEST_TOPIC_NAME, 1);
    testUtils.sendMessages(TEST_TOPIC_NAME, Helpers.jsonifyRecords(recordGen.generateInserts("000", 1000)));

    KafkaOffsetGen offsetGen = new KafkaOffsetGen(getConsumerConfigs("earliest", "string"));

    // Limit argument of 500: expected range is [0, 500).
    OffsetRange[] ranges = offsetGen.getNextOffsetRanges(Option.empty(), 500, metrics);
    assertEquals(1, ranges.length);
    OffsetRange onlyRange = ranges[0];
    assertEquals(0, onlyRange.fromOffset());
    assertEquals(500, onlyRange.untilOffset());

    // Limit argument of 5000 (more than was sent): expected range is [0, 1000).
    ranges = offsetGen.getNextOffsetRanges(Option.empty(), 5000, metrics);
    assertEquals(1, ranges.length);
    onlyRange = ranges[0];
    assertEquals(0, onlyRange.fromOffset());
    assertEquals(1000, onlyRange.untilOffset());
}
Also used : OffsetRange(org.apache.spark.streaming.kafka010.OffsetRange) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Test(org.junit.jupiter.api.Test)

Example 10 with OffsetRange

use of org.apache.spark.streaming.kafka010.OffsetRange in project hudi by apache.

the class TestKafkaOffsetGen method testGetNextOffsetRangesFromMultiplePartitions.

/**
 * Exercises {@code getNextOffsetRanges} with the "earliest" consumer config on a two-partition
 * topic, using a limit argument of 499.
 */
@Test
public void testGetNextOffsetRangesFromMultiplePartitions() {
    HoodieTestDataGenerator recordGen = new HoodieTestDataGenerator();
    testUtils.createTopic(TEST_TOPIC_NAME, 2);
    testUtils.sendMessages(TEST_TOPIC_NAME, Helpers.jsonifyRecords(recordGen.generateInserts("000", 1000)));

    KafkaOffsetGen offsetGen = new KafkaOffsetGen(getConsumerConfigs("earliest", "string"));
    OffsetRange[] ranges = offsetGen.getNextOffsetRanges(Option.empty(), 499, metrics);

    // One range per partition is expected: [0, 250) and [0, 249).
    assertEquals(2, ranges.length);
    OffsetRange partitionZero = ranges[0];
    assertEquals(0, partitionZero.fromOffset());
    assertEquals(250, partitionZero.untilOffset());
    OffsetRange partitionOne = ranges[1];
    assertEquals(0, partitionOne.fromOffset());
    assertEquals(249, partitionOne.untilOffset());
}
Also used : OffsetRange(org.apache.spark.streaming.kafka010.OffsetRange) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Test(org.junit.jupiter.api.Test)

Aggregations

OffsetRange (org.apache.spark.streaming.kafka010.OffsetRange)14 HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator)6 Test (org.junit.jupiter.api.Test)6 HoodieException (org.apache.hudi.exception.HoodieException)3 JavaRDD (org.apache.spark.api.java.JavaRDD)3 IOException (java.io.IOException)2 Arrays (java.util.Arrays)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Collectors (java.util.stream.Collectors)2 GenericRecord (org.apache.avro.generic.GenericRecord)2 TypedProperties (org.apache.hudi.common.config.TypedProperties)2 Option (org.apache.hudi.common.util.Option)2 HoodieDeltaStreamerMetrics (org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics)2 HoodieSourceTimeoutException (org.apache.hudi.utilities.exception.HoodieSourceTimeoutException)2 TopicPartition (org.apache.kafka.common.TopicPartition)2 StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer)2 CanCommitOffsets (org.apache.spark.streaming.kafka010.CanCommitOffsets)2 HasOffsetRanges (org.apache.spark.streaming.kafka010.HasOffsetRanges)2 IoTData (com.apssouza.iot.common.dto.IoTData)1