
Example 1 with TimeBasedPartitioner

Use of io.confluent.connect.storage.partitioner.TimeBasedPartitioner in the project kafka-connect-storage-cloud by confluentinc.

From the class TopicPartitionWriterTest, method testWriteRecordsAfterScheduleRotationExpiryButNoResetShouldGoToSameFile.

@Test
public void testWriteRecordsAfterScheduleRotationExpiryButNoResetShouldGoToSameFile() throws Exception {
    localProps.put(S3SinkConnectorConfig.FLUSH_SIZE_CONFIG, "1000");
    localProps.put(S3SinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.HOURS.toMillis(1)));
    localProps.put(S3SinkConnectorConfig.ROTATE_SCHEDULE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.MINUTES.toMillis(10)));
    setUp();
    // Define the partitioner
    TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
    parsedConfig.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
    parsedConfig.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MockedWallclockTimestampExtractor.class.getName());
    partitioner.configure(parsedConfig);
    MockTime time = ((MockedWallclockTimestampExtractor) partitioner.getTimestampExtractor()).time;
    // Bring the clock to present.
    time.sleep(SYSTEM.milliseconds());
    TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, time, null);
    // Sleep for 11 minutes after startup.
    time.sleep(TimeUnit.MINUTES.toMillis(11));
    // Send new records after the scheduled rotation interval has expired but has not been reset.
    String key = "key";
    Schema schema = createSchema();
    List<Struct> records = createRecordBatches(schema, 3, 6);
    Collection<SinkRecord> sinkRecords = createSinkRecords(records.subList(0, 3), key, schema);
    for (SinkRecord record : sinkRecords) {
        topicPartitionWriter.buffer(record);
    }
    // No records written to S3
    topicPartitionWriter.write();
    // 11 minutes
    time.sleep(TimeUnit.MINUTES.toMillis(11));
    // Records are written due to scheduled rotation
    topicPartitionWriter.write();
    topicPartitionWriter.close();
    long timestampFirst = time.milliseconds();
    String encodedPartitionFirst = getTimebasedEncodedPartition(timestampFirst);
    String dirPrefixFirst = partitioner.generatePartitionedPath(TOPIC, encodedPartitionFirst);
    List<String> expectedFiles = new ArrayList<>();
    for (int i : new int[] { 0 }) {
        expectedFiles.add(FileUtils.fileKeyToCommit(topicsDir, dirPrefixFirst, TOPIC_PARTITION, i, extension, ZERO_PAD_FMT));
    }
    verify(expectedFiles, 3, schema, records);
}
Also used : Schema(org.apache.kafka.connect.data.Schema) ArrayList(java.util.ArrayList) Utils.sinkRecordToLoggableString(io.confluent.connect.s3.util.Utils.sinkRecordToLoggableString) SinkRecord(org.apache.kafka.connect.sink.SinkRecord) TimeBasedPartitioner(io.confluent.connect.storage.partitioner.TimeBasedPartitioner) Struct(org.apache.kafka.connect.data.Struct) MockTime(io.confluent.common.utils.MockTime) Test(org.junit.Test)

Example 2 with TimeBasedPartitioner

Use of io.confluent.connect.storage.partitioner.TimeBasedPartitioner in the project kafka-connect-storage-cloud by confluentinc.

From the class TopicPartitionWriterTest, method testWriteRecordTimeBasedPartitionWallclockMocked.

@Test
public void testWriteRecordTimeBasedPartitionWallclockMocked() throws Exception {
    localProps.put(FLUSH_SIZE_CONFIG, "1000");
    localProps.put(S3SinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.HOURS.toMillis(1)));
    setUp();
    // Define the partitioner
    TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
    parsedConfig.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
    parsedConfig.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MockedWallclockTimestampExtractor.class.getName());
    partitioner.configure(parsedConfig);
    TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, null);
    String key = "key";
    Schema schema = createSchema();
    List<Struct> records = createRecordBatches(schema, 3, 6);
    Collection<SinkRecord> sinkRecords = createSinkRecords(records.subList(0, 9), key, schema);
    for (SinkRecord record : sinkRecords) {
        topicPartitionWriter.buffer(record);
    }
    MockTime time = ((MockedWallclockTimestampExtractor) partitioner.getTimestampExtractor()).time;
    // Bring the clock to present.
    time.sleep(SYSTEM.milliseconds());
    long timestampFirst = time.milliseconds();
    topicPartitionWriter.write();
    // 2 hours
    time.sleep(2 * 3600 * 1000);
    sinkRecords = createSinkRecords(records.subList(9, 18), key, schema, 9);
    for (SinkRecord record : sinkRecords) {
        topicPartitionWriter.buffer(record);
    }
    long timestampLater = time.milliseconds();
    topicPartitionWriter.write();
    // 1 hour and 1 ms later, send another record to flush the pending ones.
    time.sleep(3600 * 1000 + 1);
    sinkRecords = createSinkRecords(records.subList(17, 18), key, schema, 1);
    for (SinkRecord record : sinkRecords) {
        topicPartitionWriter.buffer(record);
    }
    topicPartitionWriter.write();
    topicPartitionWriter.close();
    String encodedPartitionFirst = getTimebasedEncodedPartition(timestampFirst);
    String encodedPartitionLater = getTimebasedEncodedPartition(timestampLater);
    String dirPrefixFirst = partitioner.generatePartitionedPath(TOPIC, encodedPartitionFirst);
    List<String> expectedFiles = new ArrayList<>();
    for (int i : new int[] { 0 }) {
        expectedFiles.add(FileUtils.fileKeyToCommit(topicsDir, dirPrefixFirst, TOPIC_PARTITION, i, extension, ZERO_PAD_FMT));
    }
    String dirPrefixLater = partitioner.generatePartitionedPath(TOPIC, encodedPartitionLater);
    for (int i : new int[] { 9 }) {
        expectedFiles.add(FileUtils.fileKeyToCommit(topicsDir, dirPrefixLater, TOPIC_PARTITION, i, extension, ZERO_PAD_FMT));
    }
    verify(expectedFiles, 9, schema, records);
}
Also used : Schema(org.apache.kafka.connect.data.Schema) ArrayList(java.util.ArrayList) Utils.sinkRecordToLoggableString(io.confluent.connect.s3.util.Utils.sinkRecordToLoggableString) SinkRecord(org.apache.kafka.connect.sink.SinkRecord) TimeBasedPartitioner(io.confluent.connect.storage.partitioner.TimeBasedPartitioner) Struct(org.apache.kafka.connect.data.Struct) MockTime(io.confluent.common.utils.MockTime) Test(org.junit.Test)

Example 3 with TimeBasedPartitioner

Use of io.confluent.connect.storage.partitioner.TimeBasedPartitioner in the project kafka-connect-storage-cloud by confluentinc.

From the class TopicPartitionWriterTest, method testPropagateRetriableErrorsDuringTimeBasedCommits.

@Test(expected = RetriableException.class)
public void testPropagateRetriableErrorsDuringTimeBasedCommits() throws Exception {
    localProps.put(FLUSH_SIZE_CONFIG, "1000");
    localProps.put(S3SinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.HOURS.toMillis(1)));
    localProps.put(S3SinkConnectorConfig.ROTATE_SCHEDULE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.MINUTES.toMillis(10)));
    setUpWithCommitException();
    // Define the partitioner
    TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
    parsedConfig.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
    parsedConfig.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MockedWallclockTimestampExtractor.class.getName());
    partitioner.configure(parsedConfig);
    MockTime time = ((MockedWallclockTimestampExtractor) partitioner.getTimestampExtractor()).time;
    // Bring the clock to present.
    time.sleep(SYSTEM.milliseconds());
    TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, time, null);
    String key = "key";
    Schema schema = createSchema();
    List<Struct> records = createRecordBatches(schema, 3, 6);
    Collection<SinkRecord> sinkRecords = createSinkRecords(records.subList(0, 3), key, schema);
    for (SinkRecord record : sinkRecords) {
        topicPartitionWriter.buffer(record);
    }
    // No records written to S3
    topicPartitionWriter.write();
    long timestampFirst = time.milliseconds();
    // 11 minutes
    time.sleep(TimeUnit.MINUTES.toMillis(11));
    // Records are written due to scheduled rotation
    topicPartitionWriter.write();
}
Also used : Schema(org.apache.kafka.connect.data.Schema) Utils.sinkRecordToLoggableString(io.confluent.connect.s3.util.Utils.sinkRecordToLoggableString) SinkRecord(org.apache.kafka.connect.sink.SinkRecord) MockTime(io.confluent.common.utils.MockTime) TimeBasedPartitioner(io.confluent.connect.storage.partitioner.TimeBasedPartitioner) Struct(org.apache.kafka.connect.data.Struct) Test(org.junit.Test)

Example 4 with TimeBasedPartitioner

Use of io.confluent.connect.storage.partitioner.TimeBasedPartitioner in the project kafka-connect-storage-cloud by confluentinc.

From the class DataWriterParquetTest, method testPreCommitOnRotateTime.

@Test
public void testPreCommitOnRotateTime() throws Exception {
    // Do not roll on size; roll only based on time.
    localProps.put(S3SinkConnectorConfig.FLUSH_SIZE_CONFIG, "1000");
    localProps.put(S3SinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.HOURS.toMillis(1)));
    setUp();
    // Define the partitioner
    TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
    parsedConfig.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
    parsedConfig.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, TopicPartitionWriterTest.MockedWallclockTimestampExtractor.class.getName());
    partitioner.configure(parsedConfig);
    MockTime time = ((TopicPartitionWriterTest.MockedWallclockTimestampExtractor) partitioner.getTimestampExtractor()).time;
    // Bring the clock to present.
    time.sleep(SYSTEM.milliseconds());
    List<SinkRecord> sinkRecords = createRecordsWithTimestamp(4, 0, Collections.singleton(new TopicPartition(TOPIC, PARTITION)), time);
    task = new S3SinkTask(connectorConfig, context, storage, partitioner, format, time);
    // Perform write
    task.put(sinkRecords.subList(0, 3));
    Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = task.preCommit(null);
    Long[] validOffsets1 = { null, null };
    verifyOffsets(offsetsToCommit, validOffsets1, context.assignment());
    // 2 hours
    time.sleep(TimeUnit.HOURS.toMillis(2));
    Long[] validOffsets2 = { 3L, null };
    // Rotation is based only on rotate.interval.ms, so at least one more record is needed to trigger a flush.
    task.put(sinkRecords.subList(3, 4));
    offsetsToCommit = task.preCommit(null);
    verifyOffsets(offsetsToCommit, validOffsets2, context.assignment());
    task.close(context.assignment());
    task.stop();
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) SinkRecord(org.apache.kafka.connect.sink.SinkRecord) MockTime(io.confluent.common.utils.MockTime) TimeBasedPartitioner(io.confluent.connect.storage.partitioner.TimeBasedPartitioner) Test(org.junit.Test)

Example 5 with TimeBasedPartitioner

Use of io.confluent.connect.storage.partitioner.TimeBasedPartitioner in the project kafka-connect-storage-cloud by confluentinc.

From the class TopicPartitionWriterTest, method testWriteRecordTimeBasedPartitionWallclockMockedWithScheduleRotation.

@Test
public void testWriteRecordTimeBasedPartitionWallclockMockedWithScheduleRotation() throws Exception {
    localProps.put(FLUSH_SIZE_CONFIG, "1000");
    localProps.put(S3SinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.HOURS.toMillis(1)));
    localProps.put(S3SinkConnectorConfig.ROTATE_SCHEDULE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.MINUTES.toMillis(10)));
    setUp();
    // Define the partitioner
    TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
    parsedConfig.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
    parsedConfig.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MockedWallclockTimestampExtractor.class.getName());
    partitioner.configure(parsedConfig);
    MockTime time = ((MockedWallclockTimestampExtractor) partitioner.getTimestampExtractor()).time;
    // Bring the clock to present.
    time.sleep(SYSTEM.milliseconds());
    TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, time, null);
    String key = "key";
    Schema schema = createSchema();
    List<Struct> records = createRecordBatches(schema, 3, 6);
    Collection<SinkRecord> sinkRecords = createSinkRecords(records.subList(0, 3), key, schema);
    for (SinkRecord record : sinkRecords) {
        topicPartitionWriter.buffer(record);
    }
    // No records written to S3
    topicPartitionWriter.write();
    long timestampFirst = time.milliseconds();
    // 11 minutes
    time.sleep(TimeUnit.MINUTES.toMillis(11));
    // Records are written due to scheduled rotation
    topicPartitionWriter.write();
    sinkRecords = createSinkRecords(records.subList(3, 6), key, schema, 3);
    for (SinkRecord record : sinkRecords) {
        topicPartitionWriter.buffer(record);
    }
    // More records later
    topicPartitionWriter.write();
    long timestampLater = time.milliseconds();
    // 11 minutes later, another scheduled rotation
    time.sleep(TimeUnit.MINUTES.toMillis(11));
    // Again the records are written due to scheduled rotation
    topicPartitionWriter.write();
    topicPartitionWriter.close();
    String encodedPartitionFirst = getTimebasedEncodedPartition(timestampFirst);
    String encodedPartitionLater = getTimebasedEncodedPartition(timestampLater);
    String dirPrefixFirst = partitioner.generatePartitionedPath(TOPIC, encodedPartitionFirst);
    List<String> expectedFiles = new ArrayList<>();
    for (int i : new int[] { 0 }) {
        expectedFiles.add(FileUtils.fileKeyToCommit(topicsDir, dirPrefixFirst, TOPIC_PARTITION, i, extension, ZERO_PAD_FMT));
    }
    String dirPrefixLater = partitioner.generatePartitionedPath(TOPIC, encodedPartitionLater);
    for (int i : new int[] { 3 }) {
        expectedFiles.add(FileUtils.fileKeyToCommit(topicsDir, dirPrefixLater, TOPIC_PARTITION, i, extension, ZERO_PAD_FMT));
    }
    verify(expectedFiles, 3, schema, records);
}
Also used : Schema(org.apache.kafka.connect.data.Schema) ArrayList(java.util.ArrayList) Utils.sinkRecordToLoggableString(io.confluent.connect.s3.util.Utils.sinkRecordToLoggableString) SinkRecord(org.apache.kafka.connect.sink.SinkRecord) TimeBasedPartitioner(io.confluent.connect.storage.partitioner.TimeBasedPartitioner) Struct(org.apache.kafka.connect.data.Struct) MockTime(io.confluent.common.utils.MockTime) Test(org.junit.Test)

Aggregations

TimeBasedPartitioner (io.confluent.connect.storage.partitioner.TimeBasedPartitioner): 12 usages
SinkRecord (org.apache.kafka.connect.sink.SinkRecord): 12 usages
Test (org.junit.Test): 12 usages
MockTime (io.confluent.common.utils.MockTime): 10 usages
Utils.sinkRecordToLoggableString (io.confluent.connect.s3.util.Utils.sinkRecordToLoggableString): 8 usages
Schema (org.apache.kafka.connect.data.Schema): 8 usages
Struct (org.apache.kafka.connect.data.Struct): 8 usages
ArrayList (java.util.ArrayList): 6 usages
OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata): 4 usages
TopicPartition (org.apache.kafka.common.TopicPartition): 4 usages
SystemTime (io.confluent.common.utils.SystemTime): 1 usage
Time (io.confluent.common.utils.Time): 1 usage
DateTime (org.joda.time.DateTime): 1 usage