Use of io.confluent.connect.storage.partitioner.TimeBasedPartitioner in project kafka-connect-storage-cloud by confluentinc.
From the class TopicPartitionWriterTest, method testWriteRecordsAfterScheduleRotationExpiryButNoResetShouldGoToSameFile.
@Test
public void testWriteRecordsAfterScheduleRotationExpiryButNoResetShouldGoToSameFile() throws Exception {
  localProps.put(S3SinkConnectorConfig.FLUSH_SIZE_CONFIG, "1000");
  localProps.put(S3SinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.HOURS.toMillis(1)));
  localProps.put(S3SinkConnectorConfig.ROTATE_SCHEDULE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.MINUTES.toMillis(10)));
  setUp();
  // Define the partitioner
  TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
  parsedConfig.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
  parsedConfig.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MockedWallclockTimestampExtractor.class.getName());
  partitioner.configure(parsedConfig);
  MockTime time = ((MockedWallclockTimestampExtractor) partitioner.getTimestampExtractor()).time;
  // Bring the clock to the present.
  time.sleep(SYSTEM.milliseconds());
  TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, time, null);
  // Sleep for 11 minutes after startup.
  time.sleep(TimeUnit.MINUTES.toMillis(11));
  // Send new records after the scheduled rotation interval has expired but has not been reset.
  String key = "key";
  Schema schema = createSchema();
  List<Struct> records = createRecordBatches(schema, 3, 6);
  Collection<SinkRecord> sinkRecords = createSinkRecords(records.subList(0, 3), key, schema);
  for (SinkRecord record : sinkRecords) {
    topicPartitionWriter.buffer(record);
  }
  // No records written to S3 yet.
  topicPartitionWriter.write();
  // 11 minutes later
  time.sleep(TimeUnit.MINUTES.toMillis(11));
  // Records are written due to scheduled rotation.
  topicPartitionWriter.write();
  topicPartitionWriter.close();
  long timestampFirst = time.milliseconds();
  String encodedPartitionFirst = getTimebasedEncodedPartition(timestampFirst);
  String dirPrefixFirst = partitioner.generatePartitionedPath(TOPIC, encodedPartitionFirst);
  List<String> expectedFiles = new ArrayList<>();
  for (int i : new int[] { 0 }) {
    expectedFiles.add(FileUtils.fileKeyToCommit(topicsDir, dirPrefixFirst, TOPIC_PARTITION, i, extension, ZERO_PAD_FMT));
  }
  verify(expectedFiles, 3, schema, records);
}
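
All of these tests obtain a MockTime handle through partitioner.getTimestampExtractor(), but the MockedWallclockTimestampExtractor helper itself is not shown on this page. The following is a minimal sketch of how such a wall-clock extractor backed by a mock clock could be written; the class name and field layout are assumptions for illustration, not the project's actual test helper.

import io.confluent.connect.storage.partitioner.TimestampExtractor;
import org.apache.kafka.common.utils.MockTime;
import org.apache.kafka.connect.connector.ConnectRecord;

import java.util.Map;

// Hypothetical stand-in for the MockedWallclockTimestampExtractor used above:
// it ignores the record entirely and reports the current time of a MockTime
// instance that the test advances explicitly with time.sleep(...).
public class MockWallclockExtractorSketch implements TimestampExtractor {
  public final MockTime time = new MockTime();

  @Override
  public void configure(Map<String, Object> config) {
    // Nothing to configure; the clock is controlled by the test.
  }

  @Override
  public Long extract(ConnectRecord<?> record) {
    // Wall-clock semantics: the partitioning time comes from the (mocked) clock,
    // not from a record field or the Kafka record timestamp.
    return time.milliseconds();
  }
}
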
Use of io.confluent.connect.storage.partitioner.TimeBasedPartitioner in project kafka-connect-storage-cloud by confluentinc.
From the class TopicPartitionWriterTest, method testWriteRecordTimeBasedPartitionWallclockMocked.
@Test
public void testWriteRecordTimeBasedPartitionWallclockMocked() throws Exception {
  localProps.put(FLUSH_SIZE_CONFIG, "1000");
  localProps.put(S3SinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.HOURS.toMillis(1)));
  setUp();
  // Define the partitioner
  TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
  parsedConfig.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
  parsedConfig.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MockedWallclockTimestampExtractor.class.getName());
  partitioner.configure(parsedConfig);
  TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, null);
  String key = "key";
  Schema schema = createSchema();
  List<Struct> records = createRecordBatches(schema, 3, 6);
  Collection<SinkRecord> sinkRecords = createSinkRecords(records.subList(0, 9), key, schema);
  for (SinkRecord record : sinkRecords) {
    topicPartitionWriter.buffer(record);
  }
  MockTime time = ((MockedWallclockTimestampExtractor) partitioner.getTimestampExtractor()).time;
  // Bring the clock to the present.
  time.sleep(SYSTEM.milliseconds());
  long timestampFirst = time.milliseconds();
  topicPartitionWriter.write();
  // 2 hours later
  time.sleep(2 * 3600 * 1000);
  sinkRecords = createSinkRecords(records.subList(9, 18), key, schema, 9);
  for (SinkRecord record : sinkRecords) {
    topicPartitionWriter.buffer(record);
  }
  long timestampLater = time.milliseconds();
  topicPartitionWriter.write();
  // 1 hour and 1 ms later; send another record to flush the pending ones.
  time.sleep(3600 * 1000 + 1);
  sinkRecords = createSinkRecords(records.subList(17, 18), key, schema, 1);
  for (SinkRecord record : sinkRecords) {
    topicPartitionWriter.buffer(record);
  }
  topicPartitionWriter.write();
  topicPartitionWriter.close();
  String encodedPartitionFirst = getTimebasedEncodedPartition(timestampFirst);
  String encodedPartitionLater = getTimebasedEncodedPartition(timestampLater);
  String dirPrefixFirst = partitioner.generatePartitionedPath(TOPIC, encodedPartitionFirst);
  List<String> expectedFiles = new ArrayList<>();
  for (int i : new int[] { 0 }) {
    expectedFiles.add(FileUtils.fileKeyToCommit(topicsDir, dirPrefixFirst, TOPIC_PARTITION, i, extension, ZERO_PAD_FMT));
  }
  String dirPrefixLater = partitioner.generatePartitionedPath(TOPIC, encodedPartitionLater);
  for (int i : new int[] { 9 }) {
    expectedFiles.add(FileUtils.fileKeyToCommit(topicsDir, dirPrefixLater, TOPIC_PARTITION, i, extension, ZERO_PAD_FMT));
  }
  verify(expectedFiles, 9, schema, records);
}
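
In these tests parsedConfig is pre-populated by setUp(), so only the two time-based keys need to be overridden. Outside the test harness the remaining partitioner settings have to be supplied explicitly. The sketch below shows one hedged way to configure a TimeBasedPartitioner directly; the path format, locale, timezone, and delimiter values are illustrative defaults, and the exact set of required keys can vary between versions of kafka-connect-storage-common.

import io.confluent.connect.storage.common.StorageCommonConfig;
import io.confluent.connect.storage.partitioner.PartitionerConfig;
import io.confluent.connect.storage.partitioner.TimeBasedPartitioner;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

public class TimeBasedPartitionerConfigSketch {
  public static TimeBasedPartitioner<?> newPartitioner() {
    Map<String, Object> config = new HashMap<>();
    // Width of each time-based partition window.
    config.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
    // Joda-Time pattern used to build the directory path for each window.
    config.put(PartitionerConfig.PATH_FORMAT_CONFIG, "'year'=YYYY/'month'=MM/'day'=dd/'hour'=HH");
    config.put(PartitionerConfig.LOCALE_CONFIG, "en");
    config.put(PartitionerConfig.TIMEZONE_CONFIG, "UTC");
    // "Wallclock" selects the built-in system-clock extractor; the tests above
    // substitute their own mocked extractor instead.
    config.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, "Wallclock");
    // Delimiter used when concatenating path segments.
    config.put(StorageCommonConfig.DIRECTORY_DELIM_CONFIG, "/");

    TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
    partitioner.configure(config);
    return partitioner;
  }
}
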
Use of io.confluent.connect.storage.partitioner.TimeBasedPartitioner in project kafka-connect-storage-cloud by confluentinc.
From the class TopicPartitionWriterTest, method testPropagateRetriableErrorsDuringTimeBasedCommits.
@Test(expected = RetriableException.class)
public void testPropagateRetriableErrorsDuringTimeBasedCommits() throws Exception {
  localProps.put(FLUSH_SIZE_CONFIG, "1000");
  localProps.put(S3SinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.HOURS.toMillis(1)));
  localProps.put(S3SinkConnectorConfig.ROTATE_SCHEDULE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.MINUTES.toMillis(10)));
  setUpWithCommitException();
  // Define the partitioner
  TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
  parsedConfig.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
  parsedConfig.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MockedWallclockTimestampExtractor.class.getName());
  partitioner.configure(parsedConfig);
  MockTime time = ((MockedWallclockTimestampExtractor) partitioner.getTimestampExtractor()).time;
  // Bring the clock to the present.
  time.sleep(SYSTEM.milliseconds());
  TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, time, null);
  String key = "key";
  Schema schema = createSchema();
  List<Struct> records = createRecordBatches(schema, 3, 6);
  Collection<SinkRecord> sinkRecords = createSinkRecords(records.subList(0, 3), key, schema);
  for (SinkRecord record : sinkRecords) {
    topicPartitionWriter.buffer(record);
  }
  // No records written to S3 yet.
  topicPartitionWriter.write();
  long timestampFirst = time.milliseconds();
  // 11 minutes later
  time.sleep(TimeUnit.MINUTES.toMillis(11));
  // The scheduled rotation attempts the commit, and the injected commit failure propagates as RetriableException.
  topicPartitionWriter.write();
}
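
Here the expected failure covers the whole method body via @Test(expected = ...). A hedged alternative, shown only as a sketch and not how the project's test is written, is to pin the expectation to the second write() call with assertThrows (available since JUnit 4.13):

// Fragment only: would replace the final sleep-and-write sequence in the test above.
// Requires: import static org.junit.Assert.assertThrows;
//           import org.apache.kafka.connect.errors.RetriableException;
time.sleep(TimeUnit.MINUTES.toMillis(11));
assertThrows(RetriableException.class, topicPartitionWriter::write);
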
Use of io.confluent.connect.storage.partitioner.TimeBasedPartitioner in project kafka-connect-storage-cloud by confluentinc.
From the class DataWriterParquetTest, method testPreCommitOnRotateTime.
@Test
public void testPreCommitOnRotateTime() throws Exception {
  // Do not roll on size, only based on time.
  localProps.put(S3SinkConnectorConfig.FLUSH_SIZE_CONFIG, "1000");
  localProps.put(S3SinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.HOURS.toMillis(1)));
  setUp();
  // Define the partitioner
  TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
  parsedConfig.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
  parsedConfig.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, TopicPartitionWriterTest.MockedWallclockTimestampExtractor.class.getName());
  partitioner.configure(parsedConfig);
  MockTime time = ((TopicPartitionWriterTest.MockedWallclockTimestampExtractor) partitioner.getTimestampExtractor()).time;
  // Bring the clock to the present.
  time.sleep(SYSTEM.milliseconds());
  List<SinkRecord> sinkRecords = createRecordsWithTimestamp(4, 0, Collections.singleton(new TopicPartition(TOPIC, PARTITION)), time);
  task = new S3SinkTask(connectorConfig, context, storage, partitioner, format, time);
  // Perform write
  task.put(sinkRecords.subList(0, 3));
  Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = task.preCommit(null);
  Long[] validOffsets1 = { null, null };
  verifyOffsets(offsetsToCommit, validOffsets1, context.assignment());
  // 2 hours later
  time.sleep(TimeUnit.HOURS.toMillis(2));
  Long[] validOffsets2 = { 3L, null };
  // Rotation is only based on rotate.interval.ms, so at least one more record is needed to trigger a flush.
  task.put(sinkRecords.subList(3, 4));
  offsetsToCommit = task.preCommit(null);
  verifyOffsets(offsetsToCommit, validOffsets2, context.assignment());
  task.close(context.assignment());
  task.stop();
}
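
verifyOffsets is a helper from the test base class and is not reproduced on this page. The sketch below illustrates the semantics its assertions rely on, under two assumptions that are not taken from the source: a null entry in validOffsets means "nothing should be committed yet for that partition", and preCommit reports the offset of the next record to consume (last flushed offset + 1). The real helper may be implemented differently.

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import java.util.Map;
import java.util.Set;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

// Hypothetical helper mirroring what a verifyOffsets-style check is expected to do.
public class OffsetAssertions {
  public static void checkCommittedOffsets(Map<TopicPartition, OffsetAndMetadata> actual,
                                            Long[] validOffsets,
                                            Set<TopicPartition> assignment) {
    int i = 0;
    for (TopicPartition tp : assignment) {
      Long expected = validOffsets[i++];
      if (expected == null) {
        // No file has been committed for this partition, so no offset should be reported.
        assertNull(actual.get(tp));
      } else {
        // The committed offset is the next record to consume, i.e. last flushed offset + 1.
        assertEquals(expected.longValue(), actual.get(tp).offset());
      }
    }
  }
}
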
Use of io.confluent.connect.storage.partitioner.TimeBasedPartitioner in project kafka-connect-storage-cloud by confluentinc.
From the class TopicPartitionWriterTest, method testWriteRecordTimeBasedPartitionWallclockMockedWithScheduleRotation.
@Test
public void testWriteRecordTimeBasedPartitionWallclockMockedWithScheduleRotation() throws Exception {
  localProps.put(FLUSH_SIZE_CONFIG, "1000");
  localProps.put(S3SinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.HOURS.toMillis(1)));
  localProps.put(S3SinkConnectorConfig.ROTATE_SCHEDULE_INTERVAL_MS_CONFIG, String.valueOf(TimeUnit.MINUTES.toMillis(10)));
  setUp();
  // Define the partitioner
  TimeBasedPartitioner<?> partitioner = new TimeBasedPartitioner<>();
  parsedConfig.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, TimeUnit.DAYS.toMillis(1));
  parsedConfig.put(PartitionerConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MockedWallclockTimestampExtractor.class.getName());
  partitioner.configure(parsedConfig);
  MockTime time = ((MockedWallclockTimestampExtractor) partitioner.getTimestampExtractor()).time;
  // Bring the clock to the present.
  time.sleep(SYSTEM.milliseconds());
  TopicPartitionWriter topicPartitionWriter = new TopicPartitionWriter(TOPIC_PARTITION, storage, writerProvider, partitioner, connectorConfig, context, time, null);
  String key = "key";
  Schema schema = createSchema();
  List<Struct> records = createRecordBatches(schema, 3, 6);
  Collection<SinkRecord> sinkRecords = createSinkRecords(records.subList(0, 3), key, schema);
  for (SinkRecord record : sinkRecords) {
    topicPartitionWriter.buffer(record);
  }
  // No records written to S3 yet.
  topicPartitionWriter.write();
  long timestampFirst = time.milliseconds();
  // 11 minutes later
  time.sleep(TimeUnit.MINUTES.toMillis(11));
  // Records are written due to scheduled rotation.
  topicPartitionWriter.write();
  sinkRecords = createSinkRecords(records.subList(3, 6), key, schema, 3);
  for (SinkRecord record : sinkRecords) {
    topicPartitionWriter.buffer(record);
  }
  // More records later
  topicPartitionWriter.write();
  long timestampLater = time.milliseconds();
  // 11 minutes later, another scheduled rotation
  time.sleep(TimeUnit.MINUTES.toMillis(11));
  // Again the records are written due to scheduled rotation.
  topicPartitionWriter.write();
  topicPartitionWriter.close();
  String encodedPartitionFirst = getTimebasedEncodedPartition(timestampFirst);
  String encodedPartitionLater = getTimebasedEncodedPartition(timestampLater);
  String dirPrefixFirst = partitioner.generatePartitionedPath(TOPIC, encodedPartitionFirst);
  List<String> expectedFiles = new ArrayList<>();
  for (int i : new int[] { 0 }) {
    expectedFiles.add(FileUtils.fileKeyToCommit(topicsDir, dirPrefixFirst, TOPIC_PARTITION, i, extension, ZERO_PAD_FMT));
  }
  String dirPrefixLater = partitioner.generatePartitionedPath(TOPIC, encodedPartitionLater);
  for (int i : new int[] { 3 }) {
    expectedFiles.add(FileUtils.fileKeyToCommit(topicsDir, dirPrefixLater, TOPIC_PARTITION, i, extension, ZERO_PAD_FMT));
  }
  verify(expectedFiles, 3, schema, records);
}
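
getTimebasedEncodedPartition is another helper from the test class that is not reproduced on this page. As a rough, assumption-laden sketch, the expected encoded partition for a given wall-clock timestamp can be derived by aligning the timestamp to the start of its partition window and formatting it with a Joda-Time pattern. The pattern, locale, and timezone used here are the connector's documented defaults, not values read from parsedConfig as the real helper presumably does.

import java.util.Locale;
import java.util.concurrent.TimeUnit;

import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

// Illustrative only: shows how a timestamp maps to an encoded partition path
// such as year=2024/month=05/day=17/hour=00 under a one-day partition window.
public class EncodedPartitionSketch {
  public static String encode(long timestampMs) {
    long partitionDurationMs = TimeUnit.DAYS.toMillis(1);
    // Align the timestamp to the start of its partition window.
    long windowStart = timestampMs - (timestampMs % partitionDurationMs);
    DateTimeFormatter formatter = DateTimeFormat
        .forPattern("'year'=YYYY/'month'=MM/'day'=dd/'hour'=HH")
        .withLocale(Locale.US)
        .withZone(DateTimeZone.UTC);
    return formatter.print(windowStart);
  }

  public static void main(String[] args) {
    System.out.println(encode(System.currentTimeMillis()));
  }
}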