
Example 1 with RecordWriter

Use of io.confluent.connect.storage.format.RecordWriter in project kafka-connect-storage-cloud by confluentinc.

Class AvroRecordWriterProvider, method getRecordWriter:

@Override
public RecordWriter getRecordWriter(final S3SinkConnectorConfig conf, final String filename) {
    // This is not meant to be a thread-safe writer!
    return new RecordWriter() {

        final DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>());

        Schema schema = null;

        S3OutputStream s3out;

        @Override
        public void write(SinkRecord record) {
            if (schema == null) {
                schema = record.valueSchema();
                try {
                    log.info("Opening record writer for: {}", filename);
                    s3out = storage.create(filename, true);
                    org.apache.avro.Schema avroSchema = avroData.fromConnectSchema(schema);
                    writer.setCodec(CodecFactory.fromString(conf.getAvroCodec()));
                    writer.create(avroSchema, s3out);
                } catch (IOException e) {
                    throw new ConnectException(e);
                }
            }
            log.trace("Sink record: {}", record);
            Object value = avroData.fromConnectData(schema, record.value());
            try {
                // AvroData wraps primitive types so their schema can be included; unwrap
                // NonRecordContainers to just their value to properly handle these types
                if (value instanceof NonRecordContainer) {
                    value = ((NonRecordContainer) value).getValue();
                }
                writer.append(value);
            } catch (IOException e) {
                throw new ConnectException(e);
            }
        }

        @Override
        public void commit() {
            try {
                // Flush is required here, because closing the writer will close the underlying S3
                // output stream before committing any data to S3.
                writer.flush();
                s3out.commit();
                writer.close();
            } catch (IOException e) {
                throw new ConnectException(e);
            }
        }

        @Override
        public void close() {
            try {
                writer.close();
            } catch (IOException e) {
                throw new ConnectException(e);
            }
        }
    };
}
Also used: DataFileWriter (org.apache.avro.file.DataFileWriter), Schema (org.apache.kafka.connect.data.Schema), S3OutputStream (io.confluent.connect.s3.storage.S3OutputStream), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), IOException (java.io.IOException), SinkRecord (org.apache.kafka.connect.sink.SinkRecord), RecordWriter (io.confluent.connect.storage.format.RecordWriter), NonRecordContainer (io.confluent.kafka.serializers.NonRecordContainer), ConnectException (org.apache.kafka.connect.errors.ConnectException)
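
The RecordWriter returned above is driven by the caller through write(), commit(), and close(). The following is a minimal sketch of that lifecycle, assuming the provider implements RecordWriterProvider<S3SinkConnectorConfig> (as the overridden signature suggests); the provider, config, and file name would be supplied by the surrounding sink task, and the schema, record, and helper name are invented for illustration.

import io.confluent.connect.s3.S3SinkConnectorConfig;
import io.confluent.connect.storage.format.RecordWriter;
import io.confluent.connect.storage.format.RecordWriterProvider;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.sink.SinkRecord;

public class RecordWriterLifecycleSketch {

    // Hypothetical helper: writes a single record through a provider such as the
    // AvroRecordWriterProvider above, then commits the resulting file.
    static void writeAndCommit(RecordWriterProvider<S3SinkConnectorConfig> provider,
                               S3SinkConnectorConfig conf,
                               String filename) {
        Schema valueSchema = SchemaBuilder.struct().name("User")
                .field("name", Schema.STRING_SCHEMA)
                .build();
        Struct value = new Struct(valueSchema).put("name", "alice");

        // SinkRecord(topic, partition, keySchema, key, valueSchema, value, kafkaOffset)
        SinkRecord record = new SinkRecord("users", 0, null, null, valueSchema, value, 42L);

        RecordWriter writer = provider.getRecordWriter(conf, filename);
        writer.write(record);
        // commit() is what makes the data durable: as the comment in commit() above notes,
        // closing the writer closes the S3 output stream before any data is committed.
        writer.commit();
    }
}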

Example 2 with RecordWriter

Use of io.confluent.connect.storage.format.RecordWriter in project kafka-connect-storage-cloud by confluentinc.

Class ByteArrayRecordWriterProvider, method getRecordWriter:

@Override
public RecordWriter getRecordWriter(final S3SinkConnectorConfig conf, final String filename) {
    return new RecordWriter() {

        final S3OutputStream s3out = storage.create(filename, true);

        final OutputStream s3outWrapper = s3out.wrapForCompression();

        @Override
        public void write(SinkRecord record) {
            log.trace("Sink record: {}", record);
            try {
                byte[] bytes = converter.fromConnectData(record.topic(), record.valueSchema(), record.value());
                s3outWrapper.write(bytes);
                s3outWrapper.write(lineSeparatorBytes);
            } catch (IOException | DataException e) {
                throw new ConnectException(e);
            }
        }

        @Override
        public void commit() {
            try {
                s3out.commit();
                s3outWrapper.close();
            } catch (IOException e) {
                throw new ConnectException(e);
            }
        }

        @Override
        public void close() {
        }
    };
}
Also used: DataException (org.apache.kafka.connect.errors.DataException), RecordWriter (io.confluent.connect.storage.format.RecordWriter), OutputStream (java.io.OutputStream), S3OutputStream (io.confluent.connect.s3.storage.S3OutputStream), IOException (java.io.IOException), SinkRecord (org.apache.kafka.connect.sink.SinkRecord), ConnectException (org.apache.kafka.connect.errors.ConnectException)
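
The same newline-delimited pattern can be exercised without S3. The sketch below is not code from the project: Kafka Connect's JsonConverter stands in for the injected converter, a local file stands in for the S3OutputStream, and the topic, file name, and sample value are assumptions.

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;

public class NewlineDelimitedSketch {

    public static void main(String[] args) throws IOException {
        JsonConverter converter = new JsonConverter();
        converter.configure(Collections.singletonMap("schemas.enable", "false"), false);

        byte[] lineSeparatorBytes = System.lineSeparator().getBytes(StandardCharsets.UTF_8);
        SinkRecord record = new SinkRecord("demo", 0, null, null, Schema.STRING_SCHEMA, "hello", 0L);

        try (OutputStream out = new FileOutputStream("demo-00000.jsonl")) {
            // Same steps as write() above: serialize the value, then append the separator.
            byte[] bytes = converter.fromConnectData(record.topic(), record.valueSchema(), record.value());
            out.write(bytes);
            out.write(lineSeparatorBytes);
        }
    }
}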

Example 3 with RecordWriter

Use of io.confluent.connect.storage.format.RecordWriter in project kafka-connect-storage-cloud by confluentinc.

Class JsonRecordWriterProvider, method getRecordWriter:

@Override
public RecordWriter getRecordWriter(final S3SinkConnectorConfig conf, final String filename) {
    try {
        return new RecordWriter() {

            final S3OutputStream s3out = storage.create(filename, true);

            final OutputStream s3outWrapper = s3out.wrapForCompression();

            final JsonGenerator writer = mapper.getFactory().createGenerator(s3outWrapper).setRootValueSeparator(null);

            @Override
            public void write(SinkRecord record) {
                log.trace("Sink record: {}", record);
                try {
                    Object value = record.value();
                    if (value instanceof Struct) {
                        byte[] rawJson = converter.fromConnectData(record.topic(), record.valueSchema(), value);
                        s3outWrapper.write(rawJson);
                        s3outWrapper.write(LINE_SEPARATOR_BYTES);
                    } else {
                        writer.writeObject(value);
                        writer.writeRaw(LINE_SEPARATOR);
                    }
                } catch (IOException e) {
                    throw new ConnectException(e);
                }
            }

            @Override
            public void commit() {
                try {
                    // Flush is required here, because closing the writer will close the underlying S3
                    // output stream before committing any data to S3.
                    writer.flush();
                    s3out.commit();
                    s3outWrapper.close();
                } catch (IOException e) {
                    throw new ConnectException(e);
                }
            }

            @Override
            public void close() {
                try {
                    writer.close();
                } catch (IOException e) {
                    throw new ConnectException(e);
                }
            }
        };
    } catch (IOException e) {
        throw new ConnectException(e);
    }
}
Also used: RecordWriter (io.confluent.connect.storage.format.RecordWriter), OutputStream (java.io.OutputStream), S3OutputStream (io.confluent.connect.s3.storage.S3OutputStream), JsonGenerator (com.fasterxml.jackson.core.JsonGenerator), IOException (java.io.IOException), SinkRecord (org.apache.kafka.connect.sink.SinkRecord), Struct (org.apache.kafka.connect.data.Struct), ConnectException (org.apache.kafka.connect.errors.ConnectException)
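
In the non-Struct branch above, the Jackson generator writes each value as a standalone JSON document, with the default root value separator disabled so records are delimited only by the explicit line separator. Below is a minimal, self-contained sketch of that generator setup against a local file; the file name and sample value are assumptions, not project code.

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Map;

public class JsonLinesSketch {

    public static void main(String[] args) throws IOException {
        ObjectMapper mapper = new ObjectMapper();
        try (FileOutputStream out = new FileOutputStream("demo-00000.json")) {
            // Null root value separator: record delimiting is handled by writeRaw() below.
            JsonGenerator writer = mapper.getFactory()
                    .createGenerator(out)
                    .setRootValueSeparator(null);
            writer.writeObject(Map.of("name", "alice"));
            writer.writeRaw(System.lineSeparator());
            // Flush buffered JSON before closing the generator.
            writer.flush();
            writer.close();
        }
    }
}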

Example 4 with RecordWriter

Use of io.confluent.connect.storage.format.RecordWriter in project kafka-connect-storage-cloud by confluentinc.

Class TopicPartitionWriter, method close:

public void close() throws ConnectException {
    log.debug("Closing TopicPartitionWriter {}", tp);
    for (RecordWriter writer : writers.values()) {
        writer.close();
    }
    writers.clear();
    startOffsets.clear();
}
Also used: RecordWriter (io.confluent.connect.storage.format.RecordWriter)

Example 5 with RecordWriter

Use of io.confluent.connect.storage.format.RecordWriter in project kafka-connect-storage-cloud by confluentinc.

Class TopicPartitionWriter, method getWriter:

private RecordWriter getWriter(SinkRecord record, String encodedPartition) throws ConnectException {
    if (writers.containsKey(encodedPartition)) {
        return writers.get(encodedPartition);
    }
    String commitFilename = getCommitFilename(encodedPartition);
    log.debug("Creating new writer encodedPartition='{}' filename='{}'", encodedPartition, commitFilename);
    RecordWriter writer = writerProvider.getRecordWriter(connectorConfig, commitFilename);
    writers.put(encodedPartition, writer);
    return writer;
}
Also used: RecordWriter (io.confluent.connect.storage.format.RecordWriter)
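
Examples 4 and 5 together form a get-or-create cache: one RecordWriter per encoded partition, created lazily through the provider and closed in bulk when the topic partition is closed. The sketch below restates that idiom outside TopicPartitionWriter; the class name and factory interface are hypothetical stand-ins for the real writerProvider wiring.

import java.util.HashMap;
import java.util.Map;
import io.confluent.connect.storage.format.RecordWriter;
import org.apache.kafka.connect.sink.SinkRecord;

public class WriterCacheSketch {

    // Hypothetical factory standing in for writerProvider.getRecordWriter(conf, filename).
    public interface WriterFactory {
        RecordWriter create(String encodedPartition);
    }

    private final Map<String, RecordWriter> writers = new HashMap<>();
    private final WriterFactory factory;

    public WriterCacheSketch(WriterFactory factory) {
        this.factory = factory;
    }

    public void write(SinkRecord record, String encodedPartition) {
        // Same get-or-create behavior as getWriter() above, expressed with computeIfAbsent.
        RecordWriter writer = writers.computeIfAbsent(encodedPartition, factory::create);
        writer.write(record);
    }

    public void close() {
        // Mirrors TopicPartitionWriter.close(): close every cached writer, then forget them all.
        for (RecordWriter writer : writers.values()) {
            writer.close();
        }
        writers.clear();
    }
}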

Aggregations

RecordWriter (io.confluent.connect.storage.format.RecordWriter): 7
S3OutputStream (io.confluent.connect.s3.storage.S3OutputStream): 3
IOException (java.io.IOException): 3
ConnectException (org.apache.kafka.connect.errors.ConnectException): 3
SinkRecord (org.apache.kafka.connect.sink.SinkRecord): 3
OutputStream (java.io.OutputStream): 2
JsonGenerator (com.fasterxml.jackson.core.JsonGenerator): 1
NonRecordContainer (io.confluent.kafka.serializers.NonRecordContainer): 1
DataFileWriter (org.apache.avro.file.DataFileWriter): 1
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 1
Schema (org.apache.kafka.connect.data.Schema): 1
Struct (org.apache.kafka.connect.data.Struct): 1
DataException (org.apache.kafka.connect.errors.DataException): 1