
Example 1 with BinaryEncoder

Use of org.apache.avro.io.BinaryEncoder in project storm by apache.

The write method of the class AbstractAvroSerializer:

@Override
public void write(Kryo kryo, Output output, GenericContainer record) {
    // Write the schema fingerprint first so the matching read path can resolve the schema.
    String fingerPrint = this.getFingerprint(record.getSchema());
    output.writeString(fingerPrint);
    GenericDatumWriter<GenericContainer> writer = new GenericDatumWriter<>(record.getSchema());
    // Kryo's Output is an OutputStream, so it can back a direct (unbuffered) BinaryEncoder.
    BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(output, null);
    try {
        writer.write(record, encoder);
        // Flush so no bytes remain buffered in the encoder or the underlying Output.
        encoder.flush();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : BinaryEncoder(org.apache.avro.io.BinaryEncoder) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) GenericContainer(org.apache.avro.generic.GenericContainer)
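
For reference, the matching read path would look roughly like the sketch below (not the verbatim storm source): the fingerprint written above is read back and resolved to a schema through a hypothetical getSchema(fingerPrint) lookup that inverts getFingerprint, then a BinaryDecoder and GenericDatumReader reconstruct the record. Kryo's Input extends InputStream, so it can back a direct BinaryDecoder just as Output backs the encoder.

@Override
public GenericContainer read(Kryo kryo, Input input, Class<GenericContainer> someClass) {
    // Resolve the schema from the fingerprint written by write(); getSchema(...) is
    // assumed to be the inverse of getFingerprint(...).
    Schema schema = this.getSchema(input.readString());
    GenericDatumReader<GenericContainer> reader = new GenericDatumReader<>(schema);
    BinaryDecoder decoder = DecoderFactory.get().directBinaryDecoder(input, null);
    try {
        return reader.read(null, decoder);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}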

Example 2 with BinaryEncoder

Use of org.apache.avro.io.BinaryEncoder in project databus by linkedin.

The populateEvents method of the class RelayEventGenerator:

int populateEvents(String source, short id, GenericRecord record, DbusEventKey key, byte[] schemaId, DbusEventsStatisticsCollector statsCollector, DbusEventBufferAppendable buffer) {
    if (record != null && key != null) {
        try {
            // Serialize the row
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            Encoder encoder = new BinaryEncoder(bos);
            GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
            writer.write(record, encoder);
            byte[] serializedValue = bos.toByteArray();
            short pPartitionId = RngUtils.randomPositiveShort();
            short lPartitionId = RngUtils.randomPositiveShort();
            long timeStamp = System.currentTimeMillis() * 1000000;
            buffer.appendEvent(key, pPartitionId, lPartitionId, timeStamp, id, schemaId, serializedValue, false, statsCollector);
            return 1;
        } catch (IOException io) {
            LOG.error("Cannot create byte stream payload: " + source);
        }
    }
    return 0;
}
Also used : BinaryEncoder(org.apache.avro.io.BinaryEncoder) Encoder(org.apache.avro.io.Encoder) BinaryEncoder(org.apache.avro.io.BinaryEncoder) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord)
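
Note that new BinaryEncoder(bos) is the old Avro 1.4-style constructor; from Avro 1.5 onwards the constructor is no longer public and encoders are obtained from EncoderFactory, whose default binaryEncoder is buffered and therefore needs an explicit flush. A minimal sketch of the same row-serialization step against the newer API (the helper name is illustrative, not part of RelayEventGenerator):

static byte[] serializeRow(GenericRecord record) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    // binaryEncoder(...) returns a buffered encoder, so flush before draining the stream.
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bos, null);
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
    writer.write(record, encoder);
    encoder.flush();
    return bos.toByteArray();
}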

Example 3 with BinaryEncoder

Use of org.apache.avro.io.BinaryEncoder in project databus by linkedin.

The addEventToBuffer method of the class GoldenGateEventProducer:

/**
   *
   * @param dbUpdates  The dbUpdates present in the current transaction
   * @param ti The meta information about the transaction. (See TransactionInfo class for more details).
   * @throws DatabusException
   * @throws UnsupportedKeyException
   */
protected void addEventToBuffer(List<TransactionState.PerSourceTransactionalUpdate> dbUpdates, TransactionInfo ti) throws DatabusException, UnsupportedKeyException {
    if (dbUpdates.size() == 0)
        throw new DatabusException("Cannot handle empty dbUpdates");
    long scn = ti.getScn();
    long timestamp = ti.getTransactionTimeStampNs();
    EventSourceStatistics globalStats = getSource(GLOBAL_SOURCE_ID).getStatisticsBean();
    /**
     * We skip the start SCN of the relay because we have already added an EOP for this SCN in the buffer.
     * Why is this not a problem? There are two cases:
     * 1. We use the earliest/latest SCN when there is no maxScn (there is no real start point), so it is OK to miss the first event.
     * 2. If it is the maxScn, the event was already seen by the relay.
     */
    if (scn == _startPrevScn.get()) {
        _log.info("Skipping this transaction, EOP already send for this event");
        return;
    }
    getEventBuffer().startEvents();
    int eventsInTransactionCount = 0;
    List<EventReaderSummary> summaries = new ArrayList<EventReaderSummary>();
    for (int i = 0; i < dbUpdates.size(); ++i) {
        GenericRecord record = null;
        TransactionState.PerSourceTransactionalUpdate perSourceUpdate = dbUpdates.get(i);
        short sourceId = (short) perSourceUpdate.getSourceId();
        // prepare stats collection per source
        EventSourceStatistics perSourceStats = getSource(sourceId).getStatisticsBean();
        Iterator<DbUpdateState.DBUpdateImage> dbUpdateIterator = perSourceUpdate.getDbUpdatesSet().iterator();
        int eventsInDbUpdate = 0;
        long dbUpdatesEventsSize = 0;
        long startDbUpdatesMs = System.currentTimeMillis();
        //TODO verify if there is any case where we need to rollback.
        while (dbUpdateIterator.hasNext()) {
            DbUpdateState.DBUpdateImage dbUpdate = dbUpdateIterator.next();
            //Construct the Databus Event key, determine the key type and construct the key
            Object keyObj = obtainKey(dbUpdate);
            DbusEventKey eventKey = new DbusEventKey(keyObj);
            //Get the logicalparition id
            PartitionFunction partitionFunction = _partitionFunctionHashMap.get((int) sourceId);
            short lPartitionId = partitionFunction.getPartition(eventKey);
            record = dbUpdate.getGenericRecord();
            //Write the event to the buffer
            if (record == null)
                throw new DatabusException("Cannot write event to buffer because record = " + record);
            if (record.getSchema() == null)
                throw new DatabusException("The record does not have a schema (null schema)");
            try {
                //Collect stats on number of dbUpdates for one source
                eventsInDbUpdate++;
                //Count of all the events in the current transaction
                eventsInTransactionCount++;
                // Serialize the row
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                Encoder encoder = new BinaryEncoder(bos);
                GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
                writer.write(record, encoder);
                byte[] serializedValue = bos.toByteArray();
                //Get the md5 for the schema
                SchemaId schemaId = SchemaId.createWithMd5(dbUpdate.getSchema());
                //Determine the operation type and convert to dbus opcode
                DbusOpcode opCode;
                if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.INSERT || dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.UPDATE) {
                    opCode = DbusOpcode.UPSERT;
                    if (_log.isDebugEnabled())
                        _log.debug("The event with scn " + scn + " is INSERT/UPDATE");
                } else if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.DELETE) {
                    opCode = DbusOpcode.DELETE;
                    if (_log.isDebugEnabled())
                        _log.debug("The event with scn " + scn + " is DELETE");
                } else {
                    throw new DatabusException("Unknown opcode from dbUpdate for event with scn:" + scn);
                }
                //Construct the dbusEvent info
                DbusEventInfo dbusEventInfo = new DbusEventInfo(opCode, scn, (short) _pConfig.getId(), lPartitionId, timestamp, sourceId, schemaId.getByteArray(), serializedValue, false, false);
                dbusEventInfo.setReplicated(dbUpdate.isReplicated());
                perSourceStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
                globalStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
                long tsEnd = System.currentTimeMillis();
                perSourceStats.addTimeOfLastDBAccess(tsEnd);
                globalStats.addTimeOfLastDBAccess(tsEnd);
                //Append to the event buffer
                getEventBuffer().appendEvent(eventKey, dbusEventInfo, _statsCollector);
                _rc.incrementEventCount();
                dbUpdatesEventsSize += serializedValue.length;
            } catch (IOException io) {
                perSourceStats.addError();
                globalStats.addEmptyEventCycle();
                _log.error("Cannot create byte stream payload: " + dbUpdates.get(i).getSourceId());
            }
        }
        long endDbUpdatesMs = System.currentTimeMillis();
        long dbUpdatesElapsedTimeMs = endDbUpdatesMs - startDbUpdatesMs;
        // Log Event Summary at logical source level
        EventReaderSummary summary = new EventReaderSummary(sourceId, _monitoredSources.get(sourceId).getSourceName(),
                scn, eventsInDbUpdate, dbUpdatesEventsSize, -1L, /* Not supported */
                dbUpdatesElapsedTimeMs, timestamp, timestamp, -1L);
        if (_eventsLog.isInfoEnabled()) {
            _eventsLog.info(summary.toString());
        }
        summaries.add(summary);
        if (_log.isDebugEnabled())
            _log.debug("There are " + eventsInDbUpdate + " events seen in the current dbUpdate");
    }
    // Log Event Summary at Physical source level
    ReadEventCycleSummary summary = new ReadEventCycleSummary(_pConfig.getName(), summaries, scn, -1);
    if (_eventsLog.isInfoEnabled()) {
        _eventsLog.info(summary.toString());
    }
    _log.info("Writing " + eventsInTransactionCount + " events from transaction with scn: " + scn);
    if (scn <= 0)
        throw new DatabusException("Unable to write events to buffer because of negative/zero scn: " + scn);
    getEventBuffer().endEvents(scn, _statsCollector);
    _scn.set(scn);
    if (getMaxScnReaderWriter() != null) {
        try {
            getMaxScnReaderWriter().saveMaxScn(_scn.get());
        } catch (DatabusException e) {
            _log.error("Cannot save scn = " + _scn + " for physical source = " + getName(), e);
        }
    }
}
Also used : PartitionFunction(com.linkedin.databus2.producers.PartitionFunction) TransactionState(com.linkedin.databus2.ggParser.XmlStateMachine.TransactionState) ArrayList(java.util.ArrayList) EventSourceStatistics(com.linkedin.databus.monitoring.mbean.EventSourceStatistics) DbusEventInfo(com.linkedin.databus.core.DbusEventInfo) ReadEventCycleSummary(com.linkedin.databus2.producers.db.ReadEventCycleSummary) EventReaderSummary(com.linkedin.databus2.producers.db.EventReaderSummary) Encoder(org.apache.avro.io.Encoder) BinaryEncoder(org.apache.avro.io.BinaryEncoder) DbusOpcode(com.linkedin.databus.core.DbusOpcode) GenericRecord(org.apache.avro.generic.GenericRecord) DbUpdateState(com.linkedin.databus2.ggParser.XmlStateMachine.DbUpdateState) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) DatabusException(com.linkedin.databus2.core.DatabusException) BinaryEncoder(org.apache.avro.io.BinaryEncoder) SchemaId(com.linkedin.databus2.schemas.SchemaId) PerSourceTransactionalUpdate(com.linkedin.databus2.ggParser.XmlStateMachine.TransactionState.PerSourceTransactionalUpdate) DbusEventKey(com.linkedin.databus.core.DbusEventKey)
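
The loop above allocates a fresh ByteArrayOutputStream, BinaryEncoder and GenericDatumWriter for every row. On Avro versions that ship EncoderFactory, the factory accepts the previous encoder as a reuse argument, so the per-row serialization could be pulled into a small helper that recycles the buffer and encoder; a sketch under that assumption (field and method names are illustrative, not part of GoldenGateEventProducer):

private final ByteArrayOutputStream _serializationBuffer = new ByteArrayOutputStream();
private BinaryEncoder _reusableEncoder;

private byte[] serializeRow(GenericRecord record) throws IOException {
    _serializationBuffer.reset();
    // Passing the previous encoder back lets the factory reuse it instead of allocating a new one.
    _reusableEncoder = EncoderFactory.get().directBinaryEncoder(_serializationBuffer, _reusableEncoder);
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
    writer.write(record, _reusableEncoder);
    _reusableEncoder.flush();
    return _serializationBuffer.toByteArray();
}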

Example 4 with BinaryEncoder

Use of org.apache.avro.io.BinaryEncoder in project pinot by linkedin.

The pushAvroIntoKafka method of the class BaseClusterIntegrationTest:

public static void pushAvroIntoKafka(List<File> avroFiles, String kafkaBroker, String kafkaTopic, final byte[] header) {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    ProducerConfig producerConfig = new ProducerConfig(properties);
    Producer<byte[], byte[]> producer = new Producer<byte[], byte[]>(producerConfig);
    for (File avroFile : avroFiles) {
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
            DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
            BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
            GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(reader.getSchema());
            int recordCount = 0;
            List<KeyedMessage<byte[], byte[]>> messagesToWrite = new ArrayList<KeyedMessage<byte[], byte[]>>(10000);
            int messagesInThisBatch = 0;
            for (GenericRecord genericRecord : reader) {
                outputStream.reset();
                if (header != null && 0 < header.length) {
                    outputStream.write(header);
                }
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] bytes = outputStream.toByteArray();
                KeyedMessage<byte[], byte[]> data = new KeyedMessage<byte[], byte[]>(kafkaTopic, Longs.toByteArray(System.currentTimeMillis()), bytes);
                if (BATCH_KAFKA_MESSAGES) {
                    messagesToWrite.add(data);
                    messagesInThisBatch++;
                    if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
                        LOGGER.debug("Sending a batch of {} records to Kafka", messagesInThisBatch);
                        messagesInThisBatch = 0;
                        producer.send(messagesToWrite);
                        messagesToWrite.clear();
                    }
                } else {
                    producer.send(data);
                }
                recordCount += 1;
            }
            if (BATCH_KAFKA_MESSAGES) {
                LOGGER.info("Sending last match of {} records to Kafka", messagesToWrite.size());
                producer.send(messagesToWrite);
            }
            outputStream.close();
            reader.close();
            LOGGER.info("Finished writing " + recordCount + " records from " + avroFile.getName() + " into Kafka topic " + kafkaTopic + " from file " + avroFile.getName());
            int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
            LOGGER.info("Total records written so far " + totalRecordCount);
        } catch (Exception e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
}
Also used : EncoderFactory(org.apache.avro.io.EncoderFactory) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Properties(java.util.Properties) JSONException(org.json.JSONException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException) SQLException(java.sql.SQLException) IOException(java.io.IOException) Producer(kafka.javaapi.producer.Producer) BinaryEncoder(org.apache.avro.io.BinaryEncoder) ProducerConfig(kafka.producer.ProducerConfig) GenericRecord(org.apache.avro.generic.GenericRecord) KeyedMessage(kafka.producer.KeyedMessage) File(java.io.File)
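
On the consuming side, the same messages could be decoded by skipping the optional header (its length must be known to the consumer) and handing the remaining bytes to a BinaryDecoder. A rough sketch, not taken from the pinot test code:

static GenericRecord decodeKafkaMessage(byte[] payload, int headerLength, Schema schema) throws IOException {
    // Decode only the Avro portion of the message, after the optional fixed header.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(payload, headerLength, payload.length - headerLength, null);
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
    return reader.read(null, decoder);
}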

Example 5 with BinaryEncoder

Use of org.apache.avro.io.BinaryEncoder in project gora by apache.

The toBytes method of the class HBaseByteInterface:

/**
   * Serializes an object following the given schema.
   * Does not handle <code>array/map</code> if it is not inside a <code>record</code>
   * @param o Utf8|ByteBuffer|Integer|Long|Float|Double|Boolean|Enum|Persistent
   * @param schema The schema describing the object (or a compatible description)
   * @return array of bytes of the serialized object
   * @throws IOException
   */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static byte[] toBytes(Object o, Schema schema) throws IOException {
    Type type = schema.getType();
    switch(type) {
        // TODO: maybe ((Utf8)o).getBytes(); ?
        case STRING:
            return Bytes.toBytes(((CharSequence) o).toString());
        case BYTES:
            return ((ByteBuffer) o).array();
        case INT:
            return Bytes.toBytes((Integer) o);
        case LONG:
            return Bytes.toBytes((Long) o);
        case FLOAT:
            return Bytes.toBytes((Float) o);
        case DOUBLE:
            return Bytes.toBytes((Double) o);
        case BOOLEAN:
            return (Boolean) o ? new byte[] { 1 } : new byte[] { 0 };
        case ENUM:
            return new byte[] { (byte) ((Enum<?>) o).ordinal() };
        case UNION:
        case RECORD:
            SpecificDatumWriter writer = writerMap.get(schema.getFullName());
            if (writer == null) {
                // ignore dirty bits
                writer = new SpecificDatumWriter(schema);
                writerMap.put(schema.getFullName(), writer);
            }
            BinaryEncoder encoderFromCache = encoders.get();
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            outputStream.set(bos);
            BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(bos, null);
            if (encoderFromCache == null) {
                encoders.set(encoder);
            }
            //reset the buffers
            ByteArrayOutputStream os = outputStream.get();
            os.reset();
            writer.write(o, encoder);
            encoder.flush();
            return os.toByteArray();
        default:
            throw new RuntimeException("Unknown type: " + type);
    }
}
Also used : Type(org.apache.avro.Schema.Type) BinaryEncoder(org.apache.avro.io.BinaryEncoder) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ByteBuffer(java.nio.ByteBuffer) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter)
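
The inverse path for the UNION/RECORD branch would pair a SpecificDatumReader with a BinaryDecoder; a minimal sketch of that direction (the scalar branches would instead use the corresponding Bytes.toXxx conversions), not the exact gora fromBytes implementation:

@SuppressWarnings({ "rawtypes", "unchecked" })
public static Object recordFromBytes(byte[] bytes, Schema schema) throws IOException {
    SpecificDatumReader reader = new SpecificDatumReader(schema);
    // binaryDecoder(...) wraps the byte array directly; no intermediate stream is needed.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
    return reader.read(null, decoder);
}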

Aggregations

BinaryEncoder (org.apache.avro.io.BinaryEncoder): 29
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 20
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 17
GenericRecord (org.apache.avro.generic.GenericRecord): 17
IOException (java.io.IOException): 14
Encoder (org.apache.avro.io.Encoder): 10
SerializationException (voldemort.serialization.SerializationException): 5
SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter): 4
ArrayList (java.util.ArrayList): 3
DbusEventInfo (com.linkedin.databus.core.DbusEventInfo): 2
SQLException (java.sql.SQLException): 2
Properties (java.util.Properties): 2
Producer (kafka.javaapi.producer.Producer): 2
KeyedMessage (kafka.producer.KeyedMessage): 2
ProducerConfig (kafka.producer.ProducerConfig): 2
Schema (org.apache.avro.Schema): 2
GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 2
EncoderFactory (org.apache.avro.io.EncoderFactory): 2
OutputBuffer (org.apache.cassandra.io.util.OutputBuffer): 2
ArchiveException (org.apache.commons.compress.archivers.ArchiveException): 2