use of org.apache.avro.io.BinaryEncoder in project storm by apache.
the class AbstractAvroSerializer method write.
@Override
public void write(Kryo kryo, Output output, GenericContainer record) {
    String fingerPrint = this.getFingerprint(record.getSchema());
    output.writeString(fingerPrint);
    GenericDatumWriter<GenericContainer> writer = new GenericDatumWriter<>(record.getSchema());
    BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(output, null);
    try {
        writer.write(record, encoder);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
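The decoding side of this serializer is symmetric: it reads the fingerprint back first, resolves it to a schema, and decodes the payload with a BinaryDecoder. A minimal sketch of such a read method is shown below, assuming the concrete serializer provides a getSchema(fingerprint) lookup (that lookup, and the exact signature, are assumptions rather than part of the snippet above).
@Override
public GenericContainer read(Kryo kryo, Input input, Class<GenericContainer> someClass) {
    // Resolve the fingerprint written by write() back to a schema.
    // getSchema(fingerprint) is assumed to be supplied by the concrete serializer.
    Schema theSchema = this.getSchema(input.readString());
    GenericDatumReader<GenericContainer> reader = new GenericDatumReader<>(theSchema);
    BinaryDecoder decoder = DecoderFactory.get().directBinaryDecoder(input, null);
    try {
        return reader.read(null, decoder);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}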
use of org.apache.avro.io.BinaryEncoder in project databus by linkedin.
the class RelayEventGenerator method populateEvents.
int populateEvents(String source, short id, GenericRecord record, DbusEventKey key, byte[] schemaId, DbusEventsStatisticsCollector statsCollector, DbusEventBufferAppendable buffer) {
    if (record != null && key != null) {
        try {
            // Serialize the row
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            Encoder encoder = new BinaryEncoder(bos);
            GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
            writer.write(record, encoder);
            byte[] serializedValue = bos.toByteArray();
            short pPartitionId = RngUtils.randomPositiveShort();
            short lPartitionId = RngUtils.randomPositiveShort();
            // Current time converted from milliseconds to nanoseconds
            long timeStamp = System.currentTimeMillis() * 1000000;
            buffer.appendEvent(key, pPartitionId, lPartitionId, timeStamp, id, schemaId, serializedValue, false, statsCollector);
            return 1;
        } catch (IOException io) {
            LOG.error("Cannot create byte stream payload: " + source);
        }
    }
    return 0;
}
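Note that new BinaryEncoder(bos) above is the pre-1.5 Avro API, in which the BinaryEncoder constructor is public; from Avro 1.5 onward the class is abstract and encoders come from EncoderFactory. A minimal sketch of the same row serialization against the newer API (the serializeRow helper name is hypothetical):
// Hypothetical helper: the serialization above, rewritten for Avro 1.5+.
private static byte[] serializeRow(GenericRecord record) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    // directBinaryEncoder writes straight through to the stream; the buffered
    // binaryEncoder variant would also work but must be flushed before reading bos.
    BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(bos, null);
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
    writer.write(record, encoder);
    encoder.flush();
    return bos.toByteArray();
}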
use of org.apache.avro.io.BinaryEncoder in project databus by linkedin.
the class GoldenGateEventProducer method addEventToBuffer.
/**
*
* @param dbUpdates The dbUpdates present in the current transaction
* @param ti The meta information about the transaction. (See TransactionInfo class for more details).
* @throws DatabusException
* @throws UnsupportedKeyException
*/
protected void addEventToBuffer(List<TransactionState.PerSourceTransactionalUpdate> dbUpdates, TransactionInfo ti) throws DatabusException, UnsupportedKeyException {
    if (dbUpdates.size() == 0)
        throw new DatabusException("Cannot handle empty dbUpdates");
    long scn = ti.getScn();
    long timestamp = ti.getTransactionTimeStampNs();
    EventSourceStatistics globalStats = getSource(GLOBAL_SOURCE_ID).getStatisticsBean();
    /**
     * We skip the start scn of the relay; we have already added an EOP for this SCN in the buffer.
     * Why is this not a problem?
     * There are two cases:
     * 1. We use the earliest/latest scn when there is no maxScn (we don't really have a start point), so it is OK to miss the first event.
     * 2. If it is the maxScn, then the event was already seen by the relay.
     */
    if (scn == _startPrevScn.get()) {
        _log.info("Skipping this transaction, EOP already sent for this event");
        return;
    }
    getEventBuffer().startEvents();
    int eventsInTransactionCount = 0;
    List<EventReaderSummary> summaries = new ArrayList<EventReaderSummary>();
    for (int i = 0; i < dbUpdates.size(); ++i) {
        GenericRecord record = null;
        TransactionState.PerSourceTransactionalUpdate perSourceUpdate = dbUpdates.get(i);
        short sourceId = (short) perSourceUpdate.getSourceId();
        // prepare stats collection per source
        EventSourceStatistics perSourceStats = getSource(sourceId).getStatisticsBean();
        Iterator<DbUpdateState.DBUpdateImage> dbUpdateIterator = perSourceUpdate.getDbUpdatesSet().iterator();
        int eventsInDbUpdate = 0;
        long dbUpdatesEventsSize = 0;
        long startDbUpdatesMs = System.currentTimeMillis();
        //TODO verify if there is any case where we need to rollback.
        while (dbUpdateIterator.hasNext()) {
            DbUpdateState.DBUpdateImage dbUpdate = dbUpdateIterator.next();
            //Construct the Databus event key: determine the key type and construct the key
            Object keyObj = obtainKey(dbUpdate);
            DbusEventKey eventKey = new DbusEventKey(keyObj);
            //Get the logical partition id
            PartitionFunction partitionFunction = _partitionFunctionHashMap.get((int) sourceId);
            short lPartitionId = partitionFunction.getPartition(eventKey);
            record = dbUpdate.getGenericRecord();
            //Write the event to the buffer
            if (record == null)
                throw new DatabusException("Cannot write event to buffer because record = " + record);
            if (record.getSchema() == null)
                throw new DatabusException("The record does not have a schema (null schema)");
            try {
                //Collect stats on number of dbUpdates for one source
                eventsInDbUpdate++;
                //Count of all the events in the current transaction
                eventsInTransactionCount++;
                // Serialize the row
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                Encoder encoder = new BinaryEncoder(bos);
                GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
                writer.write(record, encoder);
                byte[] serializedValue = bos.toByteArray();
                //Get the md5 for the schema
                SchemaId schemaId = SchemaId.createWithMd5(dbUpdate.getSchema());
                //Determine the operation type and convert to dbus opcode
                DbusOpcode opCode;
                if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.INSERT || dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.UPDATE) {
                    opCode = DbusOpcode.UPSERT;
                    if (_log.isDebugEnabled())
                        _log.debug("The event with scn " + scn + " is INSERT/UPDATE");
                } else if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.DELETE) {
                    opCode = DbusOpcode.DELETE;
                    if (_log.isDebugEnabled())
                        _log.debug("The event with scn " + scn + " is DELETE");
                } else {
                    throw new DatabusException("Unknown opcode from dbUpdate for event with scn:" + scn);
                }
                //Construct the dbusEvent info
                DbusEventInfo dbusEventInfo = new DbusEventInfo(opCode, scn, (short) _pConfig.getId(), lPartitionId, timestamp, sourceId, schemaId.getByteArray(), serializedValue, false, false);
                dbusEventInfo.setReplicated(dbUpdate.isReplicated());
                perSourceStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
                globalStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
                long tsEnd = System.currentTimeMillis();
                perSourceStats.addTimeOfLastDBAccess(tsEnd);
                globalStats.addTimeOfLastDBAccess(tsEnd);
                //Append to the event buffer
                getEventBuffer().appendEvent(eventKey, dbusEventInfo, _statsCollector);
                _rc.incrementEventCount();
                dbUpdatesEventsSize += serializedValue.length;
            } catch (IOException io) {
                perSourceStats.addError();
                globalStats.addEmptyEventCycle();
                _log.error("Cannot create byte stream payload: " + dbUpdates.get(i).getSourceId());
            }
        }
        long endDbUpdatesMs = System.currentTimeMillis();
        long dbUpdatesElapsedTimeMs = endDbUpdatesMs - startDbUpdatesMs;
        // Log event summary at the logical source level
        EventReaderSummary summary = new EventReaderSummary(sourceId, _monitoredSources.get(sourceId).getSourceName(), scn, eventsInDbUpdate, dbUpdatesEventsSize, -1L, /* Not supported */
                dbUpdatesElapsedTimeMs, timestamp, timestamp, -1L);
        if (_eventsLog.isInfoEnabled()) {
            _eventsLog.info(summary.toString());
        }
        summaries.add(summary);
        if (_log.isDebugEnabled())
            _log.debug("There are " + eventsInDbUpdate + " events seen in the current dbUpdate");
    }
    // Log event summary at the physical source level
    ReadEventCycleSummary summary = new ReadEventCycleSummary(_pConfig.getName(), summaries, scn, -1);
    if (_eventsLog.isInfoEnabled()) {
        _eventsLog.info(summary.toString());
    }
    _log.info("Writing " + eventsInTransactionCount + " events from transaction with scn: " + scn);
    if (scn <= 0)
        throw new DatabusException("Unable to write events to buffer because of negative/zero scn: " + scn);
    getEventBuffer().endEvents(scn, _statsCollector);
    _scn.set(scn);
    if (getMaxScnReaderWriter() != null) {
        try {
            getMaxScnReaderWriter().saveMaxScn(_scn.get());
        } catch (DatabusException e) {
            _log.error("Cannot save scn = " + _scn + " for physical source = " + getName(), e);
        }
    }
}
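For context, the serializedValue appended here is a single record in raw Avro binary encoding, with the writer schema identified by the MD5 SchemaId placed in the event header. A consumer that has resolved that SchemaId back to the writer Schema (the registry lookup is not shown in the snippet above) could decode it roughly as follows; the helper below is an illustration, not code from the project.
// Hypothetical consumer-side decode of a payload produced by addEventToBuffer.
// writerSchema must be the schema whose MD5 matches the event's SchemaId.
private static GenericRecord deserializeRow(byte[] serializedValue, Schema writerSchema) throws IOException {
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(writerSchema);
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serializedValue, null);
    return reader.read(null, decoder);
}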
use of org.apache.avro.io.BinaryEncoder in project pinot by linkedin.
the class BaseClusterIntegrationTest method pushAvroIntoKafka.
public static void pushAvroIntoKafka(List<File> avroFiles, String kafkaBroker, String kafkaTopic, final byte[] header) {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    ProducerConfig producerConfig = new ProducerConfig(properties);
    Producer<byte[], byte[]> producer = new Producer<byte[], byte[]>(producerConfig);
    for (File avroFile : avroFiles) {
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
            DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
            BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
            GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(reader.getSchema());
            int recordCount = 0;
            List<KeyedMessage<byte[], byte[]>> messagesToWrite = new ArrayList<KeyedMessage<byte[], byte[]>>(10000);
            int messagesInThisBatch = 0;
            for (GenericRecord genericRecord : reader) {
                outputStream.reset();
                if (header != null && 0 < header.length) {
                    outputStream.write(header);
                }
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] bytes = outputStream.toByteArray();
                KeyedMessage<byte[], byte[]> data = new KeyedMessage<byte[], byte[]>(kafkaTopic, Longs.toByteArray(System.currentTimeMillis()), bytes);
                if (BATCH_KAFKA_MESSAGES) {
                    messagesToWrite.add(data);
                    messagesInThisBatch++;
                    if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
                        LOGGER.debug("Sending a batch of {} records to Kafka", messagesInThisBatch);
                        messagesInThisBatch = 0;
                        producer.send(messagesToWrite);
                        messagesToWrite.clear();
                    }
                } else {
                    producer.send(data);
                }
                recordCount += 1;
            }
            if (BATCH_KAFKA_MESSAGES) {
                LOGGER.info("Sending last batch of {} records to Kafka", messagesToWrite.size());
                producer.send(messagesToWrite);
            }
            outputStream.close();
            reader.close();
            LOGGER.info("Finished writing " + recordCount + " records from file " + avroFile.getName() + " into Kafka topic " + kafkaTopic);
            int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
            LOGGER.info("Total records written so far " + totalRecordCount);
        } catch (Exception e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
}
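Each Kafka message produced here is the optional header bytes followed by one record in raw Avro binary encoding; there is no Avro file container inside the message, so a consumer needs the writer schema and must skip the header before decoding. A minimal consumer-side sketch under those assumptions (headerLength and the schema argument are assumptions, not taken from the test above):
// Hypothetical decode of one message produced by pushAvroIntoKafka.
private static GenericRecord decodeMessage(byte[] message, int headerLength, Schema schema) throws IOException {
    // Skip the fixed-size header prepended to each record, if any.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(message, headerLength, message.length - headerLength, null);
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
    return reader.read(null, decoder);
}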
use of org.apache.avro.io.BinaryEncoder in project gora by apache.
the class HBaseByteInterface method toBytes.
/**
 * Serializes an object following the given schema.
 * Does not handle <code>array/map</code> if it is not inside a <code>record</code>.
 * @param o Utf8|ByteBuffer|Integer|Long|Float|Double|Boolean|Enum|Persistent
 * @param schema The schema describing the object (or a compatible description)
 * @return array of bytes of the serialized object
 * @throws IOException
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static byte[] toBytes(Object o, Schema schema) throws IOException {
    Type type = schema.getType();
    switch (type) {
        // TODO: maybe ((Utf8)o).getBytes(); ?
        case STRING:
            return Bytes.toBytes(((CharSequence) o).toString());
        case BYTES:
            return ((ByteBuffer) o).array();
        case INT:
            return Bytes.toBytes((Integer) o);
        case LONG:
            return Bytes.toBytes((Long) o);
        case FLOAT:
            return Bytes.toBytes((Float) o);
        case DOUBLE:
            return Bytes.toBytes((Double) o);
        case BOOLEAN:
            return (Boolean) o ? new byte[] { 1 } : new byte[] { 0 };
        case ENUM:
            return new byte[] { (byte) ((Enum<?>) o).ordinal() };
        case UNION:
        case RECORD:
            SpecificDatumWriter writer = writerMap.get(schema.getFullName());
            if (writer == null) {
                // ignore dirty bits
                writer = new SpecificDatumWriter(schema);
                writerMap.put(schema.getFullName(), writer);
            }
            BinaryEncoder encoderFromCache = encoders.get();
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            outputStream.set(bos);
            BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(bos, null);
            if (encoderFromCache == null) {
                encoders.set(encoder);
            }
            //reset the buffers
            ByteArrayOutputStream os = outputStream.get();
            os.reset();
            writer.write(o, encoder);
            encoder.flush();
            return os.toByteArray();
        default:
            throw new RuntimeException("Unknown type: " + type);
    }
}
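The UNION/RECORD branch above has a natural counterpart that feeds the same bytes into a SpecificDatumReader through a BinaryDecoder. The sketch below shows that direction without the writer/encoder caching used above; it is an illustration, not the project's fromBytes verbatim.
// Illustrative counterpart to the UNION/RECORD branch: decode with the same schema.
@SuppressWarnings({ "rawtypes", "unchecked" })
public static Object recordFromBytes(byte[] val, Schema schema) throws IOException {
    SpecificDatumReader reader = new SpecificDatumReader(schema);
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(val, null);
    return reader.read(null, decoder);
}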