Example 51 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

the class RelayEventGenerator method populateEvents.

int populateEvents(String source, short id, GenericRecord record, DbusEventKey key, byte[] schemaId, DbusEventsStatisticsCollector statsCollector, DbusEventBufferAppendable buffer) {
    if (record != null && key != null) {
        try {
            // Serialize the row
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            Encoder encoder = new BinaryEncoder(bos);
            GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
            writer.write(record, encoder);
            byte[] serializedValue = bos.toByteArray();
            // Random physical/logical partition ids for generated test events
            short pPartitionId = RngUtils.randomPositiveShort();
            short lPartitionId = RngUtils.randomPositiveShort();
            // Timestamp in nanoseconds (ms * 1e6)
            long timeStamp = System.currentTimeMillis() * 1000000;
            buffer.appendEvent(key, pPartitionId, lPartitionId, timeStamp, id, schemaId, serializedValue, false, statsCollector);
            return 1;
        } catch (IOException io) {
            LOG.error("Cannot create byte stream payload: " + source);
        }
    }
    return 0;
}
Also used : Encoder(org.apache.avro.io.Encoder) BinaryEncoder(org.apache.avro.io.BinaryEncoder) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord)
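
As a self-contained illustration of the serialization step above, the sketch below round-trips a GenericRecord through Avro binary encoding. It assumes Avro 1.5 or later, where the public BinaryEncoder constructor used in the snippet was replaced by EncoderFactory; the class name and inline schema are illustrative only.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class AvroRoundTrip {
    public static void main(String[] args) throws IOException {
        // Hypothetical one-field schema, just for the demonstration
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Row\",\"fields\":[" +
            "{\"name\":\"id\",\"type\":\"long\"}]}");
        GenericRecord record = new GenericData.Record(schema);
        record.put("id", 42L);

        // Serialize the row, as populateEvents() does before appending to the buffer
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bos, null);
        new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
        encoder.flush(); // factory-created encoders buffer; flush before reading bos
        byte[] serializedValue = bos.toByteArray();

        // Deserialize to verify the payload
        GenericRecord copy = new GenericDatumReader<GenericRecord>(schema)
            .read(null, DecoderFactory.get().binaryDecoder(serializedValue, null));
        System.out.println(copy); // {"id": 42}
    }
}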

Example 52 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

the class GoldenGateEventProducer method addEventToBuffer.

/**
   *
   * @param dbUpdates  The dbUpdates present in the current transaction
   * @param ti The meta information about the transaction. (See TransactionInfo class for more details).
   * @throws DatabusException
   * @throws UnsupportedKeyException
   */
protected void addEventToBuffer(List<TransactionState.PerSourceTransactionalUpdate> dbUpdates, TransactionInfo ti) throws DatabusException, UnsupportedKeyException {
    if (dbUpdates.size() == 0)
        throw new DatabusException("Cannot handle empty dbUpdates");
    long scn = ti.getScn();
    long timestamp = ti.getTransactionTimeStampNs();
    EventSourceStatistics globalStats = getSource(GLOBAL_SOURCE_ID).getStatisticsBean();
    /**
     * We skip the start scn of the relay; we have already added an EOP for this SCN in the buffer.
     * Why is this not a problem?
     * There are two cases:
     * 1. We use the earliest/latest scn because there is no maxScn (we don't really have a start point), so it's OK to miss the first event.
     * 2. It is the maxScn, in which case the event was already seen by the relay.
     */
    if (scn == _startPrevScn.get()) {
        _log.info("Skipping this transaction, EOP already send for this event");
        return;
    }
    getEventBuffer().startEvents();
    int eventsInTransactionCount = 0;
    List<EventReaderSummary> summaries = new ArrayList<EventReaderSummary>();
    for (int i = 0; i < dbUpdates.size(); ++i) {
        GenericRecord record = null;
        TransactionState.PerSourceTransactionalUpdate perSourceUpdate = dbUpdates.get(i);
        short sourceId = (short) perSourceUpdate.getSourceId();
        // prepare stats collection per source
        EventSourceStatistics perSourceStats = getSource(sourceId).getStatisticsBean();
        Iterator<DbUpdateState.DBUpdateImage> dbUpdateIterator = perSourceUpdate.getDbUpdatesSet().iterator();
        int eventsInDbUpdate = 0;
        long dbUpdatesEventsSize = 0;
        long startDbUpdatesMs = System.currentTimeMillis();
        //TODO verify if there is any case where we need to rollback.
        while (dbUpdateIterator.hasNext()) {
            DbUpdateState.DBUpdateImage dbUpdate = dbUpdateIterator.next();
            //Construct the Databus Event key, determine the key type and construct the key
            Object keyObj = obtainKey(dbUpdate);
            DbusEventKey eventKey = new DbusEventKey(keyObj);
            //Get the logical partition id
            PartitionFunction partitionFunction = _partitionFunctionHashMap.get((int) sourceId);
            short lPartitionId = partitionFunction.getPartition(eventKey);
            record = dbUpdate.getGenericRecord();
            //Write the event to the buffer
            if (record == null)
                throw new DatabusException("Cannot write event to buffer because record = " + record);
            if (record.getSchema() == null)
                throw new DatabusException("The record does not have a schema (null schema)");
            try {
                //Collect stats on number of dbUpdates for one source
                eventsInDbUpdate++;
                //Count of all the events in the current transaction
                eventsInTransactionCount++;
                // Serialize the row
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                Encoder encoder = new BinaryEncoder(bos);
                GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
                writer.write(record, encoder);
                byte[] serializedValue = bos.toByteArray();
                //Get the md5 for the schema
                SchemaId schemaId = SchemaId.createWithMd5(dbUpdate.getSchema());
                //Determine the operation type and convert to dbus opcode
                DbusOpcode opCode;
                if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.INSERT || dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.UPDATE) {
                    opCode = DbusOpcode.UPSERT;
                    if (_log.isDebugEnabled())
                        _log.debug("The event with scn " + scn + " is INSERT/UPDATE");
                } else if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.DELETE) {
                    opCode = DbusOpcode.DELETE;
                    if (_log.isDebugEnabled())
                        _log.debug("The event with scn " + scn + " is DELETE");
                } else {
                    throw new DatabusException("Unknown opcode from dbUpdate for event with scn:" + scn);
                }
                //Construct the dbusEvent info
                DbusEventInfo dbusEventInfo = new DbusEventInfo(opCode, scn, (short) _pConfig.getId(), lPartitionId, timestamp, sourceId, schemaId.getByteArray(), serializedValue, false, false);
                dbusEventInfo.setReplicated(dbUpdate.isReplicated());
                perSourceStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
                globalStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
                long tsEnd = System.currentTimeMillis();
                perSourceStats.addTimeOfLastDBAccess(tsEnd);
                globalStats.addTimeOfLastDBAccess(tsEnd);
                //Append to the event buffer
                getEventBuffer().appendEvent(eventKey, dbusEventInfo, _statsCollector);
                _rc.incrementEventCount();
                dbUpdatesEventsSize += serializedValue.length;
            } catch (IOException io) {
                perSourceStats.addError();
                globalStats.addEmptyEventCycle();
                _log.error("Cannot create byte stream payload: " + dbUpdates.get(i).getSourceId());
            }
        }
        long endDbUpdatesMs = System.currentTimeMillis();
        long dbUpdatesElapsedTimeMs = endDbUpdatesMs - startDbUpdatesMs;
        // Log Event Summary at logical source level
        EventReaderSummary summary = new EventReaderSummary(sourceId, _monitoredSources.get(sourceId).getSourceName(), scn, eventsInDbUpdate, dbUpdatesEventsSize, -1L, /* not supported */ dbUpdatesElapsedTimeMs, timestamp, timestamp, -1L);
        if (_eventsLog.isInfoEnabled()) {
            _eventsLog.info(summary.toString());
        }
        summaries.add(summary);
        if (_log.isDebugEnabled())
            _log.debug("There are " + eventsInDbUpdate + " events seen in the current dbUpdate");
    }
    // Log Event Summary at Physical source level
    ReadEventCycleSummary summary = new ReadEventCycleSummary(_pConfig.getName(), summaries, scn, -1);
    if (_eventsLog.isInfoEnabled()) {
        _eventsLog.info(summary.toString());
    }
    _log.info("Writing " + eventsInTransactionCount + " events from transaction with scn: " + scn);
    if (scn <= 0)
        throw new DatabusException("Unable to write events to buffer because of negative/zero scn: " + scn);
    getEventBuffer().endEvents(scn, _statsCollector);
    _scn.set(scn);
    if (getMaxScnReaderWriter() != null) {
        try {
            getMaxScnReaderWriter().saveMaxScn(_scn.get());
        } catch (DatabusException e) {
            _log.error("Cannot save scn = " + _scn + " for physical source = " + getName(), e);
        }
    }
}
Also used : PartitionFunction(com.linkedin.databus2.producers.PartitionFunction) TransactionState(com.linkedin.databus2.ggParser.XmlStateMachine.TransactionState) ArrayList(java.util.ArrayList) EventSourceStatistics(com.linkedin.databus.monitoring.mbean.EventSourceStatistics) DbusEventInfo(com.linkedin.databus.core.DbusEventInfo) ReadEventCycleSummary(com.linkedin.databus2.producers.db.ReadEventCycleSummary) EventReaderSummary(com.linkedin.databus2.producers.db.EventReaderSummary) Encoder(org.apache.avro.io.Encoder) BinaryEncoder(org.apache.avro.io.BinaryEncoder) DbusOpcode(com.linkedin.databus.core.DbusOpcode) GenericRecord(org.apache.avro.generic.GenericRecord) DbUpdateState(com.linkedin.databus2.ggParser.XmlStateMachine.DbUpdateState) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) DatabusException(com.linkedin.databus2.core.DatabusException) SchemaId(com.linkedin.databus2.schemas.SchemaId) PerSourceTransactionalUpdate(com.linkedin.databus2.ggParser.XmlStateMachine.TransactionState.PerSourceTransactionalUpdate) DbusEventKey(com.linkedin.databus.core.DbusEventKey)
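
The SchemaId.createWithMd5 call above presumably derives a 16-byte MD5 fingerprint of the Avro schema so consumers can match payloads to schemas. A hedged sketch of that idea, assuming the hash is taken over the schema's JSON text (the databus implementation may differ):

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import org.apache.avro.Schema;

public class Md5SchemaIdSketch {
    // Assumption: SchemaId.createWithMd5 hashes the schema's JSON representation
    public static byte[] md5Of(Schema schema) throws NoSuchAlgorithmException {
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        // MD5 always yields 16 bytes, matching a fixed-width schema id field
        return md5.digest(schema.toString().getBytes(StandardCharsets.UTF_8));
    }
}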

Example 53 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

the class OracleAvroGenericEventFactory method addOracleRecordToParent.

/**
   * Copies a STRUCT-typed DB field value into a child Avro record and adds it to the parent record
   *
   * @param  parentRecord           the parent Avro record to which to add the generated child
   * @param  fieldName              the name of the Avro field
   * @param  fieldSchema            the schema of the Avro field (must be a record)
   * @param  dbFieldValue           the DB field value from the result set
   * @throws EventCreationException if conversion from the STRUCT type to the Avro type failed
   */
private void addOracleRecordToParent(GenericRecord parentRecord, String fieldName, Schema fieldSchema, Struct dbFieldValue) throws EventCreationException {
    GenericRecord fieldRecord = new GenericData.Record(fieldSchema);
    putOracleRecord(fieldRecord, fieldSchema, dbFieldValue);
    parentRecord.put(fieldName, fieldRecord);
}
Also used : GenericRecord(org.apache.avro.generic.GenericRecord)
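
The pattern here is plain Avro: build the child record against the field's record schema, then store it as the parent's field value. A minimal standalone sketch with hypothetical Person/Address schemas:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class NestedRecordSketch {
    public static GenericRecord buildPerson() {
        Schema child = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Address\",\"fields\":[" +
            "{\"name\":\"city\",\"type\":\"string\"}]}");
        // Embed the child record schema inline in the parent's field type
        Schema parent = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Person\",\"fields\":[" +
            "{\"name\":\"address\",\"type\":" + child.toString() + "}]}");
        GenericRecord addr = new GenericData.Record(child);
        addr.put("city", "Sunnyvale");
        GenericRecord person = new GenericData.Record(parent);
        // The same step addOracleRecordToParent performs after filling the child
        person.put("address", addr);
        return person;
    }
}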

Example 54 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

the class OracleAvroGenericEventFactory method putArray.

private void putArray(GenericRecord record, String arrayFieldName, Schema schema, Array array) throws EventCreationException {
    // Make sure this is an array type
    if (schema.getType() != Type.ARRAY) {
        throw new EventCreationException("Not an array type. " + schema.getName());
    }
    Schema elementSchema = schema.getElementType();
    GenericArray<GenericRecord> avroArray = new GenericData.Array<GenericRecord>(0, schema);
    try {
        ResultSet arrayResultSet = array.getResultSet();
        try {
            while (arrayResultSet.next()) {
                // Create the avro record and add it to the array
                GenericRecord elemRecord = new GenericData.Record(elementSchema);
                avroArray.add(elemRecord);
                // Get the underlying structure from the database. Oracle returns the structure in the
                // second column of the array's ResultSet
                Struct struct = (Struct) arrayResultSet.getObject(2);
                putOracleRecord(elemRecord, elementSchema, struct);
            }
        } finally {
            arrayResultSet.close();
        }
    } catch (SQLException e) {
        throw new EventCreationException("putArray error: " + e.getMessage(), e);
    }
    record.put(arrayFieldName, avroArray);
}
Also used : GenericArray(org.apache.avro.generic.GenericArray) Array(java.sql.Array) SQLException(java.sql.SQLException) EventCreationException(com.linkedin.databus2.producers.EventCreationException) Schema(org.apache.avro.Schema) ResultSet(java.sql.ResultSet) GenericRecord(org.apache.avro.generic.GenericRecord) Struct(java.sql.Struct)
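
Outside of JDBC, the same array-building pattern looks like the sketch below; the Item schema is hypothetical:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class ArrayFieldSketch {
    public static GenericArray<GenericRecord> buildItems() {
        Schema elem = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Item\",\"fields\":[" +
            "{\"name\":\"qty\",\"type\":\"int\"}]}");
        // Schema.createArray produces the ARRAY-typed schema that
        // putArray() checks for with schema.getType() != Type.ARRAY
        Schema arraySchema = Schema.createArray(elem);
        GenericArray<GenericRecord> avroArray =
                new GenericData.Array<GenericRecord>(0, arraySchema);
        GenericRecord item = new GenericData.Record(elem);
        item.put("qty", 3);
        avroArray.add(item);
        return avroArray;
    }
}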

Example 55 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

the class OracleAvroGenericEventFactory method buildGenericRecord.

/**
   * Build a GenericRecord from the contents of the current ResultSet row.
   * @param rs the ResultSet positioned at the row to convert
   * @return the populated Avro record
   * @throws SQLException if reading a column from the result set fails
   * @throws EventCreationException if building an array-typed field fails
   */
protected GenericRecord buildGenericRecord(ResultSet rs) throws SQLException, EventCreationException {
    boolean traceEnabled = _log.isTraceEnabled();
    if (traceEnabled) {
        _log.trace("--- New Record ---");
    }
    // Initialize a new GenericData.Record from the event schema
    GenericRecord record = new GenericData.Record(_eventSchema);
    // Iterate over the array of fields defined in the Avro schema
    List<Field> fields = _eventSchema.getFields();
    for (Field field : fields) {
        // Get the Avro field type information
        String schemaFieldName = field.name();
        // This is just field.schema() if field is not a union; but if it IS one,
        // this is the schema of the first non-null type within the union:
        Schema fieldSchema = SchemaHelper.unwindUnionSchema(field);
        Type avroFieldType = fieldSchema.getType();
        if (avroFieldType == Type.ARRAY) {
            // Process as an array.  Note that we're encoding to Avro's internal representation rather
            // than to Avro binary format, which is what allows us to directly encode one of the union's
            // inner types (here as well as in put()) instead of wrapping the inner type in a union.
            // (Avro's binary encoding for unions includes an additional long index value before the
            // encoding of the selected inner type.)
            putArray(record, schemaFieldName, fieldSchema, getJdbcArray(rs, fieldSchema));
        } else {
            String databaseFieldName = SchemaHelper.getMetaField(field, "dbFieldName");
            try {
                Object databaseFieldValue = rs.getObject(databaseFieldName);
                put(record, field, databaseFieldValue);
            } catch (SQLException ex) {
                _log.error("Failed to read column (" + databaseFieldName + ") for source (" + _sourceId + ")");
                throw ex;
            }
        }
    }
    // Return the Avro record.
    return record;
}
Also used : Field(org.apache.avro.Schema.Field) Type(org.apache.avro.Schema.Type) SourceType(com.linkedin.databus2.relay.config.ReplicationBitSetterStaticConfig.SourceType) SQLException(java.sql.SQLException) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord)
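
SchemaHelper.unwindUnionSchema is databus-specific, but the inline comment in buildGenericRecord describes its behavior precisely enough to sketch: return the field schema directly for non-unions, and the first non-null branch otherwise. Details of the real helper may differ.

import org.apache.avro.Schema;

public class UnionUnwindSketch {
    // Non-union: just field.schema(). Union: first branch whose type is not NULL.
    static Schema unwindUnion(Schema.Field field) {
        Schema s = field.schema();
        if (s.getType() != Schema.Type.UNION) {
            return s;
        }
        for (Schema branch : s.getTypes()) {
            if (branch.getType() != Schema.Type.NULL) {
                return branch;
            }
        }
        return s; // degenerate all-null union; fall back to the union itself
    }
}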

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord) 262
Schema (org.apache.avro.Schema) 101
Test (org.junit.Test) 80
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter) 46
File (java.io.File) 35
IOException (java.io.IOException) 34
GenericData (org.apache.avro.generic.GenericData) 30
GenericDatumReader (org.apache.avro.generic.GenericDatumReader) 30
ArrayList (java.util.ArrayList) 29
ByteArrayOutputStream (java.io.ByteArrayOutputStream) 27
DataFileWriter (org.apache.avro.file.DataFileWriter) 20
HashMap (java.util.HashMap) 19
ByteBuffer (java.nio.ByteBuffer) 18
BinaryEncoder (org.apache.avro.io.BinaryEncoder) 17
Field (org.apache.avro.Schema.Field) 14
DataFileStream (org.apache.avro.file.DataFileStream) 14
GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder) 14
Utf8 (org.apache.avro.util.Utf8) 14
Encoder (org.apache.avro.io.Encoder) 12
DatasetRepository (com.cloudera.cdk.data.DatasetRepository) 11