Example 1 with EventReaderSummary

Use of com.linkedin.databus2.producers.db.EventReaderSummary in project databus by linkedin.

The class GoldenGateEventProducer, method addEventToBuffer.

/**
   *
   * @param dbUpdates  The dbUpdates present in the current transaction
   * @param ti The meta information about the transaction. (See TransactionInfo class for more details).
   * @throws DatabusException
   * @throws UnsupportedKeyException
   */
protected void addEventToBuffer(List<TransactionState.PerSourceTransactionalUpdate> dbUpdates, TransactionInfo ti) throws DatabusException, UnsupportedKeyException {
    if (dbUpdates.size() == 0)
        throw new DatabusException("Cannot handle empty dbUpdates");
    long scn = ti.getScn();
    long timestamp = ti.getTransactionTimeStampNs();
    EventSourceStatistics globalStats = getSource(GLOBAL_SOURCE_ID).getStatisticsBean();
    /**
     * We skip the start SCN of the relay because we have already added an EOP for this SCN to the buffer.
     * Why is this not a problem? There are two cases:
     * 1. We started from the earliest/latest SCN because there was no maxScn (there is no real start point), so it is OK to miss the first event.
     * 2. If it is the maxScn, the event was already seen by the relay.
     */
    if (scn == _startPrevScn.get()) {
        _log.info("Skipping this transaction, EOP already send for this event");
        return;
    }
    getEventBuffer().startEvents();
    int eventsInTransactionCount = 0;
    List<EventReaderSummary> summaries = new ArrayList<EventReaderSummary>();
    for (int i = 0; i < dbUpdates.size(); ++i) {
        GenericRecord record = null;
        TransactionState.PerSourceTransactionalUpdate perSourceUpdate = dbUpdates.get(i);
        short sourceId = (short) perSourceUpdate.getSourceId();
        // prepare stats collection per source
        EventSourceStatistics perSourceStats = getSource(sourceId).getStatisticsBean();
        Iterator<DbUpdateState.DBUpdateImage> dbUpdateIterator = perSourceUpdate.getDbUpdatesSet().iterator();
        int eventsInDbUpdate = 0;
        long dbUpdatesEventsSize = 0;
        long startDbUpdatesMs = System.currentTimeMillis();
        // TODO: verify whether there is any case where we need to roll back.
        while (dbUpdateIterator.hasNext()) {
            DbUpdateState.DBUpdateImage dbUpdate = dbUpdateIterator.next();
            //Determine the key type and construct the Databus event key
            Object keyObj = obtainKey(dbUpdate);
            DbusEventKey eventKey = new DbusEventKey(keyObj);
            //Get the logical partition id
            PartitionFunction partitionFunction = _partitionFunctionHashMap.get((int) sourceId);
            short lPartitionId = partitionFunction.getPartition(eventKey);
            record = dbUpdate.getGenericRecord();
            //Write the event to the buffer
            if (record == null)
                throw new DatabusException("Cannot write event to buffer: record is null");
            if (record.getSchema() == null)
                throw new DatabusException("The record does not have a schema (null schema)");
            try {
                //Collect stats on number of dbUpdates for one source
                eventsInDbUpdate++;
                //Count of all the events in the current transaction
                eventsInTransactionCount++;
                // Serialize the row
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                Encoder encoder = new BinaryEncoder(bos);
                GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
                writer.write(record, encoder);
                byte[] serializedValue = bos.toByteArray();
                //Get the md5 for the schema
                SchemaId schemaId = SchemaId.createWithMd5(dbUpdate.getSchema());
                //Determine the operation type and convert to dbus opcode
                DbusOpcode opCode;
                if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.INSERT || dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.UPDATE) {
                    opCode = DbusOpcode.UPSERT;
                    if (_log.isDebugEnabled())
                        _log.debug("The event with scn " + scn + " is INSERT/UPDATE");
                } else if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.DELETE) {
                    opCode = DbusOpcode.DELETE;
                    if (_log.isDebugEnabled())
                        _log.debug("The event with scn " + scn + " is DELETE");
                } else {
                    throw new DatabusException("Unknown opcode from dbUpdate for event with scn:" + scn);
                }
                //Construct the dbusEvent info
                DbusEventInfo dbusEventInfo = new DbusEventInfo(opCode, scn, (short) _pConfig.getId(), lPartitionId, timestamp, sourceId, schemaId.getByteArray(), serializedValue, false, false);
                dbusEventInfo.setReplicated(dbUpdate.isReplicated());
                perSourceStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
                globalStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
                long tsEnd = System.currentTimeMillis();
                perSourceStats.addTimeOfLastDBAccess(tsEnd);
                globalStats.addTimeOfLastDBAccess(tsEnd);
                //Append to the event buffer
                getEventBuffer().appendEvent(eventKey, dbusEventInfo, _statsCollector);
                _rc.incrementEventCount();
                dbUpdatesEventsSize += serializedValue.length;
            } catch (IOException io) {
                perSourceStats.addError();
                globalStats.addEmptyEventCycle();
                _log.error("Cannot create byte stream payload: " + dbUpdates.get(i).getSourceId());
            }
        }
        long endDbUpdatesMs = System.currentTimeMillis();
        long dbUpdatesElapsedTimeMs = endDbUpdatesMs - startDbUpdatesMs;
        // Log Event Summary at logical source level
        EventReaderSummary summary = new EventReaderSummary(sourceId, _monitoredSources.get(sourceId).getSourceName(), scn, eventsInDbUpdate, dbUpdatesEventsSize, -1L, /* not supported */
                dbUpdatesElapsedTimeMs, timestamp, timestamp, -1L);
        if (_eventsLog.isInfoEnabled()) {
            _eventsLog.info(summary.toString());
        }
        summaries.add(summary);
        if (_log.isDebugEnabled())
            _log.debug("There are " + eventsInDbUpdate + " events seen in the current dbUpdate");
    }
    // Log Event Summary at Physical source level
    ReadEventCycleSummary summary = new ReadEventCycleSummary(_pConfig.getName(), summaries, scn, -1);
    if (_eventsLog.isInfoEnabled()) {
        _eventsLog.info(summary.toString());
    }
    _log.info("Writing " + eventsInTransactionCount + " events from transaction with scn: " + scn);
    if (scn <= 0)
        throw new DatabusException("Unable to write events to buffer because of negative/zero scn: " + scn);
    getEventBuffer().endEvents(scn, _statsCollector);
    _scn.set(scn);
    if (getMaxScnReaderWriter() != null) {
        try {
            getMaxScnReaderWriter().saveMaxScn(_scn.get());
        } catch (DatabusException e) {
            _log.error("Cannot save scn = " + _scn + " for physical source = " + getName(), e);
        }
    }
}
Also used : PartitionFunction(com.linkedin.databus2.producers.PartitionFunction) TransactionState(com.linkedin.databus2.ggParser.XmlStateMachine.TransactionState) ArrayList(java.util.ArrayList) EventSourceStatistics(com.linkedin.databus.monitoring.mbean.EventSourceStatistics) DbusEventInfo(com.linkedin.databus.core.DbusEventInfo) ReadEventCycleSummary(com.linkedin.databus2.producers.db.ReadEventCycleSummary) EventReaderSummary(com.linkedin.databus2.producers.db.EventReaderSummary) Encoder(org.apache.avro.io.Encoder) BinaryEncoder(org.apache.avro.io.BinaryEncoder) DbusOpcode(com.linkedin.databus.core.DbusOpcode) GenericRecord(org.apache.avro.generic.GenericRecord) DbUpdateState(com.linkedin.databus2.ggParser.XmlStateMachine.DbUpdateState) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) DatabusException(com.linkedin.databus2.core.DatabusException) BinaryEncoder(org.apache.avro.io.BinaryEncoder) SchemaId(com.linkedin.databus2.schemas.SchemaId) PerSourceTransactionalUpdate(com.linkedin.databus2.ggParser.XmlStateMachine.TransactionState.PerSourceTransactionalUpdate) DbusEventKey(com.linkedin.databus.core.DbusEventKey)
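
The serialization step in the loop above is the standard Avro recipe for turning a GenericRecord into the byte payload of a Databus event: write through a GenericDatumWriter into a BinaryEncoder over a ByteArrayOutputStream. Below is a minimal standalone sketch of that step. It uses the current EncoderFactory API (the snippet above constructs BinaryEncoder directly, which only compiles against older Avro releases), and the one-field schema is hypothetical, purely for illustration:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

public class AvroPayloadSketch {
    // Serialize a GenericRecord to schema-less Avro binary, as addEventToBuffer does.
    static byte[] serialize(GenericRecord record) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bos, null);
        new GenericDatumWriter<GenericRecord>(record.getSchema()).write(record, encoder);
        encoder.flush(); // binaryEncoder() buffers internally, so the flush is required
        return bos.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        // Hypothetical one-field schema, for illustration only
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"Member\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
        GenericRecord record = new GenericData.Record(schema);
        record.put("id", 42L);
        System.out.println("payload size: " + serialize(record).length + " bytes");
    }
}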

Example 2 with EventReaderSummary

Use of com.linkedin.databus2.producers.db.EventReaderSummary in project databus by linkedin.

The class RelayStatsAdapter, method getReadEventSummary.

/**
 * @return the event summary at this instant; readTime is computed as the difference between the current and previous readings
 */
protected ReadEventCycleSummary getReadEventSummary() {
    List<EventReaderSummary> sourceSummaries = new ArrayList<EventReaderSummary>();
    for (Integer srcId : _stats.getSources()) {
        sourceSummaries.add(getSummary(srcId));
    }
    ReadEventCycleSummary summary = new ReadEventCycleSummary(_name, sourceSummaries, _stats.getTotalStats().getMaxScn(), System.currentTimeMillis());
    return summary;
}
Also used : ReadEventCycleSummary(com.linkedin.databus2.producers.db.ReadEventCycleSummary) EventReaderSummary(com.linkedin.databus2.producers.db.EventReaderSummary) ArrayList(java.util.ArrayList)

Example 3 with EventReaderSummary

Use of com.linkedin.databus2.producers.db.EventReaderSummary in project databus by linkedin.

The class RelayStatsAdapter, method getDiff.

/**
 * Produces the diff of two summaries: src1 - src2.
 */
protected ReadEventCycleSummary getDiff(ReadEventCycleSummary src1, ReadEventCycleSummary src2) {
    List<EventReaderSummary> src1Summaries = src1.getSourceSummaries();
    List<EventReaderSummary> src2Summaries = src2.getSourceSummaries();
    if (src1Summaries.size() == src2Summaries.size()) {
        List<EventReaderSummary> sourceSummaries = new ArrayList<EventReaderSummary>();
        for (int i = 0; i < src1Summaries.size(); ++i) {
            sourceSummaries.add(getDiff(src1Summaries.get(i), src2Summaries.get(i)));
        }
        ReadEventCycleSummary r1 = new ReadEventCycleSummary(src1.getEventSourceName(), sourceSummaries, src1.getEndOfWindowScn(), src1.getReadMillis() - src2.getReadMillis());
        return r1;
    }
    // The summary lists are not parallel, so a per-source diff is undefined.
    return null;
}
Also used : ReadEventCycleSummary(com.linkedin.databus2.producers.db.ReadEventCycleSummary) EventReaderSummary(com.linkedin.databus2.producers.db.EventReaderSummary) ArrayList(java.util.ArrayList)

Example 4 with EventReaderSummary

Use of com.linkedin.databus2.producers.db.EventReaderSummary in project databus by linkedin.

The class BootstrapAvroFileEventReader, method readEventsFromHadoopFiles.

private EventReaderSummary readEventsFromHadoopFiles(OracleTriggerMonitoredSourceInfo sourceInfo, File avroSeedDir, Long windowSCN) {
    DataFileReader<GenericRecord> reader = null;
    File[] files = avroSeedDir.listFiles();
    if (files == null)
        throw new RuntimeException("Cannot list Avro seed directory: " + avroSeedDir.getAbsolutePath());
    // Arrays.asList returns a fixed-size view backed by the array, so sorting the list also sorts files[].
    List<File> fileList = Arrays.asList(files);
    Collections.sort(fileList);
    long numRead = 0;
    long prevNumRead = 0;
    long numBytes = 0;
    long timestamp = System.currentTimeMillis();
    long timeStart = timestamp;
    long lastTime = timestamp;
    long commitInterval = _config.getCommitInterval();
    long totLatency = 0;
    GenericRecord record = null;
    RateMonitor seedingRate = new RateMonitor("Seeding Rate");
    seedingRate.start();
    seedingRate.suspend();
    long startRowId = _lastRows.get(sourceInfo.getEventView());
    LOG.info("Last Known Row Id is :" + startRowId);
    boolean resumeSeedingRate = true;
    for (File avroSeedFile : files) {
        if (!avroSeedFile.isFile())
            continue;
        LOG.info("Seeding from File : " + avroSeedFile);
        try {
            reader = new DataFileReader<GenericRecord>(avroSeedFile, new GenericDatumReader<GenericRecord>());
        } catch (IOException e) {
            LOG.fatal("Failed to bootstrap from file " + avroSeedFile.getAbsolutePath(), e);
            throw new RuntimeException("Failed to bootstrap from file " + avroSeedFile.getAbsolutePath(), e);
        }
        try {
            boolean committed = false;
            for (GenericRecord hdfsRecord : reader) {
                record = hdfsRecord;
                committed = false;
                numRead++;
                if (numRead < startRowId)
                    continue;
                if (resumeSeedingRate) {
                    seedingRate.resume();
                    resumeSeedingRate = false;
                }
                seedingRate.tick();
                //LOG.info("Read record :" + record);	    			
                long start = System.nanoTime();
                long eventSize = sourceInfo.getFactory().createAndAppendEvent(windowSCN, timestamp, hdfsRecord, _bootstrapEventBuffer, false, null);
                numBytes += eventSize;
                long latency = System.nanoTime() - start;
                totLatency += latency;
                if (numRead % commitInterval == 0) {
                    _bootstrapEventBuffer.endEvents(numRead, timestamp, null);
                    _bootstrapEventBuffer.startEvents();
                    // Convert the accumulated serialization latency from nanoseconds to seconds
                    long procTime = totLatency / 1000000000;
                    long currTime = System.currentTimeMillis();
                    long diff = (currTime - lastTime) / 1000;
                    long timeSinceStart = (currTime - timeStart) / 1000;
                    LOG.info("Processed " + commitInterval + " rows in " + diff + " seconds, Avro Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + seedingRate.getRate() + ", NumRows Fetched so far:" + numRead + ". TotalEventSize :" + numBytes);
                    lastTime = currTime;
                    seedingRate.resume();
                    committed = true;
                }
            }
            // Flush the trailing partial batch that never reached the commit interval
            if (!committed) {
                _bootstrapEventBuffer.endEvents(numRead, timestamp, null);
                _bootstrapEventBuffer.startEvents();
                long procTime = totLatency / 1000000000;
                long currTime = System.currentTimeMillis();
                long diff = (currTime - lastTime) / 1000;
                long timeSinceStart = (currTime - timeStart) / 1000;
                LOG.info("Completed Seeding from : " + avroSeedFile + ", Processed " + commitInterval + " rows in " + diff + " seconds, Avro Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + seedingRate.getRate() + ", NumRows Fetched so far:" + numRead + ". TotalEventSize :" + numBytes);
                lastTime = currTime;
                seedingRate.resume();
            }
        } catch (Exception e) {
            LOG.fatal("NumRead :" + numRead + ", Got Exception while processing generic record :" + record, e);
            throw new RuntimeException(e);
        }
        LOG.info("Processed " + (numRead - prevNumRead) + " rows of Source: " + sourceInfo.getSourceName() + " from file " + avroSeedFile);
        prevNumRead = numRead;
    }
    long timeEnd = System.currentTimeMillis();
    long elapsedMin = (timeEnd - timeStart) / (MILLISEC_TO_MIN);
    LOG.info("Processed " + numRead + " rows of Source: " + sourceInfo.getSourceName() + " in " + elapsedMin + " minutes");
    // Guard against divide-by-zero when the seed directory yielded no rows
    return new EventReaderSummary(sourceInfo.getSourceId(), sourceInfo.getSourceName(), -1, (int) numRead, numBytes, (timeEnd - timeStart), (numRead == 0 ? 0 : (timeEnd - timeStart) / numRead), 0, 0, 0);
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) IOException(java.io.IOException) RateMonitor(com.linkedin.databus.core.util.RateMonitor) EventCreationException(com.linkedin.databus2.producers.EventCreationException) DatabusException(com.linkedin.databus2.core.DatabusException) InvalidConfigException(com.linkedin.databus.core.util.InvalidConfigException) IOException(java.io.IOException) UnsupportedKeyException(com.linkedin.databus.core.UnsupportedKeyException) EventReaderSummary(com.linkedin.databus2.producers.db.EventReaderSummary) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)
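
The reading side of this loop is plain Avro: DataFileReader pulls the writer's schema from the container-file header and then iterates decoded GenericRecords. Here is a minimal sketch of that scan, stripped of the checkpointing and event-buffer plumbing above; the command-line file path is the only input:

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class AvroSeedFileScan {
    public static void main(String[] args) throws IOException {
        File avroSeedFile = new File(args[0]);
        // DataFileReader reads the schema from the file header,
        // so the GenericDatumReader needs no schema up front.
        DataFileReader<GenericRecord> reader =
                new DataFileReader<GenericRecord>(avroSeedFile, new GenericDatumReader<GenericRecord>());
        long numRead = 0;
        try {
            for (GenericRecord record : reader) {
                numRead++; // one decoded row per iteration, as in readEventsFromHadoopFiles
            }
        } finally {
            reader.close();
        }
        System.out.println("rows read: " + numRead);
    }
}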

Example 5 with EventReaderSummary

Use of com.linkedin.databus2.producers.db.EventReaderSummary in project databus by linkedin.

The class BootstrapAvroFileEventReader, method readEventsFromAllSources.

@Override
public ReadEventCycleSummary readEventsFromAllSources(long sinceSCN) throws DatabusException, EventCreationException, UnsupportedKeyException {
    List<EventReaderSummary> summaries = new ArrayList<EventReaderSummary>();
    boolean error = false;
    long startTS = System.currentTimeMillis();
    long endScn = -1;
    long minScn = Long.MAX_VALUE;
    try {
        for (OracleTriggerMonitoredSourceInfo sourceInfo : _sources) {
            endScn = _config.getSeedWindowSCNMap().get(sourceInfo.getEventView());
            minScn = Math.min(endScn, minScn);
            LOG.info("Bootstrapping " + sourceInfo.getEventView());
            _bootstrapEventBuffer.start(endScn);
            String dir = _config.getAvroSeedInputDirMap().get(sourceInfo.getEventView());
            File d = new File(dir);
            EventReaderSummary summary = readEventsFromHadoopFiles(sourceInfo, d, endScn);
            // Script assumes seeding is done for one schema at a time
            _bootstrapEventBuffer.endEvents(BootstrapEventBuffer.END_OF_SOURCE, endScn, null);
            summaries.add(summary);
        }
    } catch (Exception ex) {
        error = true;
        throw new DatabusException(ex);
    } finally {
        // Notify writer that I am done
        if (error) {
            _bootstrapEventBuffer.endEvents(BootstrapEventBuffer.ERROR_CODE, endScn, null);
            LOG.error("Seeder stopping unexpectedly !!");
        } else {
            _bootstrapEventBuffer.endEvents(BootstrapEventBuffer.END_OF_FILE, endScn, null);
            LOG.info("Completed Seeding !!");
        }
    }
    LOG.info("Start SCN :" + minScn);
    long endTS = System.currentTimeMillis();
    ReadEventCycleSummary cycleSummary = new ReadEventCycleSummary("seeder", summaries, minScn, (endTS - startTS));
    return cycleSummary;
}
Also used : ReadEventCycleSummary(com.linkedin.databus2.producers.db.ReadEventCycleSummary) EventReaderSummary(com.linkedin.databus2.producers.db.EventReaderSummary) DatabusException(com.linkedin.databus2.core.DatabusException) ArrayList(java.util.ArrayList) File(java.io.File) EventCreationException(com.linkedin.databus2.producers.EventCreationException) DatabusException(com.linkedin.databus2.core.DatabusException) InvalidConfigException(com.linkedin.databus.core.util.InvalidConfigException) IOException(java.io.IOException) UnsupportedKeyException(com.linkedin.databus.core.UnsupportedKeyException) OracleTriggerMonitoredSourceInfo(com.linkedin.databus2.producers.db.OracleTriggerMonitoredSourceInfo)
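
A pattern worth noting across these reader methods is the commit-interval batching in readEventsFromHadoopFiles: commit every commitInterval rows, then flush whatever remains once the iterator is exhausted, so the trailing partial batch is never lost. Below is that control flow reduced to its skeleton; the Sink interface and every name in it are hypothetical stand-ins for BootstrapEventBuffer, not Databus APIs:

import java.util.List;

public class CommitIntervalSketch {
    // Hypothetical sink standing in for BootstrapEventBuffer
    interface Sink<T> {
        void append(T row);
        void commit(long rowsSoFar); // endEvents + startEvents in the real code
    }

    static <T> void drain(List<T> rows, Sink<T> sink, long commitInterval) {
        long numRead = 0;
        boolean committed = false;
        for (T row : rows) {
            sink.append(row);
            numRead++;
            committed = false;
            if (numRead % commitInterval == 0) {
                sink.commit(numRead);
                committed = true;
            }
        }
        if (!committed) {
            // Flush the leftover rows, exactly as the !committed branch above does
            sink.commit(numRead);
        }
    }

    public static void main(String[] args) {
        Sink<Integer> sink = new Sink<Integer>() {
            public void append(Integer row) { }
            public void commit(long rowsSoFar) { System.out.println("commit at row " + rowsSoFar); }
        };
        drain(java.util.Arrays.asList(1, 2, 3, 4, 5), sink, 2);
    }
}

Running main commits at rows 2, 4, and 5: the final commit is the !committed flush of the single leftover row.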

Aggregations

EventReaderSummary (com.linkedin.databus2.producers.db.EventReaderSummary): 7 usages
DatabusException (com.linkedin.databus2.core.DatabusException): 6 usages
ArrayList (java.util.ArrayList): 6 usages
UnsupportedKeyException (com.linkedin.databus.core.UnsupportedKeyException): 5 usages
EventCreationException (com.linkedin.databus2.producers.EventCreationException): 5 usages
ReadEventCycleSummary (com.linkedin.databus2.producers.db.ReadEventCycleSummary): 5 usages
IOException (java.io.IOException): 5 usages
InvalidConfigException (com.linkedin.databus.core.util.InvalidConfigException): 4 usages
File (java.io.File): 3 usages
SQLException (java.sql.SQLException): 3 usages
RateMonitor (com.linkedin.databus.core.util.RateMonitor): 2 usages
OracleTriggerMonitoredSourceInfo (com.linkedin.databus2.producers.db.OracleTriggerMonitoredSourceInfo): 2 usages
Connection (java.sql.Connection): 2 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 2 usages
DbusEventInfo (com.linkedin.databus.core.DbusEventInfo): 1 usage
DbusEventKey (com.linkedin.databus.core.DbusEventKey): 1 usage
KeyType (com.linkedin.databus.core.DbusEventKey.KeyType): 1 usage
DbusOpcode (com.linkedin.databus.core.DbusOpcode): 1 usage
EventSourceStatistics (com.linkedin.databus.monitoring.mbean.EventSourceStatistics): 1 usage
DbUpdateState (com.linkedin.databus2.ggParser.XmlStateMachine.DbUpdateState): 1 usage