Use of com.linkedin.databus2.producers.db.EventReaderSummary in project databus by linkedin.
The class GoldenGateEventProducer, method addEventToBuffer.
/**
*
* @param dbUpdates The dbUpdates present in the current transaction
* @param ti The meta information about the transaction. (See TransactionInfo class for more details).
* @throws DatabusException
* @throws UnsupportedKeyException
*/
protected void addEventToBuffer(List<TransactionState.PerSourceTransactionalUpdate> dbUpdates, TransactionInfo ti) throws DatabusException, UnsupportedKeyException {
if (dbUpdates.size() == 0)
throw new DatabusException("Cannot handle empty dbUpdates");
long scn = ti.getScn();
long timestamp = ti.getTransactionTimeStampNs();
EventSourceStatistics globalStats = getSource(GLOBAL_SOURCE_ID).getStatisticsBean();
/**
 * We skip the start SCN of the relay because an EOP has already been added for this SCN in the buffer.
 * Why is this not a problem? There are two cases:
 * 1. We started from the earliest/latest SCN because there was no maxScn (there is no real start point), so missing the first event is acceptable.
 * 2. We started from maxScn, in which case the event was already seen by the relay.
 */
if (scn == _startPrevScn.get()) {
_log.info("Skipping this transaction, EOP already send for this event");
return;
}
getEventBuffer().startEvents();
int eventsInTransactionCount = 0;
List<EventReaderSummary> summaries = new ArrayList<EventReaderSummary>();
for (int i = 0; i < dbUpdates.size(); ++i) {
GenericRecord record = null;
TransactionState.PerSourceTransactionalUpdate perSourceUpdate = dbUpdates.get(i);
short sourceId = (short) perSourceUpdate.getSourceId();
// prepare stats collection per source
EventSourceStatistics perSourceStats = getSource(sourceId).getStatisticsBean();
Iterator<DbUpdateState.DBUpdateImage> dbUpdateIterator = perSourceUpdate.getDbUpdatesSet().iterator();
int eventsInDbUpdate = 0;
long dbUpdatesEventsSize = 0;
long startDbUpdatesMs = System.currentTimeMillis();
//TODO verify if there is any case where we need to rollback.
while (dbUpdateIterator.hasNext()) {
DbUpdateState.DBUpdateImage dbUpdate = dbUpdateIterator.next();
//Determine the key type and construct the Databus event key
Object keyObj = obtainKey(dbUpdate);
DbusEventKey eventKey = new DbusEventKey(keyObj);
//Get the logical partition id
PartitionFunction partitionFunction = _partitionFunctionHashMap.get((int) sourceId);
short lPartitionId = partitionFunction.getPartition(eventKey);
record = dbUpdate.getGenericRecord();
//Write the event to the buffer
if (record == null)
throw new DatabusException("Cannot write event to buffer because record = " + record);
if (record.getSchema() == null)
throw new DatabusException("The record does not have a schema (null schema)");
try {
//Collect stats on number of dbUpdates for one source
eventsInDbUpdate++;
//Count of all the events in the current transaction
eventsInTransactionCount++;
// Serialize the row
ByteArrayOutputStream bos = new ByteArrayOutputStream();
Encoder encoder = new BinaryEncoder(bos);
GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
writer.write(record, encoder);
byte[] serializedValue = bos.toByteArray();
//Get the md5 for the schema
SchemaId schemaId = SchemaId.createWithMd5(dbUpdate.getSchema());
//Determine the operation type and convert to dbus opcode
DbusOpcode opCode;
if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.INSERT || dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.UPDATE) {
opCode = DbusOpcode.UPSERT;
if (_log.isDebugEnabled())
_log.debug("The event with scn " + scn + " is INSERT/UPDATE");
} else if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.DELETE) {
opCode = DbusOpcode.DELETE;
if (_log.isDebugEnabled())
_log.debug("The event with scn " + scn + " is DELETE");
} else {
throw new DatabusException("Unknown opcode from dbUpdate for event with scn:" + scn);
}
//Construct the dbusEvent info
DbusEventInfo dbusEventInfo = new DbusEventInfo(opCode, scn, (short) _pConfig.getId(), lPartitionId, timestamp, sourceId, schemaId.getByteArray(), serializedValue, false, false);
dbusEventInfo.setReplicated(dbUpdate.isReplicated());
perSourceStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
globalStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
long tsEnd = System.currentTimeMillis();
perSourceStats.addTimeOfLastDBAccess(tsEnd);
globalStats.addTimeOfLastDBAccess(tsEnd);
//Append to the event buffer
getEventBuffer().appendEvent(eventKey, dbusEventInfo, _statsCollector);
_rc.incrementEventCount();
dbUpdatesEventsSize += serializedValue.length;
} catch (IOException io) {
perSourceStats.addError();
globalStats.addEmptyEventCycle();
_log.error("Cannot create byte stream payload: " + dbUpdates.get(i).getSourceId());
}
}
long endDbUpdatesMs = System.currentTimeMillis();
long dbUpdatesElapsedTimeMs = endDbUpdatesMs - startDbUpdatesMs;
// Log Event Summary at logical source level
EventReaderSummary summary = new EventReaderSummary(sourceId, _monitoredSources.get(sourceId).getSourceName(), scn, eventsInDbUpdate, dbUpdatesEventsSize, -1L, /* Not supported */
dbUpdatesElapsedTimeMs, timestamp, timestamp, -1L);
if (_eventsLog.isInfoEnabled()) {
_eventsLog.info(summary.toString());
}
summaries.add(summary);
if (_log.isDebugEnabled())
_log.debug("There are " + eventsInDbUpdate + " events seen in the current dbUpdate");
}
// Log Event Summary at Physical source level
ReadEventCycleSummary summary = new ReadEventCycleSummary(_pConfig.getName(), summaries, scn, -1);
if (_eventsLog.isInfoEnabled()) {
_eventsLog.info(summary.toString());
}
_log.info("Writing " + eventsInTransactionCount + " events from transaction with scn: " + scn);
if (scn <= 0)
throw new DatabusException("Unable to write events to buffer because of negative/zero scn: " + scn);
getEventBuffer().endEvents(scn, _statsCollector);
_scn.set(scn);
if (getMaxScnReaderWriter() != null) {
try {
getMaxScnReaderWriter().saveMaxScn(_scn.get());
} catch (DatabusException e) {
_log.error("Cannot save scn = " + _scn + " for physical source = " + getName(), e);
}
}
}
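
Taken together, the method above builds one EventReaderSummary per logical source and rolls them up into a ReadEventCycleSummary for the physical source. A minimal sketch of just that summary-building step follows; the constructor argument order is copied from the calls above, while the field meanings (event count, serialized bytes, elapsed milliseconds) are inferred from how the surrounding code computes them, so treat them as assumptions rather than documented API. The class and method names in the sketch are hypothetical.

import java.util.ArrayList;
import java.util.List;

import com.linkedin.databus2.producers.db.EventReaderSummary;
import com.linkedin.databus2.producers.db.ReadEventCycleSummary; // package assumed to match EventReaderSummary

public class SummarySketch {
    // Build a per-source summary and roll it up to the physical-source level,
    // mirroring the end of addEventToBuffer above. -1L marks fields the method does not track.
    public static ReadEventCycleSummary summarize(short sourceId, String sourceName, long scn,
                                                  int eventsRead, long bytesRead,
                                                  long elapsedMs, long timestampNs) {
        List<EventReaderSummary> perSource = new ArrayList<EventReaderSummary>();
        perSource.add(new EventReaderSummary(sourceId, sourceName, scn, eventsRead, bytesRead,
                -1L /* not supported */, elapsedMs, timestampNs, timestampNs, -1L));
        return new ReadEventCycleSummary("examplePhysicalSource", perSource, scn, -1);
    }
}
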
Use of com.linkedin.databus2.producers.db.EventReaderSummary in project databus by linkedin.
The class RelayStatsAdapter, method getReadEventSummary.
/**
 * @return the event summary at this instant; readTime is computed as the difference between the current and the previous snapshot
 */
protected ReadEventCycleSummary getReadEventSummary() {
List<EventReaderSummary> sourceSummaries = new ArrayList<EventReaderSummary>();
for (Integer srcId : _stats.getSources()) {
sourceSummaries.add(getSummary(srcId));
}
ReadEventCycleSummary summary = new ReadEventCycleSummary(_name, sourceSummaries, _stats.getTotalStats().getMaxScn(), System.currentTimeMillis());
return summary;
}
Use of com.linkedin.databus2.producers.db.EventReaderSummary in project databus by linkedin.
The class RelayStatsAdapter, method getDiff.
/**
 * Produce the diff of two summaries: src1 - src2.
 */
protected ReadEventCycleSummary getDiff(ReadEventCycleSummary src1, ReadEventCycleSummary src2) {
List<EventReaderSummary> src1Summaries = src1.getSourceSummaries();
List<EventReaderSummary> src2Summaries = src2.getSourceSummaries();
if (src1Summaries.size() == src2Summaries.size()) {
List<EventReaderSummary> sourceSummaries = new ArrayList<EventReaderSummary>();
for (int i = 0; i < src1Summaries.size(); ++i) {
sourceSummaries.add(getDiff(src1Summaries.get(i), src2Summaries.get(i)));
}
ReadEventCycleSummary r1 = new ReadEventCycleSummary(src1.getEventSourceName(), sourceSummaries, src1.getEndOfWindowScn(), src1.getReadMillis() - src2.getReadMillis());
return r1;
}
return null;
}
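
The two methods above are naturally used together: getReadEventSummary() takes a snapshot of the cumulative statistics, and getDiff() subtracts a previous snapshot to obtain per-interval numbers. The sketch below shows that pattern as a hypothetical helper; the _lastSnapshot field and the method name are assumptions for illustration and are not part of RelayStatsAdapter.

// Hypothetical addition to RelayStatsAdapter (or a subclass): periodic delta reporting.
private ReadEventCycleSummary _lastSnapshot; // assumed field, not in the original class

protected ReadEventCycleSummary reportDeltaSinceLastSnapshot() {
    ReadEventCycleSummary current = getReadEventSummary();
    // getDiff() returns null when the two snapshots do not cover the same number of sources.
    ReadEventCycleSummary delta = (_lastSnapshot == null) ? current : getDiff(current, _lastSnapshot);
    _lastSnapshot = current;
    return delta;
}
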
Use of com.linkedin.databus2.producers.db.EventReaderSummary in project databus by linkedin.
The class BootstrapAvroFileEventReader, method readEventsFromHadoopFiles.
private EventReaderSummary readEventsFromHadoopFiles(OracleTriggerMonitoredSourceInfo sourceInfo, File avroSeedDir, Long windowSCN) {
DataFileReader<GenericRecord> reader = null;
File[] files = avroSeedDir.listFiles();
List<File> fileList = Arrays.asList(files);
Collections.sort(fileList);
long numRead = 0;
long prevNumRead = 0;
long numBytes = 0;
long timestamp = System.currentTimeMillis();
long timeStart = timestamp;
long lastTime = timestamp;
long commitInterval = _config.getCommitInterval();
long totLatency = 0;
GenericRecord record = null;
RateMonitor seedingRate = new RateMonitor("Seeding Rate");
seedingRate.start();
seedingRate.suspend();
long startRowId = _lastRows.get(sourceInfo.getEventView());
LOG.info("Last Known Row Id is :" + startRowId);
boolean resumeSeedingRate = true;
for (File avroSeedFile : files) {
if (!avroSeedFile.isFile())
continue;
LOG.info("Seeding from File : " + avroSeedFile);
try {
reader = new DataFileReader<GenericRecord>(avroSeedFile, new GenericDatumReader<GenericRecord>());
} catch (IOException e) {
LOG.fatal("Failed to bootstrap from file " + avroSeedFile.getAbsolutePath(), e);
throw new RuntimeException("Failed to bootstrap from file " + avroSeedFile.getAbsolutePath(), e);
}
try {
boolean committed = false;
for (GenericRecord hdfsRecord : reader) {
record = hdfsRecord;
committed = false;
numRead++;
if (numRead < startRowId)
continue;
if (resumeSeedingRate) {
seedingRate.resume();
resumeSeedingRate = false;
}
seedingRate.tick();
//LOG.info("Read record :" + record);
long start = System.nanoTime();
long eventSize = sourceInfo.getFactory().createAndAppendEvent(windowSCN, timestamp, hdfsRecord, _bootstrapEventBuffer, false, null);
numBytes += eventSize;
long latency = System.nanoTime() - start;
totLatency += latency;
if (numRead % commitInterval == 0) {
_bootstrapEventBuffer.endEvents(numRead, timestamp, null);
_bootstrapEventBuffer.startEvents();
long procTime = totLatency / 1000000000;
long currTime = System.currentTimeMillis();
long diff = (currTime - lastTime) / 1000;
long timeSinceStart = (currTime - timeStart) / 1000;
LOG.info("Processed " + commitInterval + " rows in " + diff + " seconds, Avro Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + seedingRate.getRate() + ", NumRows Fetched so far:" + numRead + ". TotalEventSize :" + numBytes);
lastTime = currTime;
seedingRate.resume();
committed = true;
}
}
if (!committed) {
_bootstrapEventBuffer.endEvents(numRead, timestamp, null);
_bootstrapEventBuffer.startEvents();
long procTime = totLatency / 1000000000;
long currTime = System.currentTimeMillis();
long diff = (currTime - lastTime) / 1000;
long timeSinceStart = (currTime - timeStart) / 1000;
LOG.info("Completed Seeding from : " + avroSeedFile + ", Processed " + commitInterval + " rows in " + diff + " seconds, Avro Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + seedingRate.getRate() + ", NumRows Fetched so far:" + numRead + ". TotalEventSize :" + numBytes);
lastTime = currTime;
seedingRate.resume();
}
} catch (Exception e) {
LOG.fatal("NumRead :" + numRead + ", Got Exception while processing generic record :" + record, e);
throw new RuntimeException(e);
}
LOG.info("Processed " + (numRead - prevNumRead) + " rows of Source: " + sourceInfo.getSourceName() + " from file " + avroSeedFile);
prevNumRead = numRead;
}
long timeEnd = System.currentTimeMillis();
long elapsedMin = (timeEnd - timeStart) / (MILLISEC_TO_MIN);
LOG.info("Processed " + numRead + " rows of Source: " + sourceInfo.getSourceName() + " in " + elapsedMin + " minutes");
return new EventReaderSummary(sourceInfo.getSourceId(), sourceInfo.getSourceName(), -1, (int) numRead, numBytes, (timeEnd - timeStart), (timeEnd - timeStart) / numRead, 0, 0, 0);
}
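
The read loop above relies on the standard Avro file API: a DataFileReader wrapping a GenericDatumReader iterates every GenericRecord in a seed file. Stripped of the seeding-specific bookkeeping, that loop reduces to the sketch below; the class name and the count-only body are illustrative and not part of BootstrapAvroFileEventReader.

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class AvroSeedFileCounter {
    // Count the records in one Avro seed file; a real reader would hand each record
    // to an event factory, as readEventsFromHadoopFiles does above.
    public static long countRecords(File avroSeedFile) {
        DataFileReader<GenericRecord> reader = null;
        long numRead = 0;
        try {
            reader = new DataFileReader<GenericRecord>(avroSeedFile, new GenericDatumReader<GenericRecord>());
            for (GenericRecord record : reader) {
                numRead++;
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to read " + avroSeedFile.getAbsolutePath(), e);
        } finally {
            if (reader != null) {
                try { reader.close(); } catch (IOException ignore) { /* best effort */ }
            }
        }
        return numRead;
    }
}
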
Use of com.linkedin.databus2.producers.db.EventReaderSummary in project databus by linkedin.
The class BootstrapAvroFileEventReader, method readEventsFromAllSources.
@Override
public ReadEventCycleSummary readEventsFromAllSources(long sinceSCN) throws DatabusException, EventCreationException, UnsupportedKeyException {
List<EventReaderSummary> summaries = new ArrayList<EventReaderSummary>();
boolean error = false;
long startTS = System.currentTimeMillis();
long endScn = -1;
long minScn = Long.MAX_VALUE;
try {
for (OracleTriggerMonitoredSourceInfo sourceInfo : _sources) {
endScn = _config.getSeedWindowSCNMap().get(sourceInfo.getEventView());
minScn = Math.min(endScn, minScn);
LOG.info("Bootstrapping " + sourceInfo.getEventView());
_bootstrapEventBuffer.start(endScn);
String dir = _config.getAvroSeedInputDirMap().get(sourceInfo.getEventView());
File d = new File(dir);
EventReaderSummary summary = readEventsFromHadoopFiles(sourceInfo, d, endScn);
// Script assumes seeding is done for one schema at a time
_bootstrapEventBuffer.endEvents(BootstrapEventBuffer.END_OF_SOURCE, endScn, null);
summaries.add(summary);
}
} catch (Exception ex) {
error = true;
throw new DatabusException(ex);
} finally {
// Notify writer that I am done
if (error) {
_bootstrapEventBuffer.endEvents(BootstrapEventBuffer.ERROR_CODE, endScn, null);
LOG.error("Seeder stopping unexpectedly !!");
} else {
_bootstrapEventBuffer.endEvents(BootstrapEventBuffer.END_OF_FILE, endScn, null);
LOG.info("Completed Seeding !!");
}
}
LOG.info("Start SCN :" + minScn);
long endTS = System.currentTimeMillis();
ReadEventCycleSummary cycleSummary = new ReadEventCycleSummary("seeder", summaries, minScn, (endTS - startTS));
return cycleSummary;
}
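
From the caller's side, a full seeding pass is one call to readEventsFromAllSources(), whose ReadEventCycleSummary can then be logged per source. The sketch below is a hypothetical driver, not part of the project; the import for BootstrapAvroFileEventReader is omitted because its package depends on the module layout, and only getters already visible in the snippets above (getEventSourceName, getEndOfWindowScn, getReadMillis, getSourceSummaries) are used.

import com.linkedin.databus2.producers.db.EventReaderSummary;
import com.linkedin.databus2.producers.db.ReadEventCycleSummary; // package assumed
// import for BootstrapAvroFileEventReader omitted; its package depends on the databus module layout

public class SeedingDriver {
    // Run one full seeding pass and print the per-source summaries.
    public static void runOnce(BootstrapAvroFileEventReader reader) throws Exception {
        ReadEventCycleSummary cycle = reader.readEventsFromAllSources(0L);
        System.out.println("Seeded " + cycle.getEventSourceName()
                + " up to SCN " + cycle.getEndOfWindowScn()
                + " in " + cycle.getReadMillis() + " ms");
        for (EventReaderSummary perSource : cycle.getSourceSummaries()) {
            System.out.println(perSource.toString());
        }
    }
}
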