Use of com.linkedin.databus2.producers.db.EventReaderSummary in the databus project by LinkedIn.
Class BootstrapSrcDBEventReader, method readEventsFromAllSources.
/**
 * Runs one complete seeding pass over every configured source.
 *
 * <p>The start SCN is read once, through the first source, before any source is seeded. After each
 * source finishes, the SCN is read again and handed to the seed writer as that source's
 * end-of-source marker. However the pass ends, the seed writer is notified: with
 * {@code ERROR_CODE} after a failure, with {@code END_OF_FILE} otherwise.
 *
 * @param sinceSCN not consulted by this implementation
 * @return a cycle summary named "seeder" holding the per-source summaries, the start SCN and the
 *         elapsed wall-clock time in milliseconds
 * @throws DatabusException wrapping any exception raised inside the seeding pass
 */
@Override
public ReadEventCycleSummary readEventsFromAllSources(long sinceSCN) throws DatabusException, EventCreationException, UnsupportedKeyException {
  final List<EventReaderSummary> perSourceSummaries = new ArrayList<EventReaderSummary>();
  long startScn = EventReaderSummary.NO_EVENTS_SCN;
  long finalScn = startScn;
  boolean failed = false;
  final long beganAtMs = System.currentTimeMillis();

  try {
    _rate.start();
    _rate.suspend();

    // A short-lived connection, opened only to report the JDBC driver version.
    Connection probe = null;
    try {
      probe = _dataSource.getConnection();
      final String driverVersion = probe.getMetaData().getDriverVersion();
      LOG.info("Oracle JDBC Version :" + driverVersion);
    } finally {
      DBHelper.close(probe);
    }

    // Seeding is assumed to cover one schema at a time, so the first source is enough to
    // locate that schema's sy$txlog.
    final boolean haveSources = !_sources.isEmpty();
    if (haveSources) {
      startScn = getMaxScn(_sources.get(0));
    }

    for (OracleTriggerMonitoredSourceInfo current : _sources) {
      LOG.info("Bootstrapping " + current.getEventView());
      _bootstrapSeedWriter.start(startScn);
      final EventReaderSummary sourceSummary = readEventsForSource(current, startScn);

      // Same single-schema assumption as above: re-read the SCN through the first source.
      finalScn = getMaxScn(_sources.get(0));
      _bootstrapSeedWriter.endEvents(BootstrapEventBuffer.END_OF_SOURCE, finalScn, null);
      perSourceSummaries.add(sourceSummary);
    }
  } catch (Exception cause) {
    failed = true;
    throw new DatabusException(cause);
  } finally {
    // Tell the writer how this pass ended.
    if (!failed) {
      _bootstrapSeedWriter.endEvents(BootstrapEventBuffer.END_OF_FILE, finalScn, null);
      LOG.info("Completed Seeding !!");
    } else {
      _bootstrapSeedWriter.endEvents(BootstrapEventBuffer.ERROR_CODE, finalScn, null);
      LOG.error("Seeder stopping unexpectedly !!");
    }
    LOG.info("Start SCN :" + startScn);
    LOG.info("End SCN :" + finalScn);
  }

  final long elapsedMs = System.currentTimeMillis() - beganAtMs;
  return new ReadEventCycleSummary("seeder", perSourceSummaries, startScn, elapsedMs);
}
Use of com.linkedin.databus2.producers.db.EventReaderSummary in the databus project by LinkedIn.
Class BootstrapSrcDBEventReader, method readEventsForSource.
/**
 * Seeds a single source by repeatedly running its chunked query and appending every returned row
 * to the bootstrap seed writer.
 *
 * <p>Each round binds the current chunk key and {@code _numRowsPerQuery} to the query, appends the
 * rows, writes each row's key/txn pair to the source's key-txn file, and commits to the seed
 * writer every {@code _commitInterval} rows and again at the end of the round. Rounds continue
 * until a round returns at most one row, a short round sees at most one new key, or the configured
 * end key has been passed. A full round that sees at most one new key cannot advance the chunk key
 * and is reported as an error.
 *
 * <p>A {@link SQLException} inside a round rolls back the seed writer and the loop retries; once
 * {@code _numRetries} failures have accumulated a {@link DatabusException} is thrown.
 *
 * @param sourceInfo the source to seed
 * @param maxScn SCN handed to the event factory for every row
 * @return summary with the number of rows fetched, total event size, wall-clock duration and the
 *         time spent creating/appending events
 * @throws DatabusException when retries are exhausted or the chunk key cannot advance
 * @throws EventCreationException when the LOB prefetch size cannot be set on the statement
 * @throws SQLException when obtaining the connection or preparing the statement fails
 * @throws IOException when writing or closing the key-txn file fails
 */
private EventReaderSummary readEventsForSource(OracleTriggerMonitoredSourceInfo sourceInfo, long maxScn) throws DatabusException, EventCreationException, UnsupportedKeyException, SQLException, IOException {
  int retryMax = _numRetries;
  int numRetry = 0;
  Connection conn = null;
  PreparedStatement pstmt = null;
  ResultSet rs = null;
  KeyType keyType = _pKeyTypeMap.get(sourceInfo.getEventView());
  String keyName = _pKeyNameMap.get(sourceInfo.getEventView());
  String sql = _eventQueryMap.get(sourceInfo.getEventView());
  String endSrcKey = _endSrcKeyMap.get(sourceInfo.getEventView());
  if (sql == null) {
    // No configured query for this view: generate the chunked query.
    sql = generateEventQuery2(sourceInfo, keyName, keyType, getPKIndex(sourceInfo), getQueryHint(sourceInfo));
  }
  LOG.info("Chunked Query for Source (" + sourceInfo + ") is :" + sql);
  LOG.info("EndSrcKey for source (" + sourceInfo + ") is :" + endSrcKey);
  PrimaryKeyTxn endKeyTxn = null;
  if ((null != endSrcKey) && (!endSrcKey.trim().isEmpty())) {
    if (KeyType.LONG == keyType) {
      // Long.valueOf replaces the deprecated Long(String) constructor; same parsing, same type.
      endKeyTxn = new PrimaryKeyTxn(Long.valueOf(endSrcKey));
    } else {
      endKeyTxn = new PrimaryKeyTxn(endSrcKey);
    }
  }
  long timestamp = System.currentTimeMillis();
  int numRowsFetched = 0;
  long totalEventSize = 0;
  long timeStart = System.currentTimeMillis();
  long checkpointInterval = _commitInterval;
  boolean done = false;
  long lastTime = timeStart;
  long numRows = 0;
  PrimaryKeyTxn pKey = null;
  String minKeySQL = generateMinKeyQuery(sourceInfo, keyName);
  String srcName = sourceInfo.getEventView();
  LOG.info("Bootstrapping for Source :" + srcName);
  String lastKey = _lastKeys.get(sourceInfo.getEventView());
  File f = _keyTxnFilesMap.get(srcName);
  // Append when the key-txn file already exists, otherwise create it.
  FileWriter oStream = new FileWriter(f, f.exists());
  BufferedWriter keyTxnWriter = new BufferedWriter(oStream, _keyTxnBufferSizeMap.get(srcName));
  _bootstrapSeedWriter.startEvents();
  RateMonitor seedingRate = new RateMonitor("Seeding Rate");
  RateMonitor queryRate = new RateMonitor("Query Rate");
  seedingRate.start();
  seedingRate.suspend();
  queryRate.start();
  queryRate.suspend();
  boolean isException = false;
  // Total nanoseconds spent inside createAndAppendEvent across all rows of all rounds.
  long totProcessNanos = 0;
  try {
    conn = _dataSource.getConnection();
    pstmt = conn.prepareStatement(sql);
    if (_enableNumRowsQuery) {
      numRows = getNumRows(conn, getTableName(sourceInfo));
    } else {
      numRows = -1;
    }
    long currRowId = _lastRows.get(sourceInfo.getEventView());
    /*
     * First Key to be seeded will be decided in the following order:
     * 1. Use bootstrap_seeder_state's last srcKey as the key for the first chunk.
     * 2. If (1) is empty, use passed-in begin srcKey.
     * 3. If (2) is also empty, use Oracle's minKey as the first Chunk Key.
     */
    if (null == lastKey) {
      lastKey = _beginSrcKeyMap.get(sourceInfo.getEventView());
      LOG.info("No last Src Key available in bootstrap_seeder_state for source (" + sourceInfo + ". Trying beginSrc Key from config :" + lastKey);
    }
    if ((null == lastKey) || (lastKey.trim().isEmpty())) {
      if (KeyType.LONG == keyType) {
        pKey = new PrimaryKeyTxn(executeAndGetLong(minKeySQL));
      } else {
        pKey = new PrimaryKeyTxn(executeAndGetString(minKeySQL));
      }
    } else {
      if (KeyType.LONG == keyType) {
        pKey = new PrimaryKeyTxn(Long.parseLong(lastKey));
      } else {
        pKey = new PrimaryKeyTxn(lastKey);
      }
    }
    // lastRoundKeyTxn: key the current round started from (reported if the round gets stuck).
    // lastKeyTxn: most recent distinct key seen, used to count key changes within a round.
    PrimaryKeyTxn lastRoundKeyTxn = new PrimaryKeyTxn(pKey);
    PrimaryKeyTxn lastKeyTxn = new PrimaryKeyTxn(pKey);
    long numUniqueKeysThisRound = 0;
    boolean first = true;
    _rate.resume();
    while (!done) {
      LOG.info("MinKey being used for this round:" + pKey);
      numUniqueKeysThisRound = 0;
      try {
        lastRoundKeyTxn.copyFrom(pKey);
        if (KeyType.LONG == keyType) {
          pstmt.setLong(1, pKey.getKey());
        } else {
          String key = pKey.getKeyStr();
          pstmt.setString(1, key);
        }
        pstmt.setLong(2, _numRowsPerQuery);
        pstmt.setFetchSize(_numRowsPrefetch);
        if (_oraclePreparedStatementClass.isInstance(pstmt)) {
          // The LOB prefetch setter is invoked reflectively on the Oracle statement class.
          try {
            _setLobPrefetchSizeMethod.invoke(pstmt, _LOBPrefetchSize);
          } catch (Exception e) {
            throw new EventCreationException("Unable to set Lob Prefetch size" + e.getMessage());
          }
        }
        LOG.info("Executing Oracle Query :" + sql + ". Key: " + pKey + ",NumRows: " + _numRowsPerQuery);
        queryRate.resume();
        rs = pstmt.executeQuery();
        queryRate.suspend();
        LOG.info("Total Query Latency :" + queryRate.getDuration() / 1000000000L);
        long totLatency = 0;
        long txnId = 0;
        int numRowsThisRound = 0;
        seedingRate.resume();
        while (rs.next()) {
          _rate.tick();
          seedingRate.tick();
          currRowId++;
          txnId = rs.getLong(2);
          if (KeyType.LONG == keyType) {
            pKey.setKeyTxn(rs.getLong(1), txnId);
          } else {
            String key = rs.getString(1);
            pKey.setKeyStrTxn(key, txnId);
          }
          //Write TXN to file
          pKey.writeTo(keyTxnWriter);
          long start = System.nanoTime();
          long eventSize = sourceInfo.getFactory().createAndAppendEvent(maxScn, timestamp, rs, _bootstrapSeedWriter, false, null);
          long latency = System.nanoTime() - start;
          totLatency += latency;
          totalEventSize += eventSize;
          // Add only this row's latency. The previous code added the running round total on
          // every row, which inflated the figure quadratically, and its "/ 1000 * 1000" merely
          // rounded the value instead of converting units.
          totProcessNanos += latency;
          numRowsFetched++;
          numRowsThisRound++;
          if (lastKeyTxn.compareKey(pKey) != 0) {
            numUniqueKeysThisRound++;
            lastKeyTxn.copyFrom(pKey);
          }
          if (numRowsFetched % checkpointInterval == 0) {
            // Commit this batch and reinit
            _bootstrapSeedWriter.endEvents(currRowId, timestamp, null);
            keyTxnWriter.flush();
            _bootstrapSeedWriter.startEvents();
            long procTime = totLatency / 1000000000;
            long currTime = System.currentTimeMillis();
            long diff = (currTime - lastTime) / 1000;
            long timeSinceStart = (currTime - timeStart) / 1000;
            double currRate = _rate.getRate();
            // Guard the remaining-time estimate against a zero or negative rate.
            currRate = (currRate <= 0) ? 1 : currRate;
            if (_enableNumRowsQuery) {
              double remTime = (numRows - currRowId) / (currRate);
              LOG.info("Processed " + checkpointInterval + " rows in " + diff + " seconds, Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Approx Seconds remaining :" + remTime + ",Overall Row Rate:" + _rate.getRate() + "(" + seedingRate.getRate() + ")" + ",NumRows Fetched so far:" + numRowsFetched + ". TotalEventSize :" + totalEventSize);
            } else {
              LOG.info("Processed " + checkpointInterval + " rows in " + diff + " seconds, Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + _rate.getRate() + "(" + seedingRate.getRate() + ")" + ",NumRows Fetched so far:" + numRowsFetched + ". TotalEventSize :" + totalEventSize);
            }
            lastTime = currTime;
          }
          if ((null != endKeyTxn) && (endKeyTxn.compareKey(lastKeyTxn) < 0)) {
            LOG.info("Seeding to be stopped for current source as it has completed seeding upto endSrckey :" + endKeyTxn + ", Current SrcKey :" + lastKeyTxn);
            break;
          }
        }
        seedingRate.suspend();
        if ((numRowsThisRound <= 1) || ((numRowsThisRound < _numRowsPerQuery) && (numUniqueKeysThisRound <= 1))) {
          LOG.info("Seeding Done for source :" + sourceInfo.getEventView() + ", numRowsThisRound :" + numRowsThisRound + ", _numRowsPerQuery :" + _numRowsPerQuery + ", numUniqueKeys :" + numUniqueKeysThisRound);
          done = true;
        } else if ((numRowsThisRound == _numRowsPerQuery) && (numUniqueKeysThisRound <= 1)) {
          // A full chunk with no key progress would be fetched again forever.
          String msg = "Seeding stuck at infinte loop for source : " + sourceInfo.getEventView() + ", numRowsThisRound :" + numRowsThisRound + ", _numRowsPerQuery :" + _numRowsPerQuery + ", numUniqueKeys :" + numUniqueKeysThisRound + ", lastChunkKey :" + lastRoundKeyTxn;
          LOG.error(msg);
          throw new DatabusException(msg);
        } else if (null != endKeyTxn) {
          if (endKeyTxn.compareKey(lastKeyTxn) < 0) {
            LOG.info("Seeding stopped for source :" + sourceInfo.getEventView() + ", as it has completed seeding upto the endSrckey :" + endKeyTxn + ", numRowsThisRound :" + numRowsThisRound + ", _numRowsPerQuery :" + _numRowsPerQuery + ", numUniqueKeys :" + numUniqueKeysThisRound + " , Current SrcKey :" + lastKeyTxn);
            done = true;
          }
        }
        if (currRowId > 0 && (!first || done)) {
          //Since next time, we will read the last seen record again
          currRowId--;
        }
        LOG.info("about to call end events with currRowId = " + currRowId);
        first = false;
        _bootstrapSeedWriter.endEvents(currRowId, timestamp, null);
        isException = false;
      } catch (SQLException ex) {
        LOG.error("Got SQLException for source (" + sourceInfo + ")", ex);
        _bootstrapSeedWriter.rollbackEvents();
        numRetry++;
        isException = true;
        if (numRetry >= retryMax) {
          throw new DatabusException("Error: Reached max retries for reading/processing bootstrap", ex);
        }
      } finally {
        DBHelper.close(rs);
        rs = null;
      }
    }
  } catch (DatabusException ex) {
    isException = true;
    throw ex;
  } finally {
    DBHelper.close(rs, pstmt, conn);
    keyTxnWriter.close();
    rs = null;
    _rate.suspend();
    if (!isException) {
      // Only dedupe the key-txn file when seeding did not end with a recorded failure.
      dedupeKeyTxnFile(_keyTxnFilesMap.get(srcName), keyType);
    }
  }
  long timeEnd = System.currentTimeMillis();
  long elapsedMin = (timeEnd - timeStart) / (MILLISEC_TO_MIN);
  LOG.info("Processed " + numRowsFetched + " rows of Source: " + sourceInfo.getSourceName() + " in " + elapsedMin + " minutes");
  // Reported in milliseconds, like the wall-clock duration passed next to it.
  // NOTE(review): confirm the unit EventReaderSummary expects for this argument.
  long totProcessTime = totProcessNanos / 1000000L;
  return new EventReaderSummary(sourceInfo.getSourceId(), sourceInfo.getSourceName(), -1, numRowsFetched, totalEventSize, (timeEnd - timeStart), totProcessTime, 0, 0, 0);
}
Use of com.linkedin.databus2.producers.db.EventReaderSummary in the databus project by LinkedIn.
Class OracleTxlogEventReader, method readEventsFromAllSources.
/**
 * Runs one polling cycle: reads events newer than {@code sinceSCN} for every source that passes
 * {@code filterSources}, appends them to the event buffer and closes the event window at the new
 * end-of-period SCN.
 *
 * <p>When no source produced events, the end-of-period SCN may still be advanced: by the SCN
 * chunk size while SCN chunking has not reached its target, to the next batch SCN while in txn
 * chunking mode, or to the current max txlog SCN once the slow-source threshold has elapsed. If
 * the SCN does not move forward, the event buffer is rolled back.
 *
 * @param sinceSCN SCN to read from; a non-positive value is replaced by the current max txlog SCN
 * @return summary of the cycle; its SCN is the larger of the end-of-period SCN and
 *         {@code sinceSCN}
 * @throws DatabusException wrapping any failure raised during the cycle
 */
@Override
public ReadEventCycleSummary readEventsFromAllSources(long sinceSCN) throws DatabusException, EventCreationException, UnsupportedKeyException {
  boolean eventBufferNeedsRollback = true;
  boolean debugEnabled = _log.isDebugEnabled();
  List<EventReaderSummary> summaries = new ArrayList<EventReaderSummary>();
  try {
    long cycleStartTS = System.currentTimeMillis();
    _eventBuffer.startEvents();
    // Open the database connection if it is closed (at start or after an SQLException)
    if (_eventSelectConnection == null || _eventSelectConnection.isClosed()) {
      resetConnections();
    }
    /**
     * Chunking in Relay:
     * =================
     *
     * Variables used:
     * ===============
     *
     * 1. _inChunking : Flag to indicate if the relay is in chunking mode
     * 2. _chunkingType : Type of chunking supported
     * 3. _chunkedScnThreshold :
     * The threshold Scn diff which triggers chunking. If the relay's maxScn is older
     * than DB's maxScn by this threshold, then chunking will be enabled.
     * 4. _txnsPerChunk : Chunk size of txns for txn based chunking.
     * 5. _scnChunkSize : Chunk Size for scn based chunking.
     * 6. _catchupTargetMaxScn : Cached copy of DB's maxScn used as chunking's target SCN.
     *
     * =========================================
     * Behavior of Chunking for Slow Sources:
     * =========================================
     *
     * The slow sources case that is illustrated here is when all the sources in the sourcesList (fetched by relay) is slow.
     * In this case, the endOfPeriodSCN will not increase on its own whereas in all other cases, it will.
     *
     * At startup, if the _catchupTargetMaxScn - currScn > _chunkedScnThreshold, then chunking is enabled.
     * 1. Txn_based_chunking
     *
     * a) If chunking is on at startup, then txn-based chunking query is used. Otherwise, regular query is used.
     * b) For a period till SLOW_SOURCE_QUERY_THRESHOLD msec, the endOfPeriodSCN/SinceSCN will not increase.
     * c) After SLOW_SOURCE_QUERY_THRESHOLD msec, the sinceScn/endOfPeriodSCN will be increased to current MaxScn. If chunking was previously enabled
     * at this time, it will be disabled upto MAX_SCN_DELAY_MS msec after which _catchupTargetMaxScn will be refreshed.
     * d) if the new _catchupTargetMaxScn - currScn > _chunkedScnThreshold, then chunking is again enabled.
     * e) go to (b)
     *
     * 2. SCN based Chunking
     * a) If chunking is on at startup, then scn-based chunking query is used. Otherwise, regular query is used.
     * b) For a period till SLOW_SOURCE_QUERY_THRESHOLD msec, the endOfPeriodSCN/SinceSCN keep increasing by _scnChunkSize with no rows fetched.
     * c) When _catchupTargetMaxScn - endOfPeriodSCN < _chunkedScnThreshold, then chunking is disabled and regular query kicks in and in this
     * phase sinceSCN/endOfPeriodSCN will not increase. After MAX_SCN_DELAY_MS interval, _catchupTargetSCN will be refreshed.
     * d) If the new _catchupTargetMaxScn - currScn > _chunkedScnThreshold, then SCN chunking is again enabled.
     * e) go to (b) *
     *
     */
    if (sinceSCN <= 0) {
      _catchupTargetMaxScn = sinceSCN = getMaxTxlogSCN(_eventSelectConnection);
      _log.debug("sinceSCN was <= 0. Overriding with the current max SCN=" + sinceSCN);
      _eventBuffer.setStartSCN(sinceSCN);
      try {
        DBHelper.commit(_eventSelectConnection);
      } catch (SQLException s) {
        DBHelper.rollback(_eventSelectConnection);
      }
    } else if ((_chunkingType.isChunkingEnabled()) && (_catchupTargetMaxScn <= 0)) {
      _catchupTargetMaxScn = getMaxTxlogSCN(_eventSelectConnection);
      _log.debug("catchupTargetMaxScn was <= 0. Overriding with the current max SCN=" + _catchupTargetMaxScn);
    }
    if (_catchupTargetMaxScn <= 0) {
      _inChunkingMode = false;
    }
    // Get events for each source
    List<OracleTriggerMonitoredSourceInfo> filteredSources = filterSources(sinceSCN);
    long endOfPeriodScn = EventReaderSummary.NO_EVENTS_SCN;
    for (OracleTriggerMonitoredSourceInfo source : _sources) {
      if (filteredSources.contains(source)) {
        long startTS = System.currentTimeMillis();
        EventReaderSummary summary = readEventsFromOneSource(_eventSelectConnection, source, sinceSCN);
        summaries.add(summary);
        endOfPeriodScn = Math.max(endOfPeriodScn, summary.getEndOfPeriodSCN());
        long endTS = System.currentTimeMillis();
        source.getStatisticsBean().addTimeOfLastDBAccess(endTS);
        if (_eventsLog.isDebugEnabled() || (_eventsLog.isInfoEnabled() && summary.getNumberOfEvents() > 0)) {
          _eventsLog.info(summary.toString());
        }
        // Update statistics for the source
        if (summary.getNumberOfEvents() > 0) {
          source.getStatisticsBean().addEventCycle(summary.getNumberOfEvents(), endTS - startTS, summary.getSizeOfSerializedEvents(), summary.getEndOfPeriodSCN());
        } else {
          source.getStatisticsBean().addEmptyEventCycle();
        }
      } else {
        // Sources filtered out of this cycle are recorded as an empty cycle.
        source.getStatisticsBean().addEmptyEventCycle();
      }
    }
    _lastSeenEOP = Math.max(_lastSeenEOP, Math.max(endOfPeriodScn, sinceSCN));
    // If we did not read any events in this cycle then get the max SCN from the txlog. This
    // is for slow sources so that the endOfPeriodScn never lags too far behind the max scn
    // in the txlog table.
    long curtime = System.currentTimeMillis();
    if (endOfPeriodScn == EventReaderSummary.NO_EVENTS_SCN) {
      // If in SCN Chunking mode, its possible to get empty batches for a SCN range,
      if ((sinceSCN + _scnChunkSize <= _catchupTargetMaxScn) && (ChunkingType.SCN_CHUNKING == _chunkingType)) {
        endOfPeriodScn = sinceSCN + _scnChunkSize;
        _lastquerytime = curtime;
      } else if (ChunkingType.TXN_CHUNKING == _chunkingType && _inChunkingMode) {
        long nextBatchScn = getMaxScnSkippedForTxnChunked(_eventSelectConnection, sinceSCN, _txnsPerChunk);
        _log.info("No events while in txn chunking. CurrScn : " + sinceSCN + ", jumping to :" + nextBatchScn);
        endOfPeriodScn = nextBatchScn;
        _lastquerytime = curtime;
      } else if ((curtime - _lastquerytime) > _slowQuerySourceThreshold) {
        _lastquerytime = curtime;
        //get new start scn for subsequent calls;
        final long maxTxlogSCN = getMaxTxlogSCN(_eventSelectConnection);
        //For performance reasons, getMaxTxlogSCN() returns the max scn only among txlog rows
        //which have their scn rewritten (i.e. scn < infinity). This allows the getMaxTxlogSCN
        //query to be evaluated using only the SCN index. Getting the true max SCN requires
        //scanning the rows where scn == infinity which is expensive.
        //On the other hand, readEventsFromOneSource will read the latter events. So it is
        //possible that maxTxlogSCN < scn of the last event in the buffer!
        //We use max() to guarantee that there are no SCN regressions.
        endOfPeriodScn = Math.max(maxTxlogSCN, sinceSCN);
        _log.info("SlowSourceQueryThreshold hit. currScn : " + sinceSCN + ". Advanced endOfPeriodScn to " + endOfPeriodScn + " and added the event to relay");
        if (debugEnabled) {
          _log.debug("No events processed. Read max SCN from txlog table for endOfPeriodScn. endOfPeriodScn=" + endOfPeriodScn);
        }
      }
      if (endOfPeriodScn != EventReaderSummary.NO_EVENTS_SCN && endOfPeriodScn > sinceSCN) {
        // The SCN moved forward in one of the branches above, so close the (empty) window at
        // the new SCN. The message now reports the starting SCN, which was previously missing.
        _log.info("The endOfPeriodScn has advanced from " + sinceSCN + " to " + endOfPeriodScn);
        _eventBuffer.endEvents(endOfPeriodScn, _relayInboundStatsCollector);
        eventBufferNeedsRollback = false;
      } else {
        eventBufferNeedsRollback = true;
      }
    } else {
      //we have appended some events; and a new end of period has been found
      _lastquerytime = curtime;
      _eventBuffer.endEvents(endOfPeriodScn, _relayInboundStatsCollector);
      if (debugEnabled) {
        _log.debug("End of events: " + endOfPeriodScn + " windown range= " + _eventBuffer.getMinScn() + "," + _eventBuffer.lastWrittenScn());
      }
      //no need to roll back
      eventBufferNeedsRollback = false;
    }
    //save endOfPeriodScn if new one has been discovered
    if (endOfPeriodScn != EventReaderSummary.NO_EVENTS_SCN) {
      if (null != _maxScnWriter && (endOfPeriodScn != sinceSCN)) {
        _maxScnWriter.saveMaxScn(endOfPeriodScn);
      }
      for (OracleTriggerMonitoredSourceInfo source : _sources) {
        //update maxDBScn here
        source.getStatisticsBean().addMaxDBScn(endOfPeriodScn);
        source.getStatisticsBean().addTimeOfLastDBAccess(System.currentTimeMillis());
      }
    }
    long cycleEndTS = System.currentTimeMillis();
    //check if we should refresh _catchupTargetMaxScn
    if (_chunkingType.isChunkingEnabled() && (_lastSeenEOP >= _catchupTargetMaxScn) && (curtime - _lastMaxScnTime >= _maxScnDelayMs)) {
      //reset it to -1 so it gets refreshed next time around
      _catchupTargetMaxScn = -1;
    }
    boolean chunkMode = _chunkingType.isChunkingEnabled() && (_catchupTargetMaxScn > 0) && (_lastSeenEOP < _catchupTargetMaxScn);
    if (!chunkMode && _inChunkingMode) {
      _log.info("Disabling chunking for sources !!");
    }
    _inChunkingMode = chunkMode;
    if (_inChunkingMode && debugEnabled) {
      _log.debug("_inChunkingMode = true, _catchupTargetMaxScn=" + _catchupTargetMaxScn + ", endOfPeriodScn=" + endOfPeriodScn + ", _lastSeenEOP=" + _lastSeenEOP);
    }
    ReadEventCycleSummary summary = new ReadEventCycleSummary(_name, summaries, Math.max(endOfPeriodScn, sinceSCN), (cycleEndTS - cycleStartTS));
    // Have to commit the transaction since we are in serializable isolation level
    DBHelper.commit(_eventSelectConnection);
    // Return the event summaries
    return summary;
  } catch (SQLException ex) {
    try {
      DBHelper.rollback(_eventSelectConnection);
    } catch (SQLException s) {
      // Keep the rollback failure as the cause so its stack trace is not lost.
      throw new DatabusException(s.getMessage(), s);
    }
    handleExceptionInReadEvents(ex);
    throw new DatabusException(ex);
  } catch (Exception e) {
    handleExceptionInReadEvents(e);
    throw new DatabusException(e);
  } finally {
    // Roll back the event buffer unless the window was closed successfully above.
    if (eventBufferNeedsRollback) {
      if (_log.isDebugEnabled()) {
        _log.debug("Rolling back the event buffer because eventBufferNeedsRollback is true.");
      }
      _eventBuffer.rollbackEvents();
    }
  }
}
Aggregations